From 6cb804a3e508c04c7439ac368bca56b10e251aa4 Mon Sep 17 00:00:00 2001 From: arkw Date: Fri, 24 Jan 2025 16:44:31 +0900 Subject: [PATCH 01/16] update async_bindings.ts update opfs tests change dropFiles args --- lib/CMakeLists.txt | 3 + lib/src/webdb_api.cc | 20 +- .../duckdb-wasm/src/bindings/bindings_base.ts | 57 ++++- .../src/bindings/bindings_interface.ts | 3 +- packages/duckdb-wasm/src/bindings/config.ts | 4 + .../duckdb-wasm/src/bindings/duckdb_module.ts | 2 + .../src/parallel/async_bindings.ts | 64 ++++- .../src/parallel/async_bindings_interface.ts | 3 + .../src/parallel/worker_dispatcher.ts | 4 +- .../src/parallel/worker_request.ts | 4 +- packages/duckdb-wasm/test/opfs.test.ts | 230 +++++++++++++----- 11 files changed, 317 insertions(+), 77 deletions(-) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8cfc7a59e..a0a7a88da 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -295,6 +295,9 @@ if(EMSCRIPTEN) _malloc, \ _calloc, \ _free, \ + stringToUTF8, \ + lengthBytesUTF8, \ + stackAlloc, \ _duckdb_web_clear_response, \ _duckdb_web_collect_file_stats, \ _duckdb_web_connect, \ diff --git a/lib/src/webdb_api.cc b/lib/src/webdb_api.cc index bad9c7f9e..dbe5b721b 100644 --- a/lib/src/webdb_api.cc +++ b/lib/src/webdb_api.cc @@ -1,3 +1,4 @@ +#include #include #include @@ -94,9 +95,24 @@ void duckdb_web_fs_drop_file(WASMResponse* packed, const char* file_name) { WASMResponseBuffer::Get().Store(*packed, webdb.DropFile(file_name)); } /// Drop a file -void duckdb_web_fs_drop_files(WASMResponse* packed) { +void duckdb_web_fs_drop_files(WASMResponse* packed, const char** names, int name_count) { GET_WEBDB(*packed); - WASMResponseBuffer::Get().Store(*packed, webdb.DropFiles()); + if (name_count == 0) { + WASMResponseBuffer::Get().Store(*packed, webdb.DropFiles()); + } else { + for (int i = 0; i < name_count; i++) { + const char* name = names[i]; + if (name == nullptr) { + std::cerr << "Error: NULL pointer detected at index " << i << std::endl; + continue; + } + if (std::strlen(name) == 0) { + std::cerr << "Error: Empty string detected at index " << i << std::endl; + continue; + } + WASMResponseBuffer::Get().Store(*packed, webdb.DropFile(name)); + } + } } /// Glob file infos void duckdb_web_fs_glob_file_infos(WASMResponse* packed, const char* file_name) { diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts index f395bdb10..34f201d3d 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_base.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts @@ -583,12 +583,52 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { dropResponseBuffers(this.mod); } /** Drop files */ - public dropFiles(): void { - const [s, d, n] = callSRet(this.mod, 'duckdb_web_fs_drop_files', [], []); - if (s !== StatusCode.SUCCESS) { - throw new Error(readString(this.mod, d, n)); + public dropFiles(names?:string[]): void { + const pointers:number[] = []; + let pointerOfArray:number = -1; + try { + for (const str of (names ?? 
[])) { + if (str !== null && str !== undefined && str.length > 0) { + const size = this.mod.lengthBytesUTF8(str) + 1; + const ret = this.mod._malloc(size); + if (!ret) { + throw new Error(`Failed to allocate memory for string: ${str}`); + } + this.mod.stringToUTF8(str, ret, size); + pointers.push(ret); + } + } + pointerOfArray = this.mod._malloc(pointers.length * 4); + if (!pointerOfArray) { + throw new Error(`Failed to allocate memory for pointers array`); + } + for (let i = 0; i < pointers.length; i++) { + this.mod.HEAP32[(pointerOfArray >> 2) + i] = pointers[i]; + } + const [s, d, n] = callSRet( + this.mod, + 'duckdb_web_fs_drop_files', + [ + 'number', + 'number' + ], + [ + pointerOfArray, + pointers.length + ] + ); + if (s !== StatusCode.SUCCESS) { + throw new Error(readString(this.mod, d, n)); + } + dropResponseBuffers(this.mod); + } finally { + for (const pointer of pointers) { + this.mod._free(pointer); + } + if( pointerOfArray > 0 ){ + this.mod._free(pointerOfArray); + } } - dropResponseBuffers(this.mod); } /** Flush all files */ public flushFiles(): void { @@ -622,6 +662,13 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { throw new Error("Not an OPFS file name: " + file); } } + public async registerOPFSFileNameAsync(file: string): Promise { + if (file.startsWith("opfs://")) { + return await this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); + } else { + throw new Error("Not an OPFS file name: " + file); + } + } public collectFileStatistics(file: string, enable: boolean): void { const [s, d, n] = callSRet(this.mod, 'duckdb_web_collect_file_stats', ['string', 'boolean'], [file, enable]); if (s !== StatusCode.SUCCESS) { diff --git a/packages/duckdb-wasm/src/bindings/bindings_interface.ts b/packages/duckdb-wasm/src/bindings/bindings_interface.ts index 271a42ef9..004d2d46c 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_interface.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_interface.ts @@ -58,11 +58,12 @@ export interface DuckDBBindings { prepareDBFileHandle(path: string, protocol: DuckDBDataProtocol): Promise; globFiles(path: string): WebFile[]; dropFile(name: string): void; - dropFiles(): void; + dropFiles(names?: string[]): void; flushFiles(): void; copyFileToPath(name: string, path: string): void; copyFileToBuffer(name: string): Uint8Array; registerOPFSFileName(file: string): void; + registerOPFSFileNameAsync(file: string): Promise; collectFileStatistics(file: string, enable: boolean): void; exportFileStatistics(file: string): FileStatistics; } diff --git a/packages/duckdb-wasm/src/bindings/config.ts b/packages/duckdb-wasm/src/bindings/config.ts index ce29ca0f5..27389d1eb 100644 --- a/packages/duckdb-wasm/src/bindings/config.ts +++ b/packages/duckdb-wasm/src/bindings/config.ts @@ -70,4 +70,8 @@ export interface DuckDBConfig { * Custom user agent string */ customUserAgent?: string; + /** + * Auto Opfs File Registration + */ + autoFileRegistration?: boolean; } diff --git a/packages/duckdb-wasm/src/bindings/duckdb_module.ts b/packages/duckdb-wasm/src/bindings/duckdb_module.ts index c75c3e2ed..aafbfb6cb 100644 --- a/packages/duckdb-wasm/src/bindings/duckdb_module.ts +++ b/packages/duckdb-wasm/src/bindings/duckdb_module.ts @@ -7,6 +7,8 @@ export interface DuckDBModule extends EmscriptenModule { stackSave: typeof stackSave; stackAlloc: typeof stackAlloc; stackRestore: typeof stackRestore; + lengthBytesUTF8: typeof lengthBytesUTF8; + stringToUTF8: typeof stringToUTF8; ccall: typeof ccall; PThread: PThread; diff --git 
a/packages/duckdb-wasm/src/parallel/async_bindings.ts b/packages/duckdb-wasm/src/parallel/async_bindings.ts index 4aaf3a6fa..cd95549fa 100644 --- a/packages/duckdb-wasm/src/parallel/async_bindings.ts +++ b/packages/duckdb-wasm/src/parallel/async_bindings.ts @@ -20,6 +20,7 @@ import { WebFile } from '../bindings/web_file'; import { DuckDBDataProtocol } from '../bindings'; const TEXT_ENCODER = new TextEncoder(); +const OPFS_PROTOCOL_REGEX = /'(opfs:\/\/\S*?)'/g; export class AsyncDuckDB implements AsyncDuckDBBindings { /** The message handler */ @@ -45,6 +46,8 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { protected _nextMessageId = 0; /** The pending requests */ protected _pendingRequests: Map = new Map(); + /** The DuckDBConfig */ + protected _config: DuckDBConfig = {}; constructor(logger: Logger, worker: Worker | null = null) { this._logger = logger; @@ -59,6 +62,11 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { return this._logger; } + /** Get the logger */ + public get config(): DuckDBConfig { + return this._config; + } + /** Attach to worker */ protected attach(worker: Worker): void { this._worker = worker; @@ -100,7 +108,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { transfer: ArrayBuffer[] = [], ): Promise> { if (!this._worker) { - console.error('cannot send a message since the worker is not set!'); + console.error('cannot send a message since the worker is not set!:' + task.type+"," + task.data); return undefined as any; } const mid = this._nextMessageId++; @@ -317,8 +325,8 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { return await this.postTask(task); } /** Try to drop files */ - public async dropFiles(): Promise { - const task = new WorkerTask(WorkerRequestType.DROP_FILES, null); + public async dropFiles(names?: string[]): Promise { + const task = new WorkerTask(WorkerRequestType.DROP_FILES, names); return await this.postTask(task); } /** Flush all files */ @@ -360,6 +368,8 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { /** Open a new database */ public async open(config: DuckDBConfig): Promise { + config.autoFileRegistration = config.autoFileRegistration ?? 
false; + this._config = config; const task = new WorkerTask(WorkerRequestType.OPEN, config); await this.postTask(task); } @@ -394,18 +404,49 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { /** Run a query */ public async runQuery(conn: ConnectionID, text: string): Promise { + if( this._config.autoFileRegistration ){ + const files = await this._preFileRegistration(text); + try { + return await this._runQueryAsync(conn, text); + } finally { + if( files.length > 0 ){ + await this.dropFiles(files); + } + } + } else { + return await this._runQueryAsync(conn, text); + } + } + private async _runQueryAsync(conn: ConnectionID, text: string): Promise { const task = new WorkerTask( WorkerRequestType.RUN_QUERY, [conn, text], ); return await this.postTask(task); } - /** Start a pending query */ public async startPendingQuery( conn: ConnectionID, text: string, allowStreamResult: boolean = false, + ): Promise { + if( this._config.autoFileRegistration ){ + const files = await this._preFileRegistration(text); + try { + return await this._startPendingQueryAsync(conn, text, allowStreamResult); + } finally { + if( files.length > 0 ){ + await this.dropFiles(files); + } + } + } else { + return await this._startPendingQueryAsync(conn, text, allowStreamResult); + } + } + private async _startPendingQueryAsync( + conn: ConnectionID, + text: string, + allowStreamResult: boolean = false, ): Promise { const task = new WorkerTask< WorkerRequestType.START_PENDING_QUERY, @@ -647,4 +688,19 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { ); await this.postTask(task); } + + private async _preFileRegistration(text: string) { + const files = [...text.matchAll(OPFS_PROTOCOL_REGEX)].map(match => match[1]); + const result: string[] = []; + for (const file of files) { + try { + await this.registerOPFSFileName(file); + result.push(file); + } catch (e) { + console.error(e); + throw new Error("file Not found:" + file); + } + } + return result; + } } diff --git a/packages/duckdb-wasm/src/parallel/async_bindings_interface.ts b/packages/duckdb-wasm/src/parallel/async_bindings_interface.ts index 97ba2b191..a7845f2bd 100644 --- a/packages/duckdb-wasm/src/parallel/async_bindings_interface.ts +++ b/packages/duckdb-wasm/src/parallel/async_bindings_interface.ts @@ -32,4 +32,7 @@ export interface AsyncDuckDBBindings { insertArrowFromIPCStream(conn: number, buffer: Uint8Array, options?: CSVInsertOptions): Promise; insertCSVFromPath(conn: number, path: string, options: CSVInsertOptions): Promise; insertJSONFromPath(conn: number, path: string, options: JSONInsertOptions): Promise; + + dropFile(name: string):Promise; + dropFiles(names?: string[]):Promise; } diff --git a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts index 3a5a8f295..d3b666ba7 100644 --- a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts +++ b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts @@ -149,7 +149,7 @@ export abstract class AsyncDuckDBDispatcher implements Logger { this.sendOK(request); break; case WorkerRequestType.DROP_FILES: - this._bindings.dropFiles(); + this._bindings.dropFiles(request.data); this.sendOK(request); break; case WorkerRequestType.FLUSH_FILES: @@ -361,7 +361,7 @@ export abstract class AsyncDuckDBDispatcher implements Logger { break; case WorkerRequestType.REGISTER_OPFS_FILE_NAME: - this._bindings.registerOPFSFileName(request.data[0]); + await this._bindings.registerOPFSFileNameAsync(request.data[0]); this.sendOK(request); break; diff --git 
a/packages/duckdb-wasm/src/parallel/worker_request.ts b/packages/duckdb-wasm/src/parallel/worker_request.ts index 9b9df0634..d92d13dab 100644 --- a/packages/duckdb-wasm/src/parallel/worker_request.ts +++ b/packages/duckdb-wasm/src/parallel/worker_request.ts @@ -116,7 +116,7 @@ export type WorkerRequestVariant = | WorkerRequest | WorkerRequest | WorkerRequest - | WorkerRequest + | WorkerRequest | WorkerRequest | WorkerRequest | WorkerRequest @@ -176,7 +176,7 @@ export type WorkerTaskVariant = | WorkerTask | WorkerTask | WorkerTask - | WorkerTask + | WorkerTask | WorkerTask | WorkerTask | WorkerTask diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts index eaf1a0fcc..6dbb231e4 100644 --- a/packages/duckdb-wasm/test/opfs.test.ts +++ b/packages/duckdb-wasm/test/opfs.test.ts @@ -1,8 +1,8 @@ import * as duckdb from '../src/'; -import {LogLevel} from '../src/'; import * as arrow from 'apache-arrow'; export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): void { + const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.ERROR); let db: duckdb.AsyncDuckDB; let conn: duckdb.AsyncDuckDBConnection; @@ -11,19 +11,10 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); afterAll(async () => { - if (conn) { - await conn.close(); - } - if (db) { - await db.terminate(); - } removeFiles(); }); beforeEach(async () => { - removeFiles(); - // - const logger = new duckdb.ConsoleLogger(LogLevel.ERROR); const worker = new Worker(bundle().mainWorker!); db = new duckdb.AsyncDuckDB(logger, worker); await db.instantiate(bundle().mainModule, bundle().pthreadWorker); @@ -36,18 +27,26 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo afterEach(async () => { if (conn) { - await conn.close(); + await conn.close().catch(() => { + }); } if (db) { - await db.terminate(); + await db.reset().catch(() => { + }); + await db.terminate().catch(() => { + }); + await db.dropFiles().catch(() => { + }); } - removeFiles(); + await removeFiles(); }); describe('Load Data in OPFS', () => { it('Import Small Parquet file', async () => { - await conn.send(`CREATE TABLE stu AS SELECT * FROM "${baseDir}/uni/studenten.parquet"`); + //1. data preparation + await conn.send(`CREATE TABLE stu AS SELECT * FROM "${ baseDir }/uni/studenten.parquet"`); await conn.send(`CHECKPOINT;`); + const result = await conn.send(`SELECT matrnr FROM stu;`); const batches = []; for await (const batch of result) { @@ -58,10 +57,11 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo new Int32Array([24002, 25403, 26120, 26830, 27550, 28106, 29120, 29555]), ); }); - it('Import Larget Parquet file', async () => { - await conn.send(`CREATE TABLE lineitem AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + //1. data preparation + await conn.send(`CREATE TABLE lineitem AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`CHECKPOINT;`); + const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem;`); const batches = []; for await (const batch of result) { @@ -72,12 +72,15 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Load Existing DB File', async () => { - await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + //1. 
data preparation + await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`CHECKPOINT;`); + await conn.close(); + await db.reset(); + await db.dropFiles(); await db.terminate(); - const logger = new duckdb.ConsoleLogger(LogLevel.ERROR); const worker = new Worker(bundle().mainWorker!); db = new duckdb.AsyncDuckDB(logger, worker); await db.instantiate(bundle().mainModule, bundle().pthreadWorker); @@ -98,16 +101,13 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Load Parquet file that are already with empty handler', async () => { //1. write to opfs - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => - res.arrayBuffer(), - ); - const opfsRoot = await navigator.storage.getDirectory(); - const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true}); - const writable = await fileHandle.createWritable(); - await writable.write(parquetBuffer); - await writable.close(); + const fileHandler = await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'test.parquet' + }); //2. handle is empty object, because worker gets a File Handle using the file name. - await db.registerFileHandle('test.parquet', null, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.parquet', fileHandler, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + //3. data preparation await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); await conn.send(`CHECKPOINT;`); @@ -122,18 +122,14 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Load Parquet file that are already with opfs file handler in datadir', async () => { //1. write to opfs - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => - res.arrayBuffer(), - ); - const opfsRoot = await navigator.storage.getDirectory(); - const datadir = await opfsRoot.getDirectoryHandle("datadir", {create: true}); - const fileHandle = await datadir.getFileHandle('test.parquet', {create: true}); - const writable = await fileHandle.createWritable(); - await writable.write(parquetBuffer); - await writable.close(); + const fileHandler = await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'datadir/test.parquet' + }); //2. handle is opfs file handler - await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); + await db.registerFileHandle('datadir/test.parquet', fileHandler, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + //3. 
data preparation + await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('datadir/test.parquet')`); await conn.send(`CHECKPOINT;`); const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem1;`); @@ -146,16 +142,14 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Load Parquet file that are already', async () => { - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => - res.arrayBuffer(), - ); - const opfsRoot = await navigator.storage.getDirectory(); - const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true}); - const writable = await fileHandle.createWritable(); - await writable.write(parquetBuffer); - await writable.close(); - + //1. write to opfs + const fileHandle = await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'test.parquet' + }); + //2. handle is opfs file handler await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + //3. data preparation await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); await conn.send(`CHECKPOINT;`); await conn.send(`CREATE TABLE lineitem2 AS SELECT * FROM read_parquet('test.parquet')`); @@ -196,19 +190,21 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Drop File + Export as CSV to OPFS + Load CSV', async () => { + //1. write to opfs const opfsRoot = await navigator.storage.getDirectory(); - const testHandle = await opfsRoot.getFileHandle('test.csv', {create: true}); - await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + const fileHandler = await opfsRoot.getFileHandle('test.csv', { create: true }); + //2. handle is opfs file handler + await db.registerFileHandle('test.csv', fileHandler, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + //3. data preparation + await conn.send(`CREATE TABLE zzz AS SELECT * FROM '${ baseDir }/tpch/0_01/parquet/lineitem.parquet'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'non_existing.csv'`); await conn.close(); await db.dropFile('test.csv'); await db.reset(); - await db.open({}); conn = await db.connect(); - await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.csv', fileHandler, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test.csv';`); const batches = []; @@ -221,26 +217,29 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.dropFile('test.csv'); }); - it('Drop Files + Export as CSV to OPFS + Load CSV', async () => { + //1. write to opfs const opfsRoot = await navigator.storage.getDirectory(); - const testHandle1 = await opfsRoot.getFileHandle('test1.csv', {create: true}); - const testHandle2 = await opfsRoot.getFileHandle('test2.csv', {create: true}); - const testHandle3 = await opfsRoot.getFileHandle('test3.csv', {create: true}); + const testHandle1 = await opfsRoot.getFileHandle('test1.csv', { create: true }); + const testHandle2 = await opfsRoot.getFileHandle('test2.csv', { create: true }); + const testHandle3 = await opfsRoot.getFileHandle('test3.csv', { create: true }); + //2. 
handle is opfs file handler await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); await db.registerFileHandle('test2.csv', testHandle2, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); await db.registerFileHandle('test3.csv', testHandle3, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - - await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + //3. data preparation + await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test1.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test2.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test3.csv'`); await conn.close(); - await db.dropFiles(); + //4. dropFiles + await db.dropFiles(['test1.csv', 'test2.csv', 'test3.csv']); + + //5. reset await db.reset(); - await db.open({}); conn = await db.connect(); await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); await db.registerFileHandle('test2.csv', testHandle2, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); @@ -273,8 +272,88 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo const table3 = await new arrow.Table<{ cnt: arrow.Int }>(batches3); expect(table3.getChildAt(0)?.get(0)).toBeGreaterThan(60_000); } + }); - await db.dropFiles(); + it('Load Parquet file when FROM clause', async () => { + //1. write to opfs + await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'test.parquet' + }); + await conn.close(); + await db.reset(); + await db.dropFile('test.parquet'); + db.config.autoFileRegistration = true; + conn = await db.connect(); + //2. send query + const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'opfs://test.parquet'`); + const batches1 = []; + for await (const batch of result1) { + batches1.push(batch); + } + const table1 = await new arrow.Table<{ cnt: arrow.Int }>(batches1); + expect(table1.getChildAt(0)?.get(0)).toBeGreaterThan(60_000); + }); + + it('Load Parquet file when FROM clause + read_parquet', async () => { + //1. write to opfs + await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/uni/studenten.parquet`, + path: 'test.parquet' + }); + await conn.close(); + await db.reset(); + await db.dropFile('test.parquet'); + db.config.autoFileRegistration = true; + conn = await db.connect(); + //2. send query + const result = await conn.send(`SELECT * FROM read_parquet('opfs://test.parquet');`); + const batches = []; + for await (const batch of result) { + batches.push(batch); + } + const table = await new arrow.Table<{ cnt: arrow.Int }>(batches); + expect(table.getChildAt(0)?.toArray()).toEqual( + new Int32Array([24002, 25403, 26120, 26830, 27550, 28106, 29120, 29555]), + ); + }); + + it('Load Parquet file with dir when FROM clause', async () => { + //1. write to opfs + await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'datadir/test.parquet' + }); + await conn.close(); + await db.reset(); + await db.dropFile('datadir/test.parquet'); + db.config.autoFileRegistration = true; + conn = await db.connect(); + //2. 
send query + const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'opfs://datadir/test.parquet'`); + const batches1 = []; + for await (const batch of result1) { + batches1.push(batch); + } + const table1 = await new arrow.Table<{ cnt: arrow.Int }>(batches1); + expect(table1.getChildAt(0)?.get(0)).toBeGreaterThan(60_000); + }); + + it('Load Parquet file with dir when FROM clause with IO Error', async () => { + //1. write to opfs + await getOpfsFileHandlerFromUrl({ + url: `${ baseDir }/tpch/0_01/parquet/lineitem.parquet`, + path: 'datadir/test.parquet' + }); + try { + //2. send query + await expectAsync( + conn.send(`SELECT count(*)::INTEGER as cnt FROM 'opfs://datadir/test.parquet'`) + ).toBeRejectedWithError("IO Error: No files found that match the pattern \"opfs://datadir/test.parquet\""); + } finally { + await db.reset(); + await db.dropFiles(); + } }); }); @@ -304,4 +383,33 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await opfsRoot.removeEntry('datadir').catch(() => { }); } + + async function getOpfsFileHandlerFromUrl(params: { + url: string; + path: string; + }): Promise { + const PATH_SEP_REGEX = /\/|\\/; + const parquetBuffer = await fetch(params.url).then(res => + res.arrayBuffer(), + ); + const opfsRoot = await navigator.storage.getDirectory(); + let dirHandle: FileSystemDirectoryHandle = opfsRoot; + let fileName = params.path; + if (PATH_SEP_REGEX.test(params.path)) { + const folders = params.path.split(PATH_SEP_REGEX); + fileName = folders.pop()!; + if (!fileName) { + throw new Error(`Invalid path ${ params.path }`); + } + for (const folder of folders) { + dirHandle = await dirHandle.getDirectoryHandle(folder, { create: true }); + } + } + const fileHandle = await dirHandle.getFileHandle(fileName, { create: true }); + const writable = await fileHandle.createWritable(); + await writable.write(parquetBuffer); + await writable.close(); + + return fileHandle; + } } From add8fc817e807d9d04ed5f40d4bae3a8558caf0a Mon Sep 17 00:00:00 2001 From: arkw Date: Mon, 27 Jan 2025 04:01:34 +0900 Subject: [PATCH 02/16] add test for copy csv --- packages/duckdb-wasm/test/opfs.test.ts | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts index 6dbb231e4..c3cc5b6b8 100644 --- a/packages/duckdb-wasm/test/opfs.test.ts +++ b/packages/duckdb-wasm/test/opfs.test.ts @@ -7,11 +7,11 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo let conn: duckdb.AsyncDuckDBConnection; beforeAll(async () => { - removeFiles(); + await removeFiles(); }); afterAll(async () => { - removeFiles(); + await removeFiles(); }); beforeEach(async () => { @@ -355,6 +355,22 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.dropFiles(); } }); + + it('Copy CSV to OPFS + Load CSV', async () => { + //1. 
data preparation + db.config.autoFileRegistration = true; + await conn.query(`COPY ( SELECT 32 AS value ) TO 'opfs://file.csv'`); + await conn.query(`COPY ( SELECT 42 AS value ) TO 'opfs://file.csv'`); + const result = await conn.send(`SELECT * FROM 'opfs://file.csv';`); + const batches = []; + for await (const batch of result) { + batches.push(batch); + } + const table = await new arrow.Table<{ cnt: arrow.Int }>(batches); + expect(table.getChildAt(0)?.toArray()).toEqual( + new BigInt64Array([42n]), + ); + }); }); async function removeFiles() { From 64c728824d625eace93d189f6a7b2693a8c9db64 Mon Sep 17 00:00:00 2001 From: arkw Date: Wed, 19 Feb 2025 15:10:02 +0900 Subject: [PATCH 03/16] Merge branch 'develop' into feature/from_opfs_path --- packages/duckdb-wasm/src/bindings/config.ts | 11 +++++++-- .../src/parallel/async_bindings.ts | 24 ++++++++++++++----- packages/duckdb-wasm/test/opfs.test.ts | 16 +++++++++---- submodules/duckdb | 2 +- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/packages/duckdb-wasm/src/bindings/config.ts b/packages/duckdb-wasm/src/bindings/config.ts index 27389d1eb..6d7659a86 100644 --- a/packages/duckdb-wasm/src/bindings/config.ts +++ b/packages/duckdb-wasm/src/bindings/config.ts @@ -29,6 +29,13 @@ export interface DuckDBFilesystemConfig { allowFullHTTPReads?: boolean; } +export interface DuckDBOPFSConfig { + /** + * Auto Opfs File Registration + */ + autoFileRegistration?: boolean; +} + export enum DuckDBAccessMode { UNDEFINED = 0, AUTOMATIC = 1, @@ -71,7 +78,7 @@ export interface DuckDBConfig { */ customUserAgent?: string; /** - * Auto Opfs File Registration + * opfs string */ - autoFileRegistration?: boolean; + opfs?: DuckDBOPFSConfig; } diff --git a/packages/duckdb-wasm/src/parallel/async_bindings.ts b/packages/duckdb-wasm/src/parallel/async_bindings.ts index cd95549fa..7f7ebd0a4 100644 --- a/packages/duckdb-wasm/src/parallel/async_bindings.ts +++ b/packages/duckdb-wasm/src/parallel/async_bindings.ts @@ -20,7 +20,8 @@ import { WebFile } from '../bindings/web_file'; import { DuckDBDataProtocol } from '../bindings'; const TEXT_ENCODER = new TextEncoder(); -const OPFS_PROTOCOL_REGEX = /'(opfs:\/\/\S*?)'/g; +const REGEX_OPFS_FILE = /'(opfs:\/\/\S*?)'/g; +const REGEX_OPFS_PROTOCOL = /(opfs:\/\/\S*?)/g; export class AsyncDuckDB implements AsyncDuckDBBindings { /** The message handler */ @@ -368,7 +369,6 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { /** Open a new database */ public async open(config: DuckDBConfig): Promise { - config.autoFileRegistration = config.autoFileRegistration ?? 
false; this._config = config; const task = new WorkerTask(WorkerRequestType.OPEN, config); await this.postTask(task); @@ -404,7 +404,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { /** Run a query */ public async runQuery(conn: ConnectionID, text: string): Promise { - if( this._config.autoFileRegistration ){ + if( this.isOpenedOPFSAutoFileRegistration() ){ const files = await this._preFileRegistration(text); try { return await this._runQueryAsync(conn, text); @@ -417,6 +417,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { return await this._runQueryAsync(conn, text); } } + private async _runQueryAsync(conn: ConnectionID, text: string): Promise { const task = new WorkerTask( WorkerRequestType.RUN_QUERY, @@ -424,13 +425,14 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { ); return await this.postTask(task); } + /** Start a pending query */ public async startPendingQuery( conn: ConnectionID, text: string, allowStreamResult: boolean = false, ): Promise { - if( this._config.autoFileRegistration ){ + if( this.isOpenedOPFSAutoFileRegistration() ){ const files = await this._preFileRegistration(text); try { return await this._startPendingQueryAsync(conn, text, allowStreamResult); @@ -443,6 +445,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { return await this._startPendingQueryAsync(conn, text, allowStreamResult); } } + private async _startPendingQueryAsync( conn: ConnectionID, text: string, @@ -455,6 +458,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { >(WorkerRequestType.START_PENDING_QUERY, [conn, text, allowStreamResult]); return await this.postTask(task); } + /** Poll a pending query */ public async pollPendingQuery(conn: ConnectionID): Promise { const task = new WorkerTask( @@ -689,8 +693,16 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { await this.postTask(task); } + private isOpenedOPFSAutoFileRegistration():boolean { + let path = this.config.path ?? ""; + if( path.search(REGEX_OPFS_PROTOCOL) > -1){ + return this.config.opfs?.autoFileRegistration ?? false; + } + return false; + } + private async _preFileRegistration(text: string) { - const files = [...text.matchAll(OPFS_PROTOCOL_REGEX)].map(match => match[1]); + const files = [...text.matchAll(REGEX_OPFS_FILE)].map(match => match[1]); const result: string[] = []; for (const file of files) { try { @@ -698,7 +710,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { result.push(file); } catch (e) { console.error(e); - throw new Error("file Not found:" + file); + throw new Error("File Not found:" + file); } } return result; diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts index c3cc5b6b8..e13709d58 100644 --- a/packages/duckdb-wasm/test/opfs.test.ts +++ b/packages/duckdb-wasm/test/opfs.test.ts @@ -283,7 +283,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await conn.close(); await db.reset(); await db.dropFile('test.parquet'); - db.config.autoFileRegistration = true; + db.config.opfs = { + autoFileRegistration: true + }; conn = await db.connect(); //2. send query const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'opfs://test.parquet'`); @@ -304,7 +306,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await conn.close(); await db.reset(); await db.dropFile('test.parquet'); - db.config.autoFileRegistration = true; + db.config.opfs = { + autoFileRegistration: true + }; conn = await db.connect(); //2. 
send query const result = await conn.send(`SELECT * FROM read_parquet('opfs://test.parquet');`); @@ -327,7 +331,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await conn.close(); await db.reset(); await db.dropFile('datadir/test.parquet'); - db.config.autoFileRegistration = true; + db.config.opfs = { + autoFileRegistration: true + }; conn = await db.connect(); //2. send query const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'opfs://datadir/test.parquet'`); @@ -358,7 +364,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Copy CSV to OPFS + Load CSV', async () => { //1. data preparation - db.config.autoFileRegistration = true; + db.config.opfs = { + autoFileRegistration: true + }; await conn.query(`COPY ( SELECT 32 AS value ) TO 'opfs://file.csv'`); await conn.query(`COPY ( SELECT 42 AS value ) TO 'opfs://file.csv'`); const result = await conn.send(`SELECT * FROM 'opfs://file.csv';`); diff --git a/submodules/duckdb b/submodules/duckdb index 19864453f..5f5512b82 160000 --- a/submodules/duckdb +++ b/submodules/duckdb @@ -1 +1 @@ -Subproject commit 19864453f7d0ed095256d848b46e7b8630989bac +Subproject commit 5f5512b827df6397afd31daedb4bbdee76520019 From e532a2a067c181b69584f163ce84ead964f2e2c7 Mon Sep 17 00:00:00 2001 From: arkw Date: Thu, 20 Feb 2025 08:24:19 +0900 Subject: [PATCH 04/16] add opfs_util and fix --- packages/duckdb-wasm/src/bindings/config.ts | 6 +++-- .../src/parallel/async_bindings.ts | 22 +++++++++---------- packages/duckdb-wasm/src/utils/opfs_util.ts | 10 +++++++++ packages/duckdb-wasm/test/opfs.test.ts | 8 +++---- 4 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 packages/duckdb-wasm/src/utils/opfs_util.ts diff --git a/packages/duckdb-wasm/src/bindings/config.ts b/packages/duckdb-wasm/src/bindings/config.ts index 6d7659a86..a3013d19e 100644 --- a/packages/duckdb-wasm/src/bindings/config.ts +++ b/packages/duckdb-wasm/src/bindings/config.ts @@ -31,9 +31,11 @@ export interface DuckDBFilesystemConfig { export interface DuckDBOPFSConfig { /** - * Auto Opfs File Registration + * Defines how `opfs://` files are handled during SQL execution. + * - "auto": Automatically register `opfs://` files and drop them after execution. + * - "manual": Files must be manually registered and dropped. 
*/ - autoFileRegistration?: boolean; + fileHandling?: "auto" | "manual"; } export enum DuckDBAccessMode { diff --git a/packages/duckdb-wasm/src/parallel/async_bindings.ts b/packages/duckdb-wasm/src/parallel/async_bindings.ts index 7f7ebd0a4..e2c9f3056 100644 --- a/packages/duckdb-wasm/src/parallel/async_bindings.ts +++ b/packages/duckdb-wasm/src/parallel/async_bindings.ts @@ -18,10 +18,9 @@ import { InstantiationProgress } from '../bindings/progress'; import { arrowToSQLField } from '../json_typedef'; import { WebFile } from '../bindings/web_file'; import { DuckDBDataProtocol } from '../bindings'; +import { searchOPFSFiles, isOPFSProtocol } from "../utils/opfs_util"; const TEXT_ENCODER = new TextEncoder(); -const REGEX_OPFS_FILE = /'(opfs:\/\/\S*?)'/g; -const REGEX_OPFS_PROTOCOL = /(opfs:\/\/\S*?)/g; export class AsyncDuckDB implements AsyncDuckDBBindings { /** The message handler */ @@ -404,8 +403,8 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { /** Run a query */ public async runQuery(conn: ConnectionID, text: string): Promise { - if( this.isOpenedOPFSAutoFileRegistration() ){ - const files = await this._preFileRegistration(text); + if( this.shouldOPFSFileHandling() ){ + const files = await this.registerOPFSFileFromSQL(text); try { return await this._runQueryAsync(conn, text); } finally { @@ -432,8 +431,8 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { text: string, allowStreamResult: boolean = false, ): Promise { - if( this.isOpenedOPFSAutoFileRegistration() ){ - const files = await this._preFileRegistration(text); + if( this.shouldOPFSFileHandling() ){ + const files = await this.registerOPFSFileFromSQL(text); try { return await this._startPendingQueryAsync(conn, text, allowStreamResult); } finally { @@ -693,16 +692,15 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { await this.postTask(task); } - private isOpenedOPFSAutoFileRegistration():boolean { - let path = this.config.path ?? ""; - if( path.search(REGEX_OPFS_PROTOCOL) > -1){ - return this.config.opfs?.autoFileRegistration ?? false; + private shouldOPFSFileHandling():boolean { + if( isOPFSProtocol(this.config.path ?? 
"")){ + return this.config.opfs?.fileHandling == "auto"; } return false; } - private async _preFileRegistration(text: string) { - const files = [...text.matchAll(REGEX_OPFS_FILE)].map(match => match[1]); + private async registerOPFSFileFromSQL(text: string) { + const files = searchOPFSFiles(text); const result: string[] = []; for (const file of files) { try { diff --git a/packages/duckdb-wasm/src/utils/opfs_util.ts b/packages/duckdb-wasm/src/utils/opfs_util.ts new file mode 100644 index 000000000..822eb4a7c --- /dev/null +++ b/packages/duckdb-wasm/src/utils/opfs_util.ts @@ -0,0 +1,10 @@ +export const REGEX_OPFS_FILE = /'(opfs:\/\/\S*?)'/g; +export const REGEX_OPFS_PROTOCOL = /(opfs:\/\/\S*?)/g; + +export function isOPFSProtocol(path: string): boolean { + return path.search(REGEX_OPFS_PROTOCOL) > -1; +} + +export function searchOPFSFiles(text: string) { + return [...text.matchAll(REGEX_OPFS_FILE)].map(match => match[1]); +} \ No newline at end of file diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts index e13709d58..63686c813 100644 --- a/packages/duckdb-wasm/test/opfs.test.ts +++ b/packages/duckdb-wasm/test/opfs.test.ts @@ -284,7 +284,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.reset(); await db.dropFile('test.parquet'); db.config.opfs = { - autoFileRegistration: true + fileHandling: "auto" }; conn = await db.connect(); //2. send query @@ -307,7 +307,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.reset(); await db.dropFile('test.parquet'); db.config.opfs = { - autoFileRegistration: true + fileHandling: "auto" }; conn = await db.connect(); //2. send query @@ -332,7 +332,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.reset(); await db.dropFile('datadir/test.parquet'); db.config.opfs = { - autoFileRegistration: true + fileHandling: "auto" }; conn = await db.connect(); //2. send query @@ -365,7 +365,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Copy CSV to OPFS + Load CSV', async () => { //1. 
data preparation db.config.opfs = { - autoFileRegistration: true + fileHandling: "auto" }; await conn.query(`COPY ( SELECT 32 AS value ) TO 'opfs://file.csv'`); await conn.query(`COPY ( SELECT 42 AS value ) TO 'opfs://file.csv'`); From c743a7497435888e59130c89dec77fe84c3a29ca Mon Sep 17 00:00:00 2001 From: arkw Date: Thu, 20 Feb 2025 19:58:11 +0900 Subject: [PATCH 05/16] change space indents --- packages/duckdb-wasm/src/bindings/bindings_base.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts index 34f201d3d..68975162c 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_base.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts @@ -525,7 +525,7 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { } } } - return handle; + return handle; } /** Register a file object URL async */ public async registerFileHandleAsync( @@ -656,10 +656,10 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { } /** Enable tracking of file statistics */ public registerOPFSFileName(file: string): Promise { - if (file.startsWith("opfs://")) { - return this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); - } else { - throw new Error("Not an OPFS file name: " + file); + if (file.startsWith("opfs://")) { + return this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); + } else { + throw new Error("Not an OPFS file name: " + file); } } public async registerOPFSFileNameAsync(file: string): Promise { From d42ff6754eec96e751cf07ab3c4264d504325145 Mon Sep 17 00:00:00 2001 From: arkw Date: Thu, 20 Feb 2025 20:03:09 +0900 Subject: [PATCH 06/16] fix registerOPFSFileName --- packages/duckdb-wasm/src/bindings/bindings_base.ts | 9 +-------- packages/duckdb-wasm/src/bindings/bindings_interface.ts | 3 +-- packages/duckdb-wasm/src/parallel/worker_dispatcher.ts | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts index 68975162c..cf729a2ff 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_base.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts @@ -655,14 +655,7 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { return copy; } /** Enable tracking of file statistics */ - public registerOPFSFileName(file: string): Promise { - if (file.startsWith("opfs://")) { - return this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); - } else { - throw new Error("Not an OPFS file name: " + file); - } - } - public async registerOPFSFileNameAsync(file: string): Promise { + public async registerOPFSFileName(file: string): Promise { if (file.startsWith("opfs://")) { return await this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); } else { diff --git a/packages/duckdb-wasm/src/bindings/bindings_interface.ts b/packages/duckdb-wasm/src/bindings/bindings_interface.ts index 004d2d46c..08f8fee47 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_interface.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_interface.ts @@ -62,8 +62,7 @@ export interface DuckDBBindings { flushFiles(): void; copyFileToPath(name: string, path: string): void; copyFileToBuffer(name: string): Uint8Array; - registerOPFSFileName(file: string): void; - registerOPFSFileNameAsync(file: string): Promise; + registerOPFSFileName(file: string): Promise; collectFileStatistics(file: string, enable: 
boolean): void; exportFileStatistics(file: string): FileStatistics; } diff --git a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts index d3b666ba7..db05cb50f 100644 --- a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts +++ b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts @@ -361,7 +361,7 @@ export abstract class AsyncDuckDBDispatcher implements Logger { break; case WorkerRequestType.REGISTER_OPFS_FILE_NAME: - await this._bindings.registerOPFSFileNameAsync(request.data[0]); + await this._bindings.registerOPFSFileName(request.data[0]); this.sendOK(request); break; From 2c03955e6142ef33f607ffc4a2d34e97fdb16f7d Mon Sep 17 00:00:00 2001 From: arkw Date: Thu, 20 Feb 2025 20:51:49 +0900 Subject: [PATCH 07/16] fix --- packages/duckdb-wasm/test/opfs.test.ts | 120 ++++++++++++------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts index eaf1a0fcc..051a7ebf6 100644 --- a/packages/duckdb-wasm/test/opfs.test.ts +++ b/packages/duckdb-wasm/test/opfs.test.ts @@ -1,10 +1,21 @@ -import * as duckdb from '../src/'; -import {LogLevel} from '../src/'; +import { + AsyncDuckDB, + AsyncDuckDBConnection, + ConsoleLogger, + DuckDBAccessMode, + DuckDBBundle, + DuckDBDataProtocol, + LogLevel +} from '../src/'; import * as arrow from 'apache-arrow'; -export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): void { - let db: duckdb.AsyncDuckDB; - let conn: duckdb.AsyncDuckDBConnection; +export function testOPFS(baseDir: string, bundle: () => DuckDBBundle): void { + const logger = new ConsoleLogger(LogLevel.ERROR); + + let db: AsyncDuckDB; + let conn: AsyncDuckDBConnection; + const _ignore: () => void = () => { + }; beforeAll(async () => { removeFiles(); @@ -17,19 +28,18 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo if (db) { await db.terminate(); } - removeFiles(); + await removeFiles(); }); beforeEach(async () => { - removeFiles(); + await removeFiles(); // - const logger = new duckdb.ConsoleLogger(LogLevel.ERROR); const worker = new Worker(bundle().mainWorker!); - db = new duckdb.AsyncDuckDB(logger, worker); + db = new AsyncDuckDB(logger, worker); await db.instantiate(bundle().mainModule, bundle().pthreadWorker); await db.open({ path: 'opfs://test.db', - accessMode: duckdb.DuckDBAccessMode.READ_WRITE + accessMode: DuckDBAccessMode.READ_WRITE }); conn = await db.connect(); }); @@ -41,12 +51,12 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo if (db) { await db.terminate(); } - removeFiles(); + await removeFiles(); }); describe('Load Data in OPFS', () => { it('Import Small Parquet file', async () => { - await conn.send(`CREATE TABLE stu AS SELECT * FROM "${baseDir}/uni/studenten.parquet"`); + await conn.send(`CREATE TABLE stu AS SELECT * FROM "${ baseDir }/uni/studenten.parquet"`); await conn.send(`CHECKPOINT;`); const result = await conn.send(`SELECT matrnr FROM stu;`); const batches = []; @@ -60,7 +70,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Import Larget Parquet file', async () => { - await conn.send(`CREATE TABLE lineitem AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + await conn.send(`CREATE TABLE lineitem AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`CHECKPOINT;`); const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 
lineitem;`); const batches = []; @@ -72,18 +82,17 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Load Existing DB File', async () => { - await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`CHECKPOINT;`); await conn.close(); await db.terminate(); - const logger = new duckdb.ConsoleLogger(LogLevel.ERROR); const worker = new Worker(bundle().mainWorker!); - db = new duckdb.AsyncDuckDB(logger, worker); + db = new AsyncDuckDB(logger, worker); await db.instantiate(bundle().mainModule, bundle().pthreadWorker); await db.open({ path: 'opfs://test.db', - accessMode: duckdb.DuckDBAccessMode.READ_WRITE + accessMode: DuckDBAccessMode.READ_WRITE }); conn = await db.connect(); @@ -98,16 +107,16 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Load Parquet file that are already with empty handler', async () => { //1. write to opfs - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => + const parquetBuffer = await fetch(`${ baseDir }/tpch/0_01/parquet/lineitem.parquet`).then(res => res.arrayBuffer(), ); const opfsRoot = await navigator.storage.getDirectory(); - const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true}); + const fileHandle = await opfsRoot.getFileHandle('test.parquet', { create: true }); const writable = await fileHandle.createWritable(); await writable.write(parquetBuffer); await writable.close(); //2. handle is empty object, because worker gets a File Handle using the file name. - await db.registerFileHandle('test.parquet', null, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.parquet', null, DuckDBDataProtocol.BROWSER_FSACCESS, true); await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); await conn.send(`CHECKPOINT;`); @@ -122,17 +131,17 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Load Parquet file that are already with opfs file handler in datadir', async () => { //1. write to opfs - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => + const parquetBuffer = await fetch(`${ baseDir }/tpch/0_01/parquet/lineitem.parquet`).then(res => res.arrayBuffer(), ); const opfsRoot = await navigator.storage.getDirectory(); - const datadir = await opfsRoot.getDirectoryHandle("datadir", {create: true}); - const fileHandle = await datadir.getFileHandle('test.parquet', {create: true}); + const datadir = await opfsRoot.getDirectoryHandle("datadir", { create: true }); + const fileHandle = await datadir.getFileHandle('test.parquet', { create: true }); const writable = await fileHandle.createWritable(); await writable.write(parquetBuffer); await writable.close(); //2. 
handle is opfs file handler - await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.parquet', fileHandle, DuckDBDataProtocol.BROWSER_FSACCESS, true); await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); await conn.send(`CHECKPOINT;`); @@ -146,16 +155,16 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo }); it('Load Parquet file that are already', async () => { - const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res => + const parquetBuffer = await fetch(`${ baseDir }/tpch/0_01/parquet/lineitem.parquet`).then(res => res.arrayBuffer(), ); const opfsRoot = await navigator.storage.getDirectory(); - const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true}); + const fileHandle = await opfsRoot.getFileHandle('test.parquet', { create: true }); const writable = await fileHandle.createWritable(); await writable.write(parquetBuffer); await writable.close(); - await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.parquet', fileHandle, DuckDBDataProtocol.BROWSER_FSACCESS, true); await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`); await conn.send(`CHECKPOINT;`); await conn.send(`CREATE TABLE lineitem2 AS SELECT * FROM read_parquet('test.parquet')`); @@ -197,9 +206,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Drop File + Export as CSV to OPFS + Load CSV', async () => { const opfsRoot = await navigator.storage.getDirectory(); - const testHandle = await opfsRoot.getFileHandle('test.csv', {create: true}); - await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + const testHandle = await opfsRoot.getFileHandle('test.csv', { create: true }); + await db.registerFileHandle('test.csv', testHandle, DuckDBDataProtocol.BROWSER_FSACCESS, true); + await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'non_existing.csv'`); await conn.close(); @@ -208,7 +217,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.open({}); conn = await db.connect(); - await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test.csv', testHandle, DuckDBDataProtocol.BROWSER_FSACCESS, true); const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test.csv';`); const batches = []; @@ -224,14 +233,14 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo it('Drop Files + Export as CSV to OPFS + Load CSV', async () => { const opfsRoot = await navigator.storage.getDirectory(); - const testHandle1 = await opfsRoot.getFileHandle('test1.csv', {create: true}); - const testHandle2 = await opfsRoot.getFileHandle('test2.csv', {create: true}); - const testHandle3 = await opfsRoot.getFileHandle('test3.csv', {create: true}); - await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await db.registerFileHandle('test2.csv', testHandle2, 
duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await db.registerFileHandle('test3.csv', testHandle3, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - - await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`); + const testHandle1 = await opfsRoot.getFileHandle('test1.csv', { create: true }); + const testHandle2 = await opfsRoot.getFileHandle('test2.csv', { create: true }); + const testHandle3 = await opfsRoot.getFileHandle('test3.csv', { create: true }); + await db.registerFileHandle('test1.csv', testHandle1, DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test2.csv', testHandle2, DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test3.csv', testHandle3, DuckDBDataProtocol.BROWSER_FSACCESS, true); + + await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${ baseDir }/tpch/0_01/parquet/lineitem.parquet"`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test1.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test2.csv'`); await conn.send(`COPY (SELECT * FROM zzz) TO 'test3.csv'`); @@ -242,9 +251,9 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo await db.open({}); conn = await db.connect(); - await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await db.registerFileHandle('test2.csv', testHandle2, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); - await db.registerFileHandle('test3.csv', testHandle3, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test1.csv', testHandle1, DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test2.csv', testHandle2, DuckDBDataProtocol.BROWSER_FSACCESS, true); + await db.registerFileHandle('test3.csv', testHandle3, DuckDBDataProtocol.BROWSER_FSACCESS, true); { const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test1.csv';`); @@ -280,28 +289,19 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo async function removeFiles() { const opfsRoot = await navigator.storage.getDirectory(); - await opfsRoot.removeEntry('test.db').catch(() => { - }); - await opfsRoot.removeEntry('test.db.wal').catch(() => { - }); - await opfsRoot.removeEntry('test.csv').catch(() => { - }); - await opfsRoot.removeEntry('test1.csv').catch(() => { - }); - await opfsRoot.removeEntry('test2.csv').catch(() => { - }); - await opfsRoot.removeEntry('test3.csv').catch(() => { - }); - await opfsRoot.removeEntry('test.parquet').catch(() => { - }); + await opfsRoot.removeEntry('test.db').catch(_ignore); + await opfsRoot.removeEntry('test.db.wal').catch(_ignore); + await opfsRoot.removeEntry('test.csv').catch(_ignore); + await opfsRoot.removeEntry('test1.csv').catch(_ignore); + await opfsRoot.removeEntry('test2.csv').catch(_ignore); + await opfsRoot.removeEntry('test3.csv').catch(_ignore); + await opfsRoot.removeEntry('test.parquet').catch(_ignore); try { const datadir = await opfsRoot.getDirectoryHandle('datadir'); - datadir.removeEntry('test.parquet').catch(() => { - }); + datadir.removeEntry('test.parquet').catch(_ignore); } catch (e) { // } - await opfsRoot.removeEntry('datadir').catch(() => { - }); + await opfsRoot.removeEntry('datadir').catch(_ignore); } } From 3a768f55ca8709f0aabb0309b05c3df3a5840b90 Mon Sep 17 00:00:00 2001 From: arkw Date: Thu, 20 Feb 2025 20:56:02 +0900 Subject: [PATCH 08/16] fix --- packages/duckdb-wasm/test/opfs.test.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 

diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts
index 051a7ebf6..37387cfe1 100644
--- a/packages/duckdb-wasm/test/opfs.test.ts
+++ b/packages/duckdb-wasm/test/opfs.test.ts
@@ -14,8 +14,6 @@ export function testOPFS(baseDir: string, bundle: () => DuckDBBundle): void {
     let db: AsyncDuckDB;
     let conn: AsyncDuckDBConnection;
-    const _ignore: () => void = () => {
-    };
     beforeAll(async () => {
         removeFiles();
@@ -305,3 +303,6 @@ export function testOPFS(baseDir: string, bundle: () => DuckDBBundle): void {
         await opfsRoot.removeEntry('datadir').catch(_ignore);
     }
 }
+
+//ignore block
+const _ignore: () => void = () => {};

From d45a2343f4a764eced83a61a72e685d1246ab08a Mon Sep 17 00:00:00 2001
From: arkw
Date: Wed, 19 Feb 2025 16:00:22 +0900
Subject: [PATCH 09/16] remove patch

---
 patches/duckdb/binary_executor.patch | 92 ----------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 patches/duckdb/binary_executor.patch

diff --git a/patches/duckdb/binary_executor.patch b/patches/duckdb/binary_executor.patch
deleted file mode 100644
index d93319cbc..000000000
--- a/patches/duckdb/binary_executor.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-diff --git a/src/include/duckdb/common/vector_operations/binary_executor.hpp b/src/include/duckdb/common/vector_operations/binary_executor.hpp
-index 55c10bb289..c5f57edabf 100644
---- a/src/include/duckdb/common/vector_operations/binary_executor.hpp
-+++ b/src/include/duckdb/common/vector_operations/binary_executor.hpp
-@@ -381,6 +381,8 @@ public:
- }
- }
-
-+#define DUCKDB_SMALLER_BINARY
-+
- template
- static idx_t SelectFlat(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
- SelectionVector *true_sel, SelectionVector *false_sel) {
-@@ -417,14 +419,22 @@ public:
- ldata, rdata, sel, count, combined_mask, true_sel, false_sel);
- }
- }
--
-+#ifndef DUCKDB_SMALLER_BINARY
- template
-+#else
-+ template
-+#endif
- static inline idx_t
- SelectGenericLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata,
- const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel,
- const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity,
- ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) {
- idx_t true_count = 0, false_count = 0;
-+#ifdef DUCKDB_SMALLER_BINARY
-+ const bool HAS_TRUE_SEL = true_sel;
-+ const bool HAS_FALSE_SEL = false_sel;
-+ const bool NO_NULL = false;
-+#endif
- for (idx_t i = 0; i < count; i++) {
- auto result_idx = result_sel->get_index(i);
- auto lindex = lsel->get_index(i);
-@@ -452,6 +462,7 @@ public:
- const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel,
- const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity,
- ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) {
-+#ifndef DUCKDB_SMALLER_BINARY
- if (true_sel && false_sel) {
- return SelectGenericLoop(
- ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel);
-@@ -463,6 +474,10 @@ public:
- return SelectGenericLoop(
- ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel);
- }
-+#else
-+ return SelectGenericLoop(ldata, rdata, lsel, rsel, result_sel, count, lvalidity,
-+ rvalidity, true_sel, false_sel);
-+#endif
- }
-
- template
-@@ -471,10 +486,13 @@ public:
- const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel,
- const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity,
- ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) {
-+#ifndef DUCKDB_SMALLER_BINARY
- if (!lvalidity.AllValid() || !rvalidity.AllValid()) {
- return SelectGenericLoopSelSwitch(
- ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel);
-- } else {
-+ } else
-+#endif
-+ {
- return SelectGenericLoopSelSwitch(
- ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel);
- }
-@@ -502,6 +520,7 @@ public:
- if (left.GetVectorType() == VectorType::CONSTANT_VECTOR &&
- right.GetVectorType() == VectorType::CONSTANT_VECTOR) {
- return SelectConstant(left, right, sel, count, true_sel, false_sel);
-+#ifndef DUCKDB_SMALLER_BINARY
- } else if (left.GetVectorType() == VectorType::CONSTANT_VECTOR &&
- right.GetVectorType() == VectorType::FLAT_VECTOR) {
- return SelectFlat(left, right, sel, count, true_sel, false_sel);
-@@ -511,10 +530,12 @@ public:
- } else if (left.GetVectorType() == VectorType::FLAT_VECTOR &&
- right.GetVectorType() == VectorType::FLAT_VECTOR) {
- return SelectFlat(left, right, sel, count, true_sel, false_sel);
-+#endif
- } else {
- return SelectGeneric(left, right, sel, count, true_sel, false_sel);
- }
- }
-+#undef DUCKDB_SMALLER_BINARY
- };
-
- } // namespace duckdb

From 744d91055c0fd9e224169357775328a539f1feee Mon Sep 17 00:00:00 2001
From: arkw
Date: Mon, 3 Mar 2025 09:53:24 +0900
Subject: [PATCH 10/16] add fix

---
 lib/src/webdb_api.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/src/webdb_api.cc b/lib/src/webdb_api.cc
index dbe5b721b..845e96df5 100644
--- a/lib/src/webdb_api.cc
+++ b/lib/src/webdb_api.cc
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
@@ -94,15 +95,15 @@ void duckdb_web_fs_drop_file(WASMResponse* packed, const char* file_name) {
     GET_WEBDB(*packed);
     WASMResponseBuffer::Get().Store(*packed, webdb.DropFile(file_name));
 }
-/// Drop a file
+/// Drop files
 void duckdb_web_fs_drop_files(WASMResponse* packed, const char** names, int name_count) {
     GET_WEBDB(*packed);
-    if (name_count == 0) {
+    if (name_count == 0 || names == NULL) {
         WASMResponseBuffer::Get().Store(*packed, webdb.DropFiles());
     } else {
         for (int i = 0; i < name_count; i++) {
             const char* name = names[i];
-            if (name == nullptr) {
+            if (name == NULL) {
                 std::cerr << "Error: NULL pointer detected at index " << i << std::endl;
                 continue;
             }

From 801e69c485c438bc1feca62ca287ef9f3e06dcd0 Mon Sep 17 00:00:00 2001
From: arkw
Date: Mon, 3 Mar 2025 10:14:26 +0900
Subject: [PATCH 11/16] fix(format)

---
 lib/src/webdb_api.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/src/webdb_api.cc b/lib/src/webdb_api.cc
index 845e96df5..c8377b211 100644
--- a/lib/src/webdb_api.cc
+++ b/lib/src/webdb_api.cc
@@ -1,5 +1,5 @@
-#include
 #include
+#include
 #include
 #include

From ac4dc128cbce7beaa1cef06a6771a2ea72305514 Mon Sep 17 00:00:00 2001
From: arkw
Date: Mon, 3 Mar 2025 19:48:16 +0900
Subject: [PATCH 12/16] fix: mkdirs

---
 packages/duckdb-wasm/src/bindings/runtime_browser.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/packages/duckdb-wasm/src/bindings/runtime_browser.ts b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
index 0b4ebedca..d5f39ee05 100644
--- a/packages/duckdb-wasm/src/bindings/runtime_browser.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
@@ -129,11 +129,14 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 let fileName = opfsPath;
                 if (PATH_SEP_REGEX.test(opfsPath)) {
                     const folders = opfsPath.split(PATH_SEP_REGEX);
-                    fileName = folders.pop()!;
+                    if (folders.length === 0) {
+                        throw new Error(`Invalid path ${opfsPath}`);
+                    }
+                    fileName = folders[folders.length - 1];
                     if (!fileName) {
-                        throw new Error(`Invalid path ${path}`);
+                        throw new Error(`Invalid path ${opfsPath}. File Not Found.`);
                     }
-                    // mkdir -p
+                    folders.pop();
                     for (const folder of folders) {
                         dirHandle = await dirHandle.getDirectoryHandle(folder, { create: true });
                     }

From 840a4858e8e614cb88471850328a74898ed46deb Mon Sep 17 00:00:00 2001
From: arkw
Date: Mon, 3 Mar 2025 09:53:24 +0900
Subject: [PATCH 13/16] fix: dropFiles

---
 lib/src/webdb_api.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/src/webdb_api.cc b/lib/src/webdb_api.cc
index dbe5b721b..c8377b211 100644
--- a/lib/src/webdb_api.cc
+++ b/lib/src/webdb_api.cc
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -94,15 +95,15 @@ void duckdb_web_fs_drop_file(WASMResponse* packed, const char* file_name) {
     GET_WEBDB(*packed);
     WASMResponseBuffer::Get().Store(*packed, webdb.DropFile(file_name));
 }
-/// Drop a file
+/// Drop files
 void duckdb_web_fs_drop_files(WASMResponse* packed, const char** names, int name_count) {
     GET_WEBDB(*packed);
-    if (name_count == 0) {
+    if (name_count == 0 || names == NULL) {
         WASMResponseBuffer::Get().Store(*packed, webdb.DropFiles());
     } else {
         for (int i = 0; i < name_count; i++) {
             const char* name = names[i];
-            if (name == nullptr) {
+            if (name == NULL) {
                 std::cerr << "Error: NULL pointer detected at index " << i << std::endl;
                 continue;
             }

From 04610854cef8a1f5205959f34595d8de312e13ee Mon Sep 17 00:00:00 2001
From: arkw
Date: Thu, 6 Mar 2025 09:43:40 +0900
Subject: [PATCH 14/16] Bump to DuckDB v1.2.1

---
 submodules/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/duckdb b/submodules/duckdb
index 5f5512b82..8e52ec439 160000
--- a/submodules/duckdb
+++ b/submodules/duckdb
@@ -1 +1 @@
-Subproject commit 5f5512b827df6397afd31daedb4bbdee76520019
+Subproject commit 8e52ec43959ab363643d63cb78ee214577111da4

From e3b284bddb85c1163e8d6d1c80fd9aa4e4be9b91 Mon Sep 17 00:00:00 2001
From: arikawa
Date: Sun, 20 Apr 2025 16:21:49 +0900
Subject: [PATCH 15/16] Bump duckdb to 7c039464e4

---
 submodules/duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodules/duckdb b/submodules/duckdb
index 5f5512b82..7c039464e 160000
--- a/submodules/duckdb
+++ b/submodules/duckdb
@@ -1 +1 @@
-Subproject commit 5f5512b827df6397afd31daedb4bbdee76520019
+Subproject commit 7c039464e452ddc3330e2691d3fa6d305521d09b

From 8b9d0577cf3f4f698b3866b3f4f11b43ba648e13 Mon Sep 17 00:00:00 2001
From: arikawa
Date: Mon, 21 Apr 2025 14:51:55 +0900
Subject: [PATCH 16/16] del: no need patch

---
 patches/duckdb/extension_install_rework.patch | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/patches/duckdb/extension_install_rework.patch b/patches/duckdb/extension_install_rework.patch
index 43e015f44..b3f0d7db9 100644
--- a/patches/duckdb/extension_install_rework.patch
+++ b/patches/duckdb/extension_install_rework.patch
@@ -107,15 +107,6 @@ index e8ab595ab0..fb3e6371a3 100644
 #ifdef WASM_LOADABLE_EXTENSIONS
 // Install is currently a no-op
 return nullptr;
-@@ -209,7 +215,7 @@ string ExtensionHelper::ExtensionUrlTemplate(optional_ptr