From 01a57bfebc17ed00aa7a703b7a0ecae79d4a41d4 Mon Sep 17 00:00:00 2001
From: Hans <hans.chen@bricks.tools>
Date: Fri, 14 Mar 2025 17:16:44 +0800
Subject: [PATCH 1/2] Support loading tokenizer from subfolder

---
 src/pipelines.js  | 9 ++++++---
 src/tokenizers.js | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/pipelines.js b/src/pipelines.js
index afb627a4a..8b1b663b6 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -3399,7 +3399,7 @@ export async function pipeline(
         revision = 'main',
         device = null,
         dtype = null,
-        subfolder = 'onnx',
+        subfolder = null,
         use_external_data_format = null,
         model_file_name = null,
         session_options = {},
@@ -3466,6 +3466,7 @@ export async function pipeline(
  * @private
  */
 async function loadItems(mapping, model, pretrainedOptions) {
+    const { subfolder, ...rest } = pretrainedOptions;
 
     const result = Object.create(null);
 
@@ -3474,6 +3475,8 @@ async function loadItems(mapping, model, pretrainedOptions) {
     for (const [name, cls] of mapping.entries()) {
         if (!cls) continue;
 
+        const options = name === 'model' ? { ...rest, subfolder: subfolder ?? 'onnx' } : pretrainedOptions;
+
         /**@type {Promise} */
         let promise;
         if (Array.isArray(cls)) {
@@ -3487,7 +3490,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
                         return;
                     }
                     try {
-                        resolve(await c.from_pretrained(model, pretrainedOptions));
+                        resolve(await c.from_pretrained(model, options));
                         return;
                     } catch (err) {
                         if (err.message?.includes('Unsupported model type')) {
@@ -3506,7 +3509,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
                 reject(e);
             })
         } else {
-            promise = cls.from_pretrained(model, pretrainedOptions);
+            promise = cls.from_pretrained(model, options);
         }
 
         result[name] = promise;
diff --git a/src/tokenizers.js b/src/tokenizers.js
index 83e33cc52..819ee0c56 100644
--- a/src/tokenizers.js
+++ b/src/tokenizers.js
@@ -65,8 +65,8 @@ import {
 async function loadTokenizer(pretrained_model_name_or_path, options) {
 
     const info = await Promise.all([
-        getModelJSON(pretrained_model_name_or_path, 'tokenizer.json', true, options),
-        getModelJSON(pretrained_model_name_or_path, 'tokenizer_config.json', true, options),
+        getModelJSON(pretrained_model_name_or_path, `${options.subfolder ?? ''}/tokenizer.json`, true, options),
+        getModelJSON(pretrained_model_name_or_path, `${options.subfolder ?? ''}/tokenizer_config.json`, true, options),
     ])
 
     // Override legacy option if `options.legacy` is not null

From c75499cbbd60a883898bc3590360ba4c0753e1d5 Mon Sep 17 00:00:00 2001
From: Hans <hans.chen@bricks.tools>
Date: Thu, 3 Apr 2025 15:57:20 +0800
Subject: [PATCH 2/2] Handle `subfolder` being set to `false`

---
 src/tokenizers.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tokenizers.js b/src/tokenizers.js
index 819ee0c56..7cb191b79 100644
--- a/src/tokenizers.js
+++ b/src/tokenizers.js
@@ -65,8 +65,8 @@ import {
 async function loadTokenizer(pretrained_model_name_or_path, options) {
 
     const info = await Promise.all([
-        getModelJSON(pretrained_model_name_or_path, `${options.subfolder ?? ''}/tokenizer.json`, true, options),
-        getModelJSON(pretrained_model_name_or_path, `${options.subfolder ?? ''}/tokenizer_config.json`, true, options),
+        getModelJSON(pretrained_model_name_or_path, `${options.subfolder || ''}/tokenizer.json`, true, options),
+        getModelJSON(pretrained_model_name_or_path, `${options.subfolder || ''}/tokenizer_config.json`, true, options),
     ])
 
     // Override legacy option if `options.legacy` is not null