
Commit d3157b4

laipz8200 and Yeuoly authored
feat(large_language_model): Adds plugin-based token counting configuration option (langgenius#17706)
Signed-off-by: -LAN- <[email protected]> Co-authored-by: Yeuoly <[email protected]>
1 parent 8b3be42 commit d3157b4

File tree

10 files changed: +32 −60 lines

api/.env.example (+1)

@@ -326,6 +326,7 @@ UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
 MULTIMODAL_SEND_FORMAT=base64
 PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
 
 # Mail configuration, support: resend, smtp
 MAIL_TYPE=

api/configs/feature/__init__.py (+6 −1)

@@ -442,14 +442,19 @@ class LoggingConfig(BaseSettings):
 
 class ModelLoadBalanceConfig(BaseSettings):
     """
-    Configuration for model load balancing
+    Configuration for model load balancing and token counting
     """
 
     MODEL_LB_ENABLED: bool = Field(
         description="Enable or disable load balancing for models",
         default=False,
     )
 
+    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
+        description="Enable or disable plugin based token counting. If disabled, token counting will return 0.",
+        default=False,
+    )
+
 
 class BillingConfig(BaseSettings):
     """

api/core/app/apps/agent_chat/app_runner.py (−14)

@@ -53,20 +53,6 @@ def run(
         query = application_generate_entity.query
         files = application_generate_entity.files
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=dict(inputs),
-            files=list(files),
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

api/core/app/apps/chat/app_runner.py (−14)

@@ -61,20 +61,6 @@ def run(
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

api/core/app/apps/completion/app_runner.py (−14)

@@ -54,20 +54,6 @@ def run(
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         # organize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         prompt_messages, stop = self.organize_prompt_messages(

api/core/model_runtime/docs/en_US/customizable_model_scale_out.md (+1 −1)

@@ -192,7 +192,7 @@ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[Pr
 ```
 
 
-Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
+Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens, and ensure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
 
 - Model Credentials Validation
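
To make the doc's suggestion concrete, a hypothetical provider implementation might look like the sketch below. `_get_num_tokens_by_gpt2` is the `AIModel` base-class helper the doc names; the class name, base-class import, and message flattening are illustrative, and the GPT-2 count is an approximation rather than the provider's real tokenization:

```python
# Hypothetical provider-side sketch (illustrative names): approximate token
# usage with the GPT-2 tokenizer helper instead of returning 0.
from core.model_runtime.model_providers.__base.large_language_model import (
    LargeLanguageModel,  # assumed import path for the base class
)


class MyLargeLanguageModel(LargeLanguageModel):
    def get_num_tokens(self, model, credentials, prompt_messages, tools=None) -> int:
        # Flatten message contents; real providers may also count roles,
        # tool definitions, and structured content.
        text = "".join(str(message.content) for message in prompt_messages)
        return self._get_num_tokens_by_gpt2(text)  # a substitute, not exact
```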

api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md (+1 −1)

@@ -179,7 +179,7 @@ provider_credential_schema:
 """
 ```
 
-Sometimes you may not need to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method lives in the `AIModel` base class and uses GPT2's Tokenizer for the calculation, but it can only serve as a substitute and is not fully accurate.
+Sometimes you may not need to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens, and ensure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method lives in the `AIModel` base class and uses GPT2's Tokenizer for the calculation, but it can only serve as a substitute and is not fully accurate.
 
 - Model Credentials Validation

api/core/model_runtime/model_providers/__base/large_language_model.py (+14 −12)

@@ -295,18 +295,20 @@ def get_num_tokens(
         :param tools: tools for tool calling
         :return:
         """
-        plugin_model_manager = PluginModelManager()
-        return plugin_model_manager.get_llm_num_tokens(
-            tenant_id=self.tenant_id,
-            user_id="unknown",
-            plugin_id=self.plugin_id,
-            provider=self.provider_name,
-            model_type=self.model_type.value,
-            model=model,
-            credentials=credentials,
-            prompt_messages=prompt_messages,
-            tools=tools,
-        )
+        if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
+            plugin_model_manager = PluginModelManager()
+            return plugin_model_manager.get_llm_num_tokens(
+                tenant_id=self.tenant_id,
+                user_id="unknown",
+                plugin_id=self.plugin_id,
+                provider=self.provider_name,
+                model_type=self.model_type.value,
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                tools=tools,
+            )
+        return 0
 
     def _calc_response_usage(
         self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int
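
The behavioral consequence of this hunk: with the flag off, `get_num_tokens` returns 0 without ever contacting the plugin daemon, which is what removes the per-request counting overhead. A self-contained sketch of that control flow, with stub classes standing in for `PluginModelManager` and `dify_config` (these are not the real classes):

```python
# Stubbed sketch of the new gating; stand-ins, not Dify's actual classes.
class StubPluginModelManager:
    def get_llm_num_tokens(self, **kwargs) -> int:
        print("plugin daemon RPC issued")
        return 42  # pretend the plugin counted 42 tokens


class StubLargeLanguageModel:
    def __init__(self, counting_enabled: bool):
        # Stands in for dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED.
        self.counting_enabled = counting_enabled

    def get_num_tokens(self, prompt_messages: list) -> int:
        if self.counting_enabled:
            return StubPluginModelManager().get_llm_num_tokens(
                prompt_messages=prompt_messages
            )
        return 0  # disabled: short-circuit, no plugin round-trip


print(StubLargeLanguageModel(False).get_num_tokens(["hello"]))  # 0, no RPC
print(StubLargeLanguageModel(True).get_num_tokens(["hello"]))   # 42
```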

docker/.env.example (+8 −3)

@@ -75,7 +75,7 @@ SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
 
 # Password for admin user initialization.
 # If left unset, admin user will not be prompted for a password
-# when creating the initial admin account. 
+# when creating the initial admin account.
 # The length of the password cannot exceed 30 characters.
 INIT_PASSWORD=
 
@@ -605,17 +605,22 @@ SCARF_NO_ANALYTICS=true
 # ------------------------------
 
 # The maximum number of tokens allowed for prompt generation.
-# This setting controls the upper limit of tokens that can be used by the LLM 
+# This setting controls the upper limit of tokens that can be used by the LLM
 # when generating a prompt in the prompt generation tool.
 # Default: 512 tokens.
 PROMPT_GENERATION_MAX_TOKENS=512
 
 # The maximum number of tokens allowed for code generation.
-# This setting controls the upper limit of tokens that can be used by the LLM 
+# This setting controls the upper limit of tokens that can be used by the LLM
 # when generating code in the code generation tool.
 # Default: 1024 tokens.
 CODE_GENERATION_MAX_TOKENS=1024
 
+# Enable or disable plugin based token counting. If disabled, token counting will return 0.
+# This can improve performance by skipping token counting operations.
+# Default: false (disabled).
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
+
 # ------------------------------
 # Multi-modal Configuration
 # ------------------------------

docker/docker-compose.yaml (+1)

@@ -276,6 +276,7 @@ x-shared-env: &shared-api-worker-env
   SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true}
   PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
   CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
+  PLUGIN_BASED_TOKEN_COUNTING_ENABLED: ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false}
   MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64}
   UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
   UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}
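
The compose entry uses shell-style `${VAR:-default}` substitution: if `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is unset or empty in `docker/.env`, the api and worker containers still receive `false`. An illustrative Python equivalent of that lookup (the `or` clause mirrors `:-` treating an empty string like an unset variable):

```python
import os

# Equivalent of ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false} in compose:
# fall back to "false" when the variable is unset OR set to the empty string.
value = os.environ.get("PLUGIN_BASED_TOKEN_COUNTING_ENABLED") or "false"
print(value)  # "false" unless the variable carries a non-empty value
```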
