Commit 93c23d3 (parent: 3b64bda)

Optimize logging control

20 files changed, 38 insertions(+), 55 deletions(-)

gpt_server/model_backend/lmdeploy_backend.py (5 additions, 2 deletions)

@@ -24,6 +24,10 @@
     "lmdeploy-pytorch": "pytorch",  # pytorch backend
     "lmdeploy-turbomind": "turbomind",  # turbomind backend
 }
+from lmdeploy.utils import get_logger
+
+get_logger("lmdeploy").setLevel("WARNING")
+os.environ["TM_LOG_LEVEL"] = "ERROR"
 
 
 def is_stop(output: str, stop_str: str):
@@ -126,7 +130,6 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
             skip_special_tokens=True,
             response_format=params["response_format"],
         )
-        logger.info(f"request_id {int(request_id)}")
         if params.get("tools", None) or is_messages_with_tool(messages=messages):
             messages = prompt or messages  # works around lmdeploy prompt templates not supporting tools
             if self.messages_type_select:
@@ -158,7 +161,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
                 "usage": usage,
                 "finish_reason": request_output.finish_reason,
             }
-
+
             if reasoning_parser_type:
                 reasoning_parser = None
                 delta_token_ids = (
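
Note that the first hunk silences two separate log paths: get_logger("lmdeploy") adjusts lmdeploy's Python-side logger, while TM_LOG_LEVEL is an environment variable read by the native TurboMind engine, so it has to be set before the engine is created. A minimal standalone sketch of the same pattern, assuming lmdeploy is installed (both calls appear verbatim in the diff above):

    import os

    from lmdeploy.utils import get_logger

    # Python side: keep only WARNING and above from lmdeploy's own logger
    get_logger("lmdeploy").setLevel("WARNING")

    # Native side: TurboMind (C++) reads TM_LOG_LEVEL at engine start-up
    os.environ["TM_LOG_LEVEL"] = "ERROR"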

gpt_server/model_worker/baichuan.py (1 addition, 3 deletions)

@@ -84,12 +84,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):
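
The same two changes repeat in nearly every worker below: the startup "stop words" message is promoted from info to warning so it survives the new WARNING default, and the per-request params/worker_id logging is removed (it is re-added once, centrally, in model_worker_base.py). A tiny loguru demo of why the promotion matters, assuming the sink setup this commit adds:

    import sys

    from loguru import logger

    logger.remove(0)                         # drop loguru's default sink (id 0)
    logger.add(sys.stderr, level="WARNING")  # the new default level

    logger.info("stop words: [...]")     # filtered out: INFO < WARNING
    logger.warning("stop words: [...]")  # still printed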

gpt_server/model_worker/base/model_worker_base.py (8 additions, 0 deletions)

@@ -1,6 +1,7 @@
 import asyncio
 from typing import List
 import json
+import sys
 from abc import ABC, abstractmethod
 from fastapi import BackgroundTasks, Request, FastAPI
 from fastapi.responses import JSONResponse, StreamingResponse
@@ -20,6 +21,9 @@
 
 worker = None
 app = FastAPI()
+logger.remove(0)
+log_level = os.getenv("log_level", "WARNING")
+logger.add(sys.stderr, level=log_level)
 
 
 def get_context_length_(config):
@@ -220,6 +224,8 @@ def run(cls):
         parser.add_argument("--kv_cache_quant_policy", type=str, default="0")
         # vad_model
         parser.add_argument("--vad_model", type=str, default="")
+        # log_level
+        parser.add_argument("--log_level", type=str, default="WARNING")
         args = parser.parse_args()
         os.environ["num_gpus"] = str(args.num_gpus)
         if args.backend == "vllm":
@@ -244,6 +250,7 @@ def run(cls):
         os.environ["gpu_memory_utilization"] = args.gpu_memory_utilization
         os.environ["kv_cache_quant_policy"] = args.kv_cache_quant_policy
         os.environ["dtype"] = args.dtype
+        os.environ["log_level"] = args.log_level
 
         host = args.host
         controller_address = args.controller_address
@@ -305,6 +312,7 @@ async def api_generate_stream(request: Request):
     params["request_id"] = request_id
     params["request"] = request
     params.pop("prompt")
+    logger.info(f"params {params}")
     generator = worker.generate_stream_gate(params)
     background_tasks = create_background_tasks(request_id)
     return StreamingResponse(generator, background=background_tasks)
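
One hedged suggestion on the sink setup above: loguru's logger.remove(0) raises ValueError if the default sink (handler id 0) has already been removed, for example when this module-level code runs twice. A bare logger.remove() is the idempotent form:

    import os
    import sys

    from loguru import logger

    logger.remove()  # removes all sinks; safe even if id 0 is already gone
    log_level = os.getenv("log_level", "WARNING")
    logger.add(sys.stderr, level=log_level)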

gpt_server/model_worker/chatglm.py (1 addition, 3 deletions)

@@ -38,7 +38,7 @@ def __init__(
                 self.stop_words_ids.append(self.tokenizer.convert_tokens_to_ids(i))
         except Exception as e:
             pass
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     def build_chat_input(self, query, history=None, role="user"):
         if history is None:
@@ -60,8 +60,6 @@ def build_chat_input(self, query, history=None, role="user"):
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             # ---------------- add support for tools -----------------------------------
             messages = add_tools2messages(params=params, model_adapter="chatglm4")

gpt_server/model_worker/deepseek.py (1 addition, 3 deletions)

@@ -36,12 +36,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if not self.vision_config:

gpt_server/model_worker/embedding.py (3 additions, 5 deletions)

@@ -31,7 +31,7 @@ def __init__(
             device = "cpu"
         else:
             device = "cuda"
-        logger.info(f"Loading on {device}...")
+        logger.warning(f"Loading on {device}...")
         model_kwargs = {"device": device}
         self.encode_kwargs = {"normalize_embeddings": True, "batch_size": 64}
         self.mode = "embedding"
@@ -44,16 +44,14 @@ def __init__(
             self.client = sentence_transformers.CrossEncoder(
                 model_name=model_path, **model_kwargs
             )
-            logger.info("Using rerank model...")
+            logger.warning("Using rerank model...")
         elif self.mode == "embedding":
             self.client = sentence_transformers.SentenceTransformer(
                 model_path, **model_kwargs
             )
-            logger.info("Using embedding model...")
+            logger.warning("Using embedding model...")
 
     async def get_embeddings(self, params):
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         self.call_ct += 1
         ret = {"embedding": [], "token_num": 0}
         texts = params["input"]

gpt_server/model_worker/embedding_infinity.py (3 additions, 5 deletions)

@@ -45,7 +45,7 @@ def __init__(
             device = "cpu"
         else:
             device = "cuda"
-        logger.info(f"Loading on {device}...")
+        logger.warning(f"Loading on {device}...")
         model_type = getattr(self.model_config, "model_type", None)
         bettertransformer = True
         if model_type is not None and "deberta" in model_type:
@@ -71,15 +71,13 @@ def __init__(
         self.engine: AsyncEmbeddingEngine = AsyncEngineArray.from_args([engine_args])[0]
         loop = asyncio.get_running_loop()
         loop.create_task(self.engine.astart())
-        logger.info(f"Model: {model_names[0]}")
-        logger.info(f"Using {self.mode} model...")
+        logger.warning(f"Model: {model_names[0]}")
+        logger.warning(f"Using {self.mode} model...")
 
     async def astart(self):
         await self.engine.astart()
 
     async def get_embeddings(self, params):
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         self.call_ct += 1
         ret = {"embedding": [], "token_num": 0}
         texts: list = params["input"]

gpt_server/model_worker/embedding_v2.py (2 additions, 4 deletions)

@@ -56,12 +56,12 @@ def __init__(
             self.client = sentence_transformers.CrossEncoder(
                 model_name=model_path, **model_kwargs
             )
-            logger.info("Using rerank model...")
+            logger.warning("Using rerank model...")
         elif self.mode == "embedding":
             self.client = sentence_transformers.SentenceTransformer(
                 model_path, **model_kwargs
            )
-            logger.info("Using embedding model...")
+            logger.warning("Using embedding model...")
         self.warm_up()
 
     def warm_up(self):
@@ -140,8 +140,6 @@ async def rerank(self, params: dict, future: asyncio.Future):
         await self.add_request(params, future)
 
     async def get_embeddings(self, params):
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         self.call_ct += 1
         ret = {"embedding": [], "token_num": 0}
         texts = params["input"]

gpt_server/model_worker/funasr.py (2 additions, 2 deletions)

@@ -33,15 +33,15 @@ def __init__(
             device = "cpu"
         else:
             device = "cuda"
-        logger.info(f"Loading on {device}...")
+        logger.warning(f"Loading on {device}...")
         vad_model = os.environ.get("vad_model", None)
         self.model = AutoModel(
             model=model_path,
             vad_model=vad_model,
             vad_kwargs={"max_single_segment_time": 30000},
             device="cuda",
         )
-        logger.info(f"Model: {model_names[0]}")
+        logger.warning(f"Model: {model_names[0]}")
 
     async def transcription(self, params):
         file_input = base64.b64decode(params["file"])  # Base64 → bytes

gpt_server/model_worker/gemma.py (1 addition, 3 deletions)

@@ -31,12 +31,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/model_worker/internlm.py (1 addition, 3 deletions)

@@ -38,15 +38,13 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
         self.other_config = {
             "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
         }
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             model_type = getattr(self.model_config, "model_type", "internlm")
             messages = params["messages"]

gpt_server/model_worker/internvl.py (1 addition, 3 deletions)

@@ -35,14 +35,12 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
         # from https://github.com/xorbitsai/inference/blob/c70ea74fa820a613f8d577047ef1818da20a96b3/xinference/model/llm/llm_family_modelscope.json
         self.vl_chat_template = "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params.get("messages", [])
             # always multimodal

gpt_server/model_worker/llama.py (1 addition, 3 deletions)

@@ -37,12 +37,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/model_worker/minicpmv.py (1 addition, 3 deletions)

@@ -35,12 +35,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/model_worker/mixtral.py (1 addition, 3 deletions)

@@ -34,12 +34,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/model_worker/phi.py (1 addition, 3 deletions)

@@ -37,12 +37,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/model_worker/qwen.py (1 addition, 3 deletions)

@@ -50,7 +50,7 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
         self.chat_template = MODELS.module_dict["qwen2_5"]()
         self.tool_parser = ToolParserManager.module_dict["qwen2_5"](
@@ -61,8 +61,6 @@ def __init__(
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params.get("messages", [])
             tools = params.get("tools", None)

gpt_server/model_worker/spark_tts.py (1 addition, 1 deletion)

@@ -90,7 +90,7 @@ def __init__(
                 ),
             )
         )
-        logger.info(f"Model: {model_names[0]}")
+        logger.warning(f"Model: {model_names[0]}")
 
     # This is the model's main method
     async def generate_voice_stream(self, params):

gpt_server/model_worker/yi.py (1 addition, 3 deletions)

@@ -35,12 +35,10 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
-        logger.info(f"{model_names[0]} stop words: {self.stop}")
+        logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
-        logger.info(f"params {params}")
-        logger.info(f"worker_id: {self.worker_id}")
         try:
             messages = params["messages"]
             if isinstance(messages, list):

gpt_server/utils.py (2 additions, 0 deletions)

@@ -132,6 +132,7 @@ def start_model_worker(config: dict):
     try:
         host = config["model_worker_args"]["host"]
         controller_address = config["model_worker_args"]["controller_address"]
+        log_level = config["model_worker_args"].get("log_level", "WARNING")
     except KeyError as e:
         error_msg = f"Please refer to https://github.com/shell-nlp/gpt_server/blob/main/gpt_server/script/config.yaml and set model_worker_args correctly"
         logger.error(error_msg)
@@ -238,6 +239,7 @@ def start_model_worker(config: dict):
         + f" --enable_prefix_caching {enable_prefix_caching}"  # whether to enable prefix caching
         + f" --gpu_memory_utilization {gpu_memory_utilization}"  # fraction of GPU memory to use
         + f" --kv_cache_quant_policy {kv_cache_quant_policy}"  # kv cache quantization policy
+        + f" --log_level {log_level}"  # log level
     )
     # handle the None case
     if lora:
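
Pieced together from the hunks in this commit, the new log level flows: the model_worker_args section of config.yaml → start_model_worker → the worker's --log_level CLI flag → os.environ["log_level"] → the loguru sink in model_worker_base.py. A condensed sketch of that chain (the config values are illustrative, not from the diff):

    import os

    # utils.start_model_worker: read the optional key, default to WARNING
    config = {"model_worker_args": {"log_level": "INFO"}}  # hypothetical config
    log_level = config["model_worker_args"].get("log_level", "WARNING")
    cmd_suffix = f" --log_level {log_level}"  # appended to the worker command

    # model_worker_base.run(): the argparse value is exported to the environment
    os.environ["log_level"] = log_level

    # the module-level sink setup reads it back (WARNING when unset)
    effective_level = os.getenv("log_level", "WARNING")
    assert effective_level == "INFO"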
