
Commit 578caa7

Remove redundant logging

1 parent 125a82c commit 578caa7

File tree

4 files changed: +22 −74 lines changed

gpt_server/model_backend/lmdeploy_backend.py
gpt_server/model_worker/base/base_model_worker.py
gpt_server/model_worker/qwen.py
gpt_server/utils.py

gpt_server/model_backend/lmdeploy_backend.py

Lines changed: 2 additions & 1 deletion
@@ -105,6 +105,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         stop_token_ids = params.get("stop_words_ids", None) or []
         presence_penalty = float(params.get("presence_penalty", 0.0))
         frequency_penalty = float(params.get("frequency_penalty", 0.0))
+        reasoning_parser_type = params.get("reasoning_parser", None)
         request = params.get("request", None)
         # Handle stop_str
         stop = set()

@@ -157,7 +158,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
                     "usage": usage,
                     "finish_reason": request_output.finish_reason,
                 }
-                reasoning_parser_type = params.get("reasoning_parser", None)
+
                 if reasoning_parser_type:
                     reasoning_parser = None
                     delta_token_ids = (
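
The hunk above moves the reasoning_parser lookup out of the per-chunk streaming loop so it runs once per request instead of once per yielded chunk. A minimal, self-contained sketch of that pattern, with toy stand-ins for the project's generator and token stream:

import asyncio
from typing import Any, AsyncGenerator, Dict


async def stream_chat(params: Dict[str, Any]) -> AsyncGenerator[str, None]:
    # Hoisted: read the loop-invariant option once per request ...
    reasoning_parser_type = params.get("reasoning_parser", None)
    for token in params["prompt"].split():  # stand-in for the real token stream
        await asyncio.sleep(0)  # cooperatively yield, as a real backend would
        if reasoning_parser_type:
            token = f"[{reasoning_parser_type}] {token}"  # ... instead of re-reading it here
        yield token


async def main() -> None:
    async for chunk in stream_chat({"prompt": "a b c", "reasoning_parser": "deepseek-r1"}):
        print(chunk)


asyncio.run(main())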

gpt_server/model_worker/base/base_model_worker.py

Lines changed: 6 additions & 71 deletions
@@ -1,4 +1,3 @@
-import asyncio
 import threading
 import time
 from typing import List

@@ -106,12 +105,12 @@ def register_to_controller(self):
         assert r.status_code == 200

     def send_heart_beat(self):
-        logger.info(
-            f"Send heart beat. Models: {self.model_names}. "
-            f"Semaphore: {pretty_print_semaphore(self.semaphore)}. "
-            f"call_ct: {self.call_ct}. "
-            f"worker_id: {self.worker_id}. "
-        )
+        # logger.info(
+        #     f"Send heart beat. Models: {self.model_names}. "
+        #     f"Semaphore: {pretty_print_semaphore(self.semaphore)}. "
+        #     f"call_ct: {self.call_ct}. "
+        #     f"worker_id: {self.worker_id}. "
+        # )

         url = self.controller_addr + "/receive_heart_beat"

@@ -190,67 +189,3 @@ def transcription(self, params):

     def generate_voice_stream(self, params):
         raise NotImplementedError
-
-
-def release_worker_semaphore():
-    worker.semaphore.release()
-
-
-def acquire_worker_semaphore():
-    if worker.semaphore is None:
-        worker.semaphore = asyncio.Semaphore(worker.limit_worker_concurrency)
-    return worker.semaphore.acquire()
-
-
-def create_background_tasks():
-    background_tasks = BackgroundTasks()
-    background_tasks.add_task(release_worker_semaphore)
-    return background_tasks
-
-
-@app.post("/worker_generate_stream")
-async def api_generate_stream(request: Request):
-    params = await request.json()
-    await acquire_worker_semaphore()
-    generator = worker.generate_stream_gate(params)
-    background_tasks = create_background_tasks()
-    return StreamingResponse(generator, background=background_tasks)
-
-
-@app.post("/worker_generate")
-async def api_generate(request: Request):
-    params = await request.json()
-    await acquire_worker_semaphore()
-    output = await asyncio.to_thread(worker.generate_gate, params)
-    release_worker_semaphore()
-    return JSONResponse(output)
-
-
-@app.post("/worker_get_embeddings")
-async def api_get_embeddings(request: Request):
-    params = await request.json()
-    await acquire_worker_semaphore()
-    embedding = worker.get_embeddings(params)
-    release_worker_semaphore()
-    return JSONResponse(content=embedding)
-
-
-@app.post("/worker_get_status")
-async def api_get_status(request: Request):
-    return worker.get_status()
-
-
-@app.post("/count_token")
-async def api_count_token(request: Request):
-    params = await request.json()
-    return worker.count_token(params)
-
-
-@app.post("/worker_get_conv_template")
-async def api_get_conv(request: Request):
-    return worker.get_conv_template()
-
-
-@app.post("/model_details")
-async def api_model_details(request: Request):
-    return {"context_length": worker.context_len}

gpt_server/model_worker/qwen.py

Lines changed: 13 additions & 2 deletions
@@ -1,3 +1,4 @@
+import asyncio
 import json
 from typing import List
 from fastchat.constants import ErrorCode, SERVER_ERROR_MSG

@@ -75,7 +76,10 @@ async def generate_stream_gate(self, params):

         if not self.vision_config:
             if isinstance(messages, list):
-                text = self.chat_template.messages2prompt(messages, True, tools)
+                # text = self.chat_template.messages2prompt(messages, True, tools)
+                text = await asyncio.to_thread(
+                    self.chat_template.messages2prompt, messages, True, tools
+                )
             elif isinstance(messages, str):
                 text = messages

@@ -84,12 +88,19 @@ async def generate_stream_gate(self, params):
             params["prompt"] = text
         else:  # multimodal
             if isinstance(messages, list):
-                text = self.tokenizer.apply_chat_template(
+                text = await asyncio.to_thread(
+                    self.tokenizer.apply_chat_template,
                     messages,
                     chat_template=self.vl_chat_template,
                     tokenize=False,
                     add_generation_prompt=True,
                 )
+                # text = self.tokenizer.apply_chat_template(
+                #     messages,
+                #     chat_template=self.vl_chat_template,
+                #     tokenize=False,
+                #     add_generation_prompt=True,
+                # )
             params["prompt"] = text
             # multimodal input does not need input_ids
             params["multimodal"] = True

gpt_server/utils.py

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ def run_cmd(cmd: str, *args, **kwargs):
 def start_controller(controller_host, controller_port, dispatch_method):
     """Start the fastchat controller"""
     cmd = f"python -m fastchat.serve.controller --host {controller_host} --port {controller_port} --dispatch-method {dispatch_method} "
+    cmd += "> /dev/null 2>&1"  # fully silent (Linux/macOS)
     controller_process = Process(target=run_cmd, args=(cmd,))
     controller_process.start()
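The appended redirection is standard POSIX shell: "> /dev/null" points stdout at /dev/null first, then "2>&1" duplicates stderr onto it, so the controller's output is discarded entirely (on Windows the equivalent target would be NUL). A small demonstration with a toy command in place of the controller:

import subprocess

# Order matters: stdout is redirected first, then stderr is duplicated
# onto the already-redirected stdout, so both streams are silenced.
cmd = "echo on-stdout; echo on-stderr 1>&2"
subprocess.run(cmd + " > /dev/null 2>&1", shell=True)  # prints nothing
subprocess.run(cmd, shell=True)  # prints both lines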
