Update README (mark Qwen-3 as fully supported) and stream per-chunk delta text in hf_backend

shell-nlp · shell-nlp · commit c4bf8e7f0381 · 2025-04-30T00:22:18.000+08:00
diff --git a/README.md b/README.md
@@ -245,7 +245,7 @@ Chat UI界面:
 | Qwen-1.5 (0.5B--72B)  |    qwen    |   √   |   √   |         √          |        √         |   √    |
 |        Qwen-2         |    qwen    |   √   |   √   |         √          |        √         |   √    |
 |       Qwen-2.5        |    qwen    |   √   |   √   |         √          |        √         |   √    |
-|        Qwen-3         |    qwen    |   ×   |   ×   |         √          |        ×         |   ×    |
+|        Qwen-3         |    qwen    |   √   |   √   |         √          |        √         |   √    |
 |        Yi-34B         |     yi     |   √   |   √   |         √          |        √         |   √    |
 |     Internlm-1.0      |  internlm  |   √   |   √   |         √          |        √         |   √    |
 |     Internlm-2.0      |  internlm  |   √   |   √   |         √          |        √         |   √    |
diff --git a/gpt_server/model_backend/hf_backend.py b/gpt_server/model_backend/hf_backend.py
@@ -128,11 +128,15 @@ async def stream_chat(self, params: Dict[str, Any]):
         with context_manager:
             thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
             thread.start()
-        generated_text = ""
         prompt_tokens = len(input_ids.tolist()[0])
         completion_tokens = 0
         stop_flag = False
         try:
+            current_text = ""
+            previous_text = ""
+            previous_token_ids = []
+            current_token_ids = []
+            delta_token_ids = []
             for new_text in streamer:
                 for stop_word in stop:
                     if stop_word in new_text:
@@ -147,15 +151,15 @@ async def stream_chat(self, params: Dict[str, Any]):
                         )
                         new_text = new_text[:idx]
                         break
+                current_text = current_text + new_text
                 completion_tokens += 1
-                generated_text += new_text
                 usage = {
                     "prompt_tokens": prompt_tokens,
                     "completion_tokens": completion_tokens,
                     "total_tokens": prompt_tokens + completion_tokens,
                 }
                 ret = {
-                    "text": generated_text,
+                    "text": new_text,
                     "error_code": 0,
                     "usage": usage,
                 }
@@ -164,6 +168,6 @@ async def stream_chat(self, params: Dict[str, Any]):
                     break
                 # 用来解决输出卡顿的问题
                 await asyncio.sleep(0.02)
-            logger.info(generated_text)
+            logger.info(current_text)
         except asyncio.CancelledError as e:
             stop_specific_token_criteria.stop = True