@@ -128,11 +128,15 @@ async def stream_chat(self, params: Dict[str, Any]):
128
128
with context_manager :
129
129
thread = Thread (target = self .model .generate , kwargs = generation_kwargs )
130
130
thread .start ()
131
- generated_text = ""
132
131
prompt_tokens = len (input_ids .tolist ()[0 ])
133
132
completion_tokens = 0
134
133
stop_flag = False
135
134
try :
135
+ current_text = ""
136
+ previous_text = ""
137
+ previous_token_ids = []
138
+ current_token_ids = []
139
+ delta_token_ids = []
136
140
for new_text in streamer :
137
141
for stop_word in stop :
138
142
if stop_word in new_text :
@@ -147,15 +151,15 @@ async def stream_chat(self, params: Dict[str, Any]):
147
151
)
148
152
new_text = new_text [:idx ]
149
153
break
154
+ current_text = current_text + new_text
150
155
completion_tokens += 1
151
- generated_text += new_text
152
156
usage = {
153
157
"prompt_tokens" : prompt_tokens ,
154
158
"completion_tokens" : completion_tokens ,
155
159
"total_tokens" : prompt_tokens + completion_tokens ,
156
160
}
157
161
ret = {
158
- "text" : generated_text ,
162
+ "text" : new_text ,
159
163
"error_code" : 0 ,
160
164
"usage" : usage ,
161
165
}
@@ -164,6 +168,6 @@ async def stream_chat(self, params: Dict[str, Any]):
164
168
break
165
169
# 用来解决输出卡顿的问题
166
170
await asyncio .sleep (0.02 )
167
- logger .info (generated_text )
171
+ logger .info (current_text )
168
172
except asyncio .CancelledError as e :
169
173
stop_specific_token_criteria .stop = True
0 commit comments