Skip to content

Commit d3d7a7b

Browse files
authored
Add files via upload
1 parent c4fd9f7 commit d3d7a7b

File tree

5 files changed

+942
-0
lines changed

5 files changed

+942
-0
lines changed

chat-gpt_call.py

+317
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
#%%
2+
#%%
3+
import openai
4+
from methods.methods import *
5+
import time
6+
from tqdm import tqdm
7+
import copy
8+
### LOAD EVALUATION AND QRELS
import os  # imported explicitly: the API key below is read from the environment

evaluation_path = './trec/treccast/'
qrels_path = './trec/qrels/'
all_qrels = load_all_qrels(qrels_path).reset_index(drop=True)
qrels19 = all_qrels[all_qrels.year == 2019]
eval_19 = pd.read_csv('./trec/evaluation2019.csv')
eval_20 = pd.read_csv('./trec/evaluation2020.csv')
eval_21 = pd.read_csv('./trec/evaluation2021.csv')

# SECURITY: API keys must never be committed to the repository. Any key that
# was previously hard-coded in this file has to be treated as leaked and
# revoked from the OpenAI dashboard. The key is now taken from the
# OPENAI_API_KEY environment variable; it stays None when the variable is
# unset, in which case the first API call fails with an authentication error.
openai.api_key = os.environ.get('OPENAI_API_KEY')
22+
23+
import re
24+
# Ordered clean-up rules for `sub_`: (regex pattern, replacement).
# ORDER MATTERS - later rules operate on the output of earlier ones, so
# repeated patterns are kept exactly where the original code applied them.
# Patterns are raw strings so that `\(`, `\?` and `\.` are not invalid
# string escapes, and they are compiled once at import time.
_SUB_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'\n- .*', ''),
        (r'De-contextualized rewrite under the multi-turn information-seeking dialog context:', ''),
        (r'Response:.*', ''),
        (r'\nCurrent question.*\n.*', ''),
        (r'Previous question:.*\nRewritten.*', ''),
        (r'\t.*sorry.*', ''),
        (r'"Earlier.*\.', ''),
        (r'Earlier, we.*\.', ''),
        (r'Keywords added:.*', ''),
        (r'"keywords:.*', ''),
        (r'Response:.*', ''),
        (r'User: .*', ''),
        (r'AI assistant:.*', ''),
        (r'Response: .*', ''),
        (r'"Current question:.*', ''),
        (r'Current question:.*', ''),
        (r'"Context: ', ''),
        (r'Context: ', ''),
        # NOTE(review): the original had two substitutions with an EMPTY
        # pattern here (pure no-ops). They may have held tokens that were
        # lost when the file was saved - confirm with the author.
        (r'"Reformulated question:', ''),
        (r'Reformulated question:', ''),
        (r'Reformulated question : ', ''),
        (r'Reformulated question: ', ''),
        (r'"Rephrased question: ', ''),
        (r'Rephrased question: ', ''),
        (r'"Request for conversational system: .*\n\nRewritten request: "', ''),
        (r'Request for conversational system:.*\n\nRewritten request: "', ''),
        (r'Previous keywords: ', ''),
        (r'"From the previous question:.*', ''),
        (r'From the previous question:.*', ''),
        (r'"Previous context:.*', ''),
        (r'Previous context:.*', ''),
        (r'"Keywords: ', ''),
        (r'Keywords: ', ''),
        (r'\n\n', ''),
        (r'"Search keywords: ', ''),
        (r'Search keywords: ', ''),
        (r'Prompt: ', ''),
        (r'Prompt for search engine: ', ''),
        (r'Search Engine Prompt: ', ''),
        (r"I'm sorry, but your current question", ''),
        (r'lacks sufficient context .*', ''),
        (r'Query for a search engine: ', ''),
        (r'Search prompt: ', ''),
        (r'Search engine prompt: ', ''),
        (r'Search engine prompt:', ''),
        (r'Rewritten question: ', ''),
        (r'Rewritten question:', ''),
        (r'Request for a retrieval system: ', ''),
        (r'Request for a retrieval system:', ''),
        (r'"Request for clarification: ', ''),
        (r'Request for clarification:', ''),
        (r'Request: ', ''),
        (r'"Request for clarification: ', ''),
        (r'.*answer: ', ''),
        (r'Answer:.*', ''),
        (r'Question: ', ''),
        (r'"OP:.*', ''),
        (r'Request for retrieval system: ', ''),
        (r'Revised question: ', ''),
        # The original pattern ended in an unescaped `?`, which made the
        # final "n" optional and left the literal question mark behind.
        # `\??` removes the sentence with or without its question mark.
        (r'Could you please clarify your question\??', ''),
        (r'Building on the previous questions:', ''),
        (r'Building on the previous questions', ''),
        (r'Building on previously asked questions ', ''),
        (r'Reformulated question in a multi-turn information-seeking dialog context: ', ''),
        (r'Rewritten: ', ''),
        (r'Reformulated: ', ''),
        (r'Revised: ', ''),
        # Final normalisation: drop parenthesised asides (greedy, per line),
        # drop every double quote, cut everything after the first question
        # mark on a line, and drop apologies.
        (r'\(.*\)', ''),
        (r'"', ''),
        (r'\?.*', '?'),
        (r"I'm sorry, but .*", ''),
    )
)


def sub_(x):
    """Strip boilerplate that the chat model wraps around a rewritten query.

    Applies every rule of ``_SUB_RULES`` in order to ``x``: label prefixes
    (e.g. ``Rewritten question: ``), echoed context lines, apologies,
    parenthesised remarks and double quotes are removed, and each line is
    truncated right after its first question mark.

    Args:
        x: Raw model output (a string).

    Returns:
        The cleaned string (possibly empty).
    """
    for pattern, replacement in _SUB_RULES:
        x = pattern.sub(replacement, x)
    return x
101+
102+
103+
# Greedy on purpose (same as the original pattern): on a single line,
# everything from the first "(" to the last ")" is removed. "." does not
# match newlines, so a parenthesis pair split across lines is left alone.
_PARENTHESISED = re.compile(r"\(.*\)")


def sustitube(x):
    """Return ``x`` with parenthesised text removed.

    Args:
        x: Text to clean (a string).

    Returns:
        ``x`` without the span running from the first ``(`` to the last
        ``)`` of each line; the text around it is left untouched.
    """
    return _PARENTHESISED.sub("", x)
106+
107+
108+
def chatgpt(messages:list, model = 'gpt-3.5-turbo'):
    """Send a chat conversation to the OpenAI API and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dictionaries.
        model: Name of the chat model to query.

    Returns:
        The ``content`` of the message of the first returned choice.
    """
    completion = openai.ChatCompletion.create(messages=messages, model=model)
    first_choice = completion.choices[0]
    return first_choice['message']['content']
113+
114+
115+
# 2020 topics: the `number` column holds "<conversation>_<turn>" identifiers,
# which are split into separate `conv_id` and `turn` columns (both strings).
_2020_topics = pd.read_json('/data4/guidorocchietti/GPT_clean/trec/treccast/2020_manual_evaluation_topics_v1.0.json')
_2020 = create_df_from_json(_2020_topics)
_2020_number_parts = [number.split('_') for number in _2020.number]
_2020['conv_id'] = [parts[0] for parts in _2020_number_parts]
_2020['turn'] = [parts[1] for parts in _2020_number_parts]
qrels20 = load_qrels('/data4/guidorocchietti/GPT_clean/trec/qrels/2020qrels.txt')

# 2019 evaluation set, joined on `qid` with the manually rewritten utterances.
_2019 = pd.read_csv('/data4/guidorocchietti/GPT_clean/trec/evaluation2019.csv')
manual_2019 = pd.read_csv(
    '/data4/guidorocchietti/GPT_clean/trec/treccast/test_manual_utterance.tsv',
    sep='\t',
    names=['qid', 'manual_rewritten_utterance'],
)
_2019 = pd.merge(_2019, manual_2019, on='qid')
123+
# %%
124+
'''
125+
system_text = 'In a multi-turn dialog system, rewrite the given sentence to be self-explanatory. Use elements of the previous sentences to generate better sentences.'
126+
messages= [{"role": "system", "content": system_text}]
127+
example = []
128+
conv = _2020[_2020.conv.isin(['81','82'])]
129+
130+
for convid in conv.conv.unique():
131+
part = conv[conv.conv == convid]
132+
for turn in part.turn:
133+
if int(turn)<7:
134+
if convid =='82' and turn =='1':
135+
example.append({"role": "user", "content": 'New conversation.'})
136+
example.append({"role": "user", "content": part[(part.turn == turn)].raw_utterance.iloc[0]})
137+
example.append({"role": "assistant", "content": part[(part.turn == turn)].manual_rewritten_utterance.iloc[0]})
138+
139+
messages += example
140+
'''
141+
142+
def create_messages(system_text, command, current_utterance, previous_utterances=None, previous_outputs=None, previous_in_current=False, prompt_in_input=False):
    """Build the chat ``messages`` list for rewriting ``current_utterance``.

    Args:
        system_text: Instruction text. Sent as a ``system`` message unless
            ``prompt_in_input`` is true.
        command: Text placed directly before an utterance in user messages.
        current_utterance: The utterance to be rewritten.
        previous_utterances: Earlier user utterances of the conversation
            (defaults to none).
        previous_outputs: Model outputs paired with ``previous_utterances``
            (defaults to none). Pairs are formed with ``zip``, so surplus
            items of the longer sequence are ignored.
        previous_in_current: If true, the previous utterances are also
            concatenated into the final user message after the literal
            ``Previous context:``.
        prompt_in_input: If true, no ``system`` message is emitted.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dictionaries ending with
        the user message for ``current_utterance``.
    """
    # `None` defaults replace the original mutable `[]` defaults.
    previous_utterances = [] if previous_utterances is None else list(previous_utterances)
    previous_outputs = [] if previous_outputs is None else list(previous_outputs)
    history = list(zip(previous_utterances, previous_outputs))

    messages = [] if prompt_in_input else [{"role": "system", "content": system_text}]

    if previous_in_current:
        new_current = 'Previous context:' if previous_utterances else ''
        for utterance, output in history:
            messages.append({"role": "user", "content": utterance})
            messages.append({"role": "assistant", "content": output})
            new_current += f"{utterance} "
        new_current += command + current_utterance
        if prompt_in_input:
            new_current = system_text + new_current
        messages.append({"role": "user", "content": new_current})
        return messages

    for utterance, output in history:
        messages.append({"role": "user", "content": command + utterance})
        messages.append({"role": "assistant", "content": output})
    # NOTE(review): the original had an if/else on `prompt_in_input` here
    # whose two branches were identical, so the system text is prepended to
    # the final user message in both modes. Confirm whether the non-prompt
    # branch was meant to omit `system_text`.
    messages.append({"role": "user", "content": system_text + " " + command + current_utterance})
    return messages
169+
170+
171+
def _few_shot_examples(year):
    """Return example user/assistant turns taken from the *other* year's data.

    For ``year == 2019`` conversations '81' and '82' of ``_2020`` are used;
    for ``year == 2020`` conversations 31 and 32 of ``_2019``. Only turns
    numbered below 9 are kept, and a ``New conversation.`` user message is
    inserted before the first turn of the second conversation.
    """
    if year == 2019:
        conv = _2020[_2020.conv_id.isin(['81', '82'])]
        conv_id_stop = '82'
    elif year == 2020:
        conv = _2019[_2019.conv_id.isin([31, 32])]
        conv_id_stop = 32
    else:
        raise ValueError(f'unsupported year: {year!r} (expected 2019 or 2020)')

    example = []
    for convid in conv.conv_id.unique():
        part = conv[conv.conv_id == convid]
        for turn in part.turn:
            if int(turn) >= 9:
                continue
            row = part[part.turn == turn]
            # Compare numerically: turns are strings in `_2020` but may be
            # integers in `_2019`, where `turn == '1'` would never be true.
            if convid == conv_id_stop and int(turn) == 1:
                example.append({"role": "user", "content": 'New conversation.'})
            example.append({"role": "user", "content": row.raw_utterance.iloc[0]})
            example.append({"role": "assistant", "content": row.manual_rewritten_utterance.iloc[0]})
    return example


def _chat_with_retries(message, delay, attempts=4):
    """Call ``chatgpt(message)``, retrying with growing pauses on failure.

    Attempt ``k`` (for ``k > 1``) is preceded by a ``delay * k`` second
    sleep. Every failure is printed; the last exception is re-raised once
    all attempts are used up.
    """
    last_error = None
    for attempt in range(1, attempts + 1):
        if attempt > 1:
            time.sleep(delay * attempt)
        try:
            return chatgpt(message)
        except Exception as e:  # not a bare except: Ctrl-C must still abort
            print(e)
            last_error = e
    raise last_error


def chatgpt_for_df(evaluation,system_text = 'In a multi-turn dialog system, rewrite the given sentence to be self-explanatory following the pattern of the previous interactions.',year=2019,rate_limit_per_minute=20):
    """Rewrite every utterance of ``evaluation`` with the chat model.

    Each request consists of: the system message, few-shot examples (see
    ``_few_shot_examples``), the previous turns of the same conversation
    with the model's own earlier rewrites (parenthesised text removed via
    ``sustitube``), and finally ``system_text + raw_utterance`` of the
    current turn.

    Args:
        evaluation: DataFrame with ``conv_id``, ``qid`` and
            ``raw_utterance`` columns.
        system_text: Instruction used as system message and as prefix of
            the current utterance.
        year: 2019 or 2020; selects which data set supplies the examples.
        rate_limit_per_minute: Maximum requests per minute; the function
            sleeps ``60 / rate_limit_per_minute`` seconds before each turn.

    Returns:
        A pair of DataFrames: ``(qid, query, current_input)`` for every
        successfully rewritten turn, and ``(qid, prompt)`` with the full
        message list that was sent.

    Raises:
        ValueError: If ``year`` is neither 2019 nor 2020.
    """
    delay = 60.0 / rate_limit_per_minute
    starting_message = [{"role": "system", "content": system_text}]
    starting_message += _few_shot_examples(year)

    dictionary = {'qid':{},'query' :{},'current_input':{}}
    prompts = {'qid':{},'prompt' :{}}
    ind = 0

    for conv_id in tqdm(evaluation.conv_id.unique()):
        conv = evaluation[evaluation.conv_id == conv_id]
        # The history is reset for every conversation.
        previous_utterances = []
        previous_outputs = []
        for qid in conv.qid:
            time.sleep(delay)
            current_utterance = conv[conv.qid == qid].raw_utterance.iloc[0]

            message = list(starting_message)
            for x, y in zip(previous_utterances, previous_outputs):
                message.append({"role": "user", "content": x})
                message.append({"role": "assistant", "content": sustitube(y)})
            message.append({"role": "user", "content": system_text + current_utterance})

            # Recorded before the request, as in the original: when a turn
            # fails, `ind` is not advanced and the next turn overwrites it.
            prompts['qid'][ind] = qid
            prompts['prompt'][ind] = message

            try:
                response = _chat_with_retries(message, delay)
            except Exception:
                # Best effort: skip this turn and continue with the next.
                print('end of cilcle, missing qid : ',qid)
                continue

            previous_utterances.append(current_utterance)
            previous_outputs.append(response)
            dictionary['qid'][ind] = qid
            dictionary['query'][ind] = response
            dictionary['current_input'][ind] = message[-1]['content']
            ind += 1

    return pd.DataFrame(dictionary), pd.DataFrame(prompts)
295+
296+
# %%
297+
298+
# Run the 2019 evaluation once per prompt listed in `top5_prompts.csv`.
# A prompt whose rewritten-queries file already exists is skipped, so an
# interrupted run can be resumed.
prompts = pd.read_csv('./top5_prompts.csv', sep=';')
for name, text in zip(prompts['id'], prompts['text']):
    if os.path.isfile(f'/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/rewritten/{name}_2019.tsv'):
        continue
    print('Name : ',name)
    print('Text : ',text)

    output, prompt = chatgpt_for_df(eval_19, system_text=text, year=2019)
    rewritten_queries = output[['qid','query']]
    rewritten_queries.to_csv(f'/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/rewritten/{name}_2019.tsv', sep = '\t', index=False)
    prompt.to_csv(f'/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/prompts/{name}_2019.csv')
310+
#output, prompt = chatgpt_for_df(eval_20,system_text=text,year=2020)
311+
#output[['qid','query']].to_csv(f'/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/rewritten/{name}_2020.tsv', sep = '\t')
312+
#prompt.to_csv(f'/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/prompts/{name}_2020.csv')
313+
#%%
314+
#output, prompt = chatgpt_for_df(eval_20)
315+
#output[['qid','query']].to_csv('/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/rewritten/Example_in_history_2020.tsv', sep = '\t', index=False)
316+
#prompt.to_csv('/data4/guidorocchietti/GPT_clean/ultima_prova/rewritings/prompts/Example_in_history_2020.csv')
317+
# %%

0 commit comments

Comments
 (0)