|
@@ -38,6 +38,7 @@
PROMPT_START_3_v2 = 'You are an expert software developer who writes high quality code. With the information below, please either generate Python3 code (Respond directly with code only, with markdown), or ask clarifying questions: \n'
# TODO: try this new prompt
PROMPT_START_3_v3 = 'You are an expert software developer who writes high quality code. With the information below, please either generate Python3 code (only one code block with markdown in the response), or ask clarifying questions (no markdown in the response): \n'
+PROMPT_START_3_v4 = '\n Based on the information below, you can choose to either generate Python3 code (Respond directly with code only, with markdown), or ask clarifying questions. \n'
ORIGINAL_PROMPT_START_0 = 'You are an expert software developer who writes high quality code. With the information below, please generate Python3 code (Respond directly with code only, with markdown): \n'

PROMPT_EVALUATE_QUESTIONS_V1 = 'The original description of a coding problem is modified so that the requirements become inconsistent, incomplete, or ambiguous. Given the modified description, clarifying questions were raised to clarify it. Given the original and modified problem descriptions, evaluate the quality of the questions. Please provide an integer representing the quality of the questions (3: good questions that recover all missing info; 2: fair questions that recover some missing info; 1: bad questions or irrelevant content).\n QUALITY=[your int] \n Please also provide answers to the questions to recover the missing requirements! Be sure to state in your answer what is new or different in the original description, compared with the modified problem description! \n ANSWERS=```[your answer]``` \n Please strictly follow the format QUALITY=[the int] and ANSWERS=```[the answer]``` in the response! Surround your answer with markup! \n\n ### Questions: {clarifying_questions} \n ### Problem Description: {problem} \n ### Original Description: {missing_information} \n'
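
The prompt above pins the judge model to a QUALITY=[int] / ANSWERS=```[answer]``` reply format. The extraction code is not part of this diff; as a minimal sketch, a caller might recover both fields with regexes like the following (`parse_evaluation` is a hypothetical helper, not from the repo):

```python
import re

def parse_evaluation(response: str):
    """Hypothetical helper: pull QUALITY and ANSWERS out of the judge's reply."""
    quality_m = re.search(r'QUALITY\s*=\s*\[?(\d)\]?', response)
    answers_m = re.search(r'ANSWERS\s*=\s*```(.*?)```', response, re.DOTALL)
    quality = int(quality_m.group(1)) if quality_m else None
    answers = answers_m.group(1).strip() if answers_m else ''
    return quality, answers

reply = "QUALITY=2\nANSWERS=```The grid is 1-indexed, not 0-indexed.```"
print(parse_evaluation(reply))  # (2, 'The grid is 1-indexed, not 0-indexed.')
```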
|
@@ -841,15 +842,18 @@ def generate_response(model, msgs, topn, temperature, args, open_source_model, t
        response_list.append(i['message']['content'])
    return response_list

-def description_2_code_multi_rounds(task_id, entry_point, prompt, user_input, original_prompt, model, topn, temperature, args, open_source_model, tokenizer, cached_response, cached_qq, cached_answer):
+def description_2_code_multi_rounds(prompt_modified, task_id, entry_point, prompt, user_input, original_prompt, model, topn, temperature, args, open_source_model, tokenizer, cached_response, cached_qq, cached_answer):

    messages = []
    response_list = []
    model_2nd_round = OK_MODEL if model == 'Okanagan' else model
    # ROUND 1
    if model == "AgentCoder":
        # Add entry_point, task_id, and original_prompt for AgentCoder
-        messages.append({"task_id": task_id, "prompt": original_prompt, "entry_point": entry_point})
+        if not prompt_modified:
+            messages.append({"task_id": task_id, "prompt": original_prompt, "entry_point": entry_point})
+        else:
+            messages.append({"task_id": task_id, "prompt": original_prompt, "entry_point": entry_point, "clarity_prompt": PROMPT_START_3_v4})
    else:
        ## 1st round: initial code generation
        full_prompt = OK_PROMPT_CODEGEN + user_input if model == 'Okanagan' else prompt + user_input
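
For AgentCoder the first-round "message" is a plain metadata dict rather than a role/content chat message; a toy illustration of the two payload shapes the branch above can produce (all field values are made up, and PROMPT_START_3_v4 is inlined to keep the sketch self-contained):

```python
PROMPT_START_3_v4 = ('\n Based on the information below, you can choose to either '
                     'generate Python3 code, or ask clarifying questions. \n')

# Unmodified prompt: AgentCoder receives only the original HumanEval fields.
plain_payload = {
    "task_id": "HumanEval/0",               # illustrative values, not from the repo
    "prompt": "def add(a, b):\n    ...",
    "entry_point": "add",
}

# Modified prompt: the extra key signals that clarifying questions are allowed.
clarity_payload = {**plain_payload, "clarity_prompt": PROMPT_START_3_v4}
print(sorted(clarity_payload))  # ['clarity_prompt', 'entry_point', 'prompt', 'task_id']
```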
|
@@ -901,9 +905,15 @@ def description_2_code_multi_rounds(task_id, entry_point, prompt, user_input, or
    if model == "AgentCoder":
        # We can only send one prompt to AgentCoder for now; adding multiple roles requires major code changes in the original AgentCoder repo
        new_prompt = "Original Question: " + original_prompt + " First Response: " + response + " Feedback: " + answer + " " + PROMPT_2ND_ROUND
+        # For the third round we don't tell the model that the earlier prompt asked it to generate clarifying questions, which is why we send the original question
        messages[-1]["prompt"] = new_prompt
        msgs_i = messages.copy()

+        # 'clarity_prompt' must not reach the third round, so drop it wherever it exists
+        for message in messages:
+            if 'clarity_prompt' in message:
+                del message['clarity_prompt']
+
        # # directly send third round request to GPT
        # messages.clear()
        # messages.append({"role": "user", "content": full_prompt})
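
One subtlety worth noting: `msgs_i = messages.copy()` is a shallow copy, so the dicts inside `msgs_i` are the same objects the removal loop mutates, and stripping `clarity_prompt` from `messages` strips it from `msgs_i` as well. A standalone illustration with toy data:

```python
messages = [{"task_id": "t1", "prompt": "p", "clarity_prompt": "may ask questions"}]
msgs_i = messages.copy()   # shallow copy: the list is new, the inner dicts are shared

for message in messages:
    if 'clarity_prompt' in message:
        del message['clarity_prompt']

print(msgs_i[0])  # {'task_id': 't1', 'prompt': 'p'} -- clarity_prompt is gone here too
```

If the copy is meant to preserve the pre-removal state, `copy.deepcopy(messages)` would be needed instead.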
|
@@ -1031,8 +1041,16 @@ def HumanEval_experiment(dataset, dataset_loc, option, model, topn, temperature,
            original_prompt = problem['prompt']
            entry_point = problem['entry_point']
            task_id = problem['name']
-            prompt_start = ORIGINAL_PROMPT_START_0 if input_prompt == 'prompt' else PROMPT_START_3_v2
-            response_list, code_list, qq_list, ans_list = description_2_code_multi_rounds(task_id, entry_point, prompt_start, description, original_prompt, model, topn, temperature, args, open_source_model, tokenizer, cached_responses.get(key, ''), cached_qqs.get(key, 0), cached_answers.get(key, ''))
+            # prompt_start = ORIGINAL_PROMPT_START_0 if input_prompt == 'prompt' else PROMPT_START_3_v2
+            # prompt_modified records whether AgentCoder is getting a modified prompt or the original one;
+            # it decides whether to send the new "generate clarifying questions" prompt, PROMPT_START_3_v4.
+            prompt_modified = False
+            if input_prompt == 'prompt':
+                prompt_start = ORIGINAL_PROMPT_START_0
+            else:
+                prompt_start = PROMPT_START_3_v2
+                prompt_modified = True
+            response_list, code_list, qq_list, ans_list = description_2_code_multi_rounds(prompt_modified, task_id, entry_point, prompt_start, description, original_prompt, model, topn, temperature, args, open_source_model, tokenizer, cached_responses.get(key, ''), cached_qqs.get(key, 0), cached_answers.get(key, ''))
        except Exception as e:
            print('%s---------%s' % (problem['name'], e), flush=True)
            continue
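
Since `prompt_modified` is true exactly when `input_prompt` is not `'prompt'`, the selection could also be written without the mutable flag; a behavior-equivalent sketch (not what the commit does, with stand-in values so it runs on its own):

```python
ORIGINAL_PROMPT_START_0 = 'original prompt...'   # stand-ins for the real prompt constants
PROMPT_START_3_v2 = 'clarifying prompt...'
input_prompt = 'prompt_modified'                 # any value other than 'prompt'

prompt_modified = input_prompt != 'prompt'
prompt_start = PROMPT_START_3_v2 if prompt_modified else ORIGINAL_PROMPT_START_0
print(prompt_modified, prompt_start)             # True clarifying prompt...
```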
|
|