def add_pull_request_comment(self, review):
    """Post one AI review item as an inline comment on the GitHub pull request.

    Args:
        review: dict describing a single review item. The keys read here are
            ``file`` (path of the commented file), ``lines`` (dict whose
            ``new`` entry is the line number in the new file), ``severity``,
            ``category``, ``analysis`` and ``suggestion``.

    Returns:
        True when the comment was created, False when the review item was
        skipped (missing head SHA, file path or usable line number) or the
        HTTP request failed.
    """
    head_sha = self.webhook_data.get("pull_request", {}).get("head", {}).get("sha")
    if not head_sha:
        logger.error("无法添加评论,缺少 head_sha。")
        return False

    file_path = review.get("file")
    if not file_path:
        logger.warning("跳过评论,审查缺少 'file' 路径。")
        return False

    # The review comes from LLM output, so `lines` may be missing, not a dict,
    # or carry the line number as a string. Validate before touching it.
    lines_info = review.get("lines")
    new_line = lines_info.get("new") if isinstance(lines_info, dict) else None
    if isinstance(new_line, str) and new_line.strip().isdigit():
        new_line = int(new_line)
    if isinstance(new_line, bool) or not isinstance(new_line, int) or new_line <= 0:
        logger.warning("跳过评论,审查缺少 'lines' 信息。")
        return False

    # str(...) keeps a non-string severity (None, number) from raising here,
    # outside the try block, which would abort the caller's whole comment loop.
    severity = str(review.get('severity') or 'N/A').upper()
    body = (
        f"**AI Review [{severity}]**: {review.get('category', 'General')}\n"
        "\n"
        f"**分析**: {review.get('analysis', 'N/A')}\n"
        "\n"
        "**建议**:\n"
        "```suggestion\n"
        f"{review.get('suggestion', 'N/A')}\n"
        "```\n"
    )

    url = f"https://api.github.com/repos/{self.repo_full_name}/pulls/{self.pull_request_number}/comments"
    headers = {
        "Authorization": f"token {self.github_token}",
        "Accept": "application/vnd.github.v3+json",
        "Content-Type": "application/json"
    }
    payload = {
        "body": body,
        "commit_id": head_sha,
        "path": file_path,
        "line": new_line
    }

    target_desc = f"file {file_path} line {new_line}"
    try:
        # A timeout keeps one stalled request from blocking the worker forever.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        logger.info(f"成功向 GitHub PR #{self.pull_request_number} ({target_desc}) 添加评论")
        return True
    except Exception as e:
        # Best-effort by design: a failed comment is logged and reported as
        # False so the caller can keep posting the remaining review items.
        logger.exception(f"添加 GitHub 评论 ({target_desc}) 时发生意外错误: {e}")
        return False
def get_merge_request(self) -> dict:
    """Fetch the merge request payload from the GitLab ``/changes`` endpoint.

    Returns:
        The decoded JSON object on HTTP 200, otherwise an empty dict. An
        empty dict is also returned when the body is not a JSON object, so
        callers can always use ``.get(...)`` on the result.
    """
    url = urljoin(f"{self.gitlab_url}/",
                  f"api/v4/projects/{self.project_id}/merge_requests/{self.merge_request_iid}/changes")
    headers = {
        'Private-Token': self.gitlab_token
    }
    # NOTE(review): TLS verification is disabled here, as in the original
    # code (presumably for self-hosted GitLab with self-signed certificates).
    # The timeout keeps a stalled connection from blocking the worker.
    response = requests.get(url, headers=headers, verify=False, timeout=30)

    if response.status_code != 200:
        logger.warning(f"Failed to get changes from GitLab (URL: {url}): {response.status_code}, {response.text}")
        return {}

    try:
        data = response.json()
    except ValueError:
        # A 200 response with a non-JSON body (e.g. a proxy error page).
        logger.warning(f"Failed to get changes from GitLab (URL: {url}): {response.status_code}, {response.text}")
        return {}

    return data if isinstance(data, dict) else {}
def add_merge_request_comment(self, review, position_info):
    """Post one AI review item as a positioned discussion on the GitLab merge request.

    Args:
        review: dict describing a single review item. The keys read here are
            ``file``, ``old_path`` (optional), ``lines`` (dict with ``new``
            and/or ``old`` line numbers), ``severity``, ``category``,
            ``analysis`` and ``suggestion``.
        position_info: dict providing ``base_sha``, ``start_sha`` and
            ``head_sha`` for the diff the comment is attached to.

    Returns:
        True when the discussion was created, False when the review item was
        skipped (missing SHAs, file path or usable line number) or the HTTP
        request failed.
    """
    required_shas = ("head_sha", "base_sha", "start_sha")
    if not isinstance(position_info, dict) or not all(position_info.get(key) for key in required_shas):
        logger.error(
            f"错误: 无法添加评论,缺少必要的位置信息 (head_sha/base_sha/start_sha)。得到: {position_info}")
        return False

    file_path = review.get("file")
    if not file_path:
        logger.warning("跳过评论,审查缺少 'file' 路径。")
        return False

    def as_line_number(value):
        """Return value as a positive int, or None when it is not a usable line number."""
        if isinstance(value, bool):
            return None
        if isinstance(value, str) and value.strip().isdigit():
            value = int(value)
        if isinstance(value, int) and value > 0:
            return value
        return None

    # The review comes from LLM output, so `lines` may be missing or malformed.
    lines_info = review.get("lines")
    if not isinstance(lines_info, dict):
        lines_info = {}
    new_line = as_line_number(lines_info.get("new"))
    old_line = as_line_number(lines_info.get("old"))

    position_data = {
        "base_sha": position_info.get("base_sha"),
        "start_sha": position_info.get("start_sha"),
        "head_sha": position_info.get("head_sha"),
        "position_type": "text",
        "old_path": review.get("old_path") or file_path,
        "new_path": file_path,
    }
    # Prefer the new-file line; fall back to the old-file line for deletions.
    if new_line is not None:
        position_data["new_line"] = new_line
        target_desc = f"file {file_path} line {new_line}"
    elif old_line is not None:
        position_data["old_line"] = old_line
        target_desc = f"文件 {position_data['old_path']} 旧行号 {old_line}"
    else:
        logger.warning("跳过评论,审查缺少 'lines' 信息。")
        return False

    # str(...) keeps a non-string severity (None, number) from raising here,
    # outside the try block, which would abort the caller's whole comment loop.
    severity = str(review.get('severity') or 'N/A').upper()
    body = (
        f"**AI Review [{severity}]**: {review.get('category', 'General')}\n"
        "\n"
        f"**分析**: {review.get('analysis', 'N/A')}\n"
        "\n"
        "**建议**:\n"
        "```suggestion\n"
        f"{review.get('suggestion', 'N/A')}\n"
        "```\n"
    )

    url = f"{self.gitlab_url}/api/v4/projects/{self.project_id}/merge_requests/{self.merge_request_iid}/discussions"
    headers = {"PRIVATE-TOKEN": self.gitlab_token, "Content-Type": "application/json"}
    payload = {"body": body, "position": position_data}
    logger.info(f"尝试向 {target_desc} 添加带位置的评论")
    try:
        # NOTE(review): verify=False matches the other GitLab request in this
        # handler (get_merge_request); without it this POST fails on instances
        # with self-signed certificates while the GET succeeds. The timeout
        # keeps a stalled request from blocking the worker.
        response_obj = requests.post(url, headers=headers, json=payload, verify=False, timeout=30)
        response_obj.raise_for_status()
        logger.info(f"成功向 GitLab MR {self.merge_request_iid} ({target_desc}) 添加评论")
        return True
    except Exception as e:
        # Best-effort by design: a failed comment is logged and reported as
        # False so the caller can keep posting the remaining review items.
        logger.exception(f"添加 GitLab 评论 ({target_desc}) 时发生意外错误: {e}")
        return False
commits) - review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) - + reviewer = CodeReviewer() + review_result = reviewer.review_and_strip_code(str(changes), commits_text) # 将review结果提交到Gitlab的 notes handler.add_merge_request_notes(f'Auto Review Result: \n{review_result}') @@ -135,11 +135,26 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url ) ) + if merge_detail_review: + # 如果开启Merge请求详细Review,对每个变更的文件进行review + all_detail_review = reviewer.detail_review(changes) + merge_request = handler.get_merge_request() + success_count = 0 + fail_count = 0 + for review in all_detail_review: + if handler.add_merge_request_comment(review=review, position_info=merge_request.get("diff_refs")): + success_count += 1 + else: + fail_count += 1 + logger.info(f'Gitlab merge request detail review count: {len(all_detail_review)}, ' + f'Success count: {success_count}, Fail count: {fail_count}') + except Exception as e: error_message = f'AI Code Review 服务出现未知错误: {str(e)}\n{traceback.format_exc()}' notifier.send_notification(content=error_message) logger.error('出现未知错误: %s', error_message) + def handle_github_push_event(webhook_data: dict, github_token: str, github_url: str, github_url_slug: str): push_review_enabled = os.environ.get('PUSH_REVIEW_ENABLED', '0') == '1' try: @@ -203,6 +218,7 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, gith :return: ''' merge_review_only_protected_branches = os.environ.get('MERGE_REVIEW_ONLY_PROTECTED_BRANCHES_ENABLED', '0') == '1' + merge_detail_review = os.environ.get('MERGE_DETAIL_REVIEW_ENABLED', '0') == '1' try: # 解析Webhook数据 handler = GithubPullRequestHandler(webhook_data, github_token, github_url) @@ -239,7 +255,8 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, gith # review 代码 commits_text = ';'.join(commit['title'] for commit in commits) - review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text) + 
def parse_single_file_diff(diff_text, file_path, old_file_path=None, context_limit=20):
    """Parse the unified diff of a single file into structured change records.

    Args:
        diff_text: unified diff text for one file; ``None`` or an empty
            string yields a result without changes.
        file_path: path of the file in the new revision.
        old_file_path: previous path when the file was renamed, else ``None``.
        context_limit: maximum number of trailing context lines kept in the
            ``context`` strings; ``None`` keeps all of them, ``0`` keeps none.

    Returns:
        A dict with the keys ``path``, ``old_path``, ``changes`` (list of
        dicts with ``type`` ``"add"``/``"delete"``, ``old_line``,
        ``new_line`` and ``content`` without the +/- prefix), ``context``
        (``{"old": str, "new": str}``, both holding the same
        ``"<old> -> <new>: <text>"`` lines) and ``lines_changed`` (number of
        added plus deleted lines).
    """
    changes = []
    context_lines = []
    old_line_no = 0
    new_line_no = 0
    # Lines still expected in the current hunk, taken from the "@@" header.
    old_remaining = 0
    new_remaining = 0

    for line in (diff_text or "").splitlines():
        if line.startswith('@@ '):
            header = re.match(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@', line)
            if header:
                old_line_no = int(header.group(1))
                new_line_no = int(header.group(3))
                # An omitted length in a hunk header means 1.
                old_remaining = int(header.group(2)) if header.group(2) is not None else 1
                new_remaining = int(header.group(4)) if header.group(4) is not None else 1
            else:
                old_line_no = new_line_no = 0
                old_remaining = new_remaining = 0
            continue

        in_hunk = old_remaining > 0 or new_remaining > 0
        # "--- a/x" / "+++ b/x" are file headers only outside a hunk. Inside
        # a hunk the same prefixes are a deleted "-- ..." line or an added
        # "++ ..." line, and skipping them would shift every later line number.
        if not in_hunk and line.startswith(('--- ', '+++ ')):
            continue

        if line.startswith('+'):
            changes.append({
                "type": "add",
                "old_line": None,
                "new_line": new_line_no,
                "content": line[1:]
            })
            new_line_no += 1
            new_remaining = max(new_remaining - 1, 0)
        elif line.startswith('-'):
            changes.append({
                "type": "delete",
                "old_line": old_line_no,
                "new_line": None,
                "content": line[1:]
            })
            old_line_no += 1
            old_remaining = max(old_remaining - 1, 0)
        elif line.startswith(' ') or (line == '' and old_remaining > 0 and new_remaining > 0):
            # A completely empty line inside a hunk is a blank context line
            # whose leading space was stripped; it still occupies one line on
            # both sides.
            context_lines.append(f"{old_line_no} -> {new_line_no}: {line[1:]}")
            old_line_no += 1
            new_line_no += 1
            old_remaining = max(old_remaining - 1, 0)
            new_remaining = max(new_remaining - 1, 0)
        # Anything else (e.g. "\ No newline at end of file") carries no line.

    if context_limit is None:
        kept_context = context_lines
    elif context_limit > 0:
        kept_context = context_lines[-context_limit:]
    else:
        kept_context = []
    context_text = "\n".join(kept_context)

    return {
        "path": file_path,
        "old_path": old_file_path,
        "changes": changes,
        "context": {"old": context_text, "new": context_text},
        "lines_changed": len(changes)
    }
from biz.utils.token_util import count_tokens, truncate_text_by_tokens @@ -14,29 +16,29 @@ class BaseReviewer(abc.ABC): """代码审查基类""" - def __init__(self, prompt_key: str): + def __init__(self): self.client = Factory().getClient() - self.prompts = self._load_prompts(prompt_key, os.getenv("REVIEW_STYLE", "professional")) + self.prompts = self._load_prompts(os.getenv("REVIEW_STYLE", "professional")) - def _load_prompts(self, prompt_key: str, style="professional") -> Dict[str, Any]: + def _load_prompts(self, style="professional") -> Dict[str, Any]: """加载提示词配置""" prompt_templates_file = "conf/prompt_templates.yml" try: # 在打开 YAML 文件时显式指定编码为 UTF-8,避免使用系统默认的 GBK 编码。 with open(prompt_templates_file, "r", encoding="utf-8") as file: - prompts = yaml.safe_load(file).get(prompt_key, {}) + prompts = yaml.safe_load(file) # 使用Jinja2渲染模板 def render_template(template_str: str) -> str: return Template(template_str).render(style=style) - - system_prompt = render_template(prompts["system_prompt"]) - user_prompt = render_template(prompts["user_prompt"]) - - return { - "system_message": {"role": "system", "content": system_prompt}, - "user_message": {"role": "user", "content": user_prompt}, - } + for k, v in prompts.items(): + system_prompt = render_template(v["system_prompt"]) + user_prompt = render_template(v["user_prompt"]) + prompts[k] = { + "system_message": {"role": "system", "content": system_prompt}, + "user_message": {"role": "user", "content": user_prompt}, + } + return prompts except (FileNotFoundError, KeyError, yaml.YAMLError) as e: logger.error(f"加载提示词配置失败: {e}") raise Exception(f"提示词配置加载失败: {e}") @@ -58,7 +60,7 @@ class CodeReviewer(BaseReviewer): """代码 Diff 级别的审查""" def __init__(self): - super().__init__("code_review_prompt") + super().__init__() def review_and_strip_code(self, changes_text: str, commits_text: str = "") -> str: """ @@ -88,16 +90,93 @@ def review_and_strip_code(self, changes_text: str, commits_text: str = "") -> st def review_code(self, diffs_text: str, 
def detail_review(self, changes: list) -> List[Dict]:
    """Run a per-file, line-level review and collect the valid review items.

    For every change the file diff is parsed with ``parse_single_file_diff``,
    sent to the LLM using the ``detail_review_prompt`` messages, and the JSON
    answer is validated. A failure on one file is logged and does not stop
    the remaining files.

    Args:
        changes: list of change dicts; the keys read here are ``diff``,
            ``new_path``, ``old_path`` and ``renamed_file``.

    Returns:
        A list of review dicts, each containing at least ``file``, ``lines``
        (a dict), ``category``, ``severity``, ``analysis`` and
        ``suggestion``. ``file`` is forced to the change's ``new_path`` and
        ``old_path`` is attached so positioned comments can address renamed
        files.
    """
    required_keys = ("file", "lines", "category", "severity", "analysis", "suggestion")
    all_reviews: List[Dict] = []
    # Review every changed file independently.
    for change in changes:
        file_diff_text = change.get('diff')
        new_path = change.get('new_path')
        old_path = change.get('old_path')
        is_renamed = change.get('renamed_file', False)
        logger.info(f"解析文件 diff: {new_path} (旧路径: {old_path if is_renamed else 'N/A'})")
        try:
            # A missing diff is treated as "no changes" instead of an error.
            file_parsed_changes = parse_single_file_diff(
                file_diff_text or "", new_path, old_path if is_renamed else None)
            if not file_parsed_changes or not file_parsed_changes.get("changes"):
                logger.info(f"未从 {new_path} 的 diff 中解析出变更。")
                continue

            logger.info(f"成功解析 {new_path} 的 {len(file_parsed_changes['changes'])} 处变更。")
            input_data = {
                "file_meta": {
                    "path": file_parsed_changes["path"],
                    "old_path": file_parsed_changes.get("old_path"),
                    "lines_changed": file_parsed_changes.get("lines_changed", len(file_parsed_changes["changes"])),
                    "context": file_parsed_changes["context"]
                },
                "changes": file_parsed_changes["changes"]
            }
            input_json_string = json.dumps(input_data, indent=2, ensure_ascii=False)
            json_content = f"```json\n{input_json_string}\n```"

            prompt = self.prompts['detail_review_prompt']
            messages = [
                prompt["system_message"],
                {
                    "role": "user",
                    "content": prompt["user_message"]["content"].format(json_content=json_content),
                },
            ]

            review_json_str = self.call_llm(messages)
            # Models often wrap JSON in a Markdown code fence despite the
            # prompt; unwrap it so a usable answer is not discarded.
            json_text = (review_json_str or "").strip()
            fenced = re.match(r"^```[a-zA-Z]*\s*\n(.*?)\n?\s*```$", json_text, re.DOTALL)
            if fenced:
                json_text = fenced.group(1)
            parsed_output = json.loads(json_text)

            reviews_for_file = []
            if isinstance(parsed_output, list):
                reviews_for_file = parsed_output
            elif isinstance(parsed_output, dict):
                # Accept {"some_key": [...]} by taking the first list value.
                list_key = next((k for k, v in parsed_output.items() if isinstance(v, list)), None)
                if list_key is not None:
                    reviews_for_file = parsed_output[list_key]
                    logger.info(f"在 LLM 输出的键 '{list_key}' 下找到审查列表。")
                else:
                    logger.warning(
                        f"文件 {new_path} 的 LLM 输出是一个字典,但未找到列表值。输出: {review_json_str}")
                    # Treat the dict as a single review item; the structure
                    # check below rejects it if it does not fit.
                    reviews_for_file = [parsed_output]
            else:
                logger.warning(
                    f"文件 {new_path} 的 LLM 输出不是 JSON 列表或预期的字典。输出: {review_json_str}")

            for review in reviews_for_file:
                # `lines` must be a dict because the comment handlers call
                # .get() on it.
                is_valid = (
                    isinstance(review, dict)
                    and all(key in review for key in required_keys)
                    and isinstance(review["lines"], dict)
                )
                if not is_valid:
                    logger.warning(f"跳过文件 {new_path} 的无效审查项结构: {review}")
                    continue
                if review.get("file") != new_path:
                    logger.warning(
                        f"修正审查中的文件路径从 '{review.get('file')}' 为 '{new_path}'")
                    review["file"] = new_path
                # The model is told not to emit extra keys, so the previous
                # path of a renamed file has to be attached here.
                review["old_path"] = old_path if (is_renamed and old_path) else new_path
                all_reviews.append(review)
        except Exception as e:
            # Best-effort per file: log and continue with the next change.
            logger.warning(f"文件 {new_path} 的代码审查出错: {e}")
    return all_reviews
DASHBOARD_PASSWORD=admin diff --git a/conf/prompt_templates.yml b/conf/prompt_templates.yml index 66258255..68ad8fc3 100644 --- a/conf/prompt_templates.yml +++ b/conf/prompt_templates.yml @@ -41,3 +41,155 @@ code_review_prompt: 提交历史(commits): {commits_text} + +detail_review_prompt: + system_prompt: |- + # 角色 + 你是一位资深的软件开发工程师,你的核心职责是深入分析提供的代码变更,发现其中潜在的错误、安全隐患、性能问题、设计缺陷或不符合最佳实践的地方。 + 你的审查结果必须**极度严格**地遵守后续指定的 JSON 数组输出格式要求,**不包含**任何额外的解释性文字、代码块标记(如 ```json ... ```)或其他非JSON数组内容。 + + # 审查维度及判断标准(按优先级排序) + 1. **功能实现的正确性与健壮性**: 确保代码逻辑正确,能够处理各种边界情况和异常输入。 + 2. **安全性与潜在风险**:检查代码是否存在安全漏洞(如SQL注入、XSS攻击等),并评估其潜在风险。 + 3. **是否符合最佳实践**:评估代码是否遵循行业最佳实践,包括代码结构、命名规范、注释清晰度等。 + 4. **性能与资源利用效率**:分析代码的性能表现,评估是否存在资源浪费或性能瓶颈。 + 5. **设计与架构**:代码是否遵循良好的设计原则(如 SOLID),模块化和封装是否合理。 + + # 输入数据格式 + 输入是一个 JSON 对象,包含单个文件的变更信息: + { + "file_meta": { + "path": "当前文件路径", + "old_path": "原文件路径(重命名时存在,否则为null)", + "lines_changed": "变更行数统计(仅add/delete,例如 '+5,-2')", + "context": { + "old": "原文件相关上下文代码片段(可能包含行号)", + "new": "新文件相关上下文代码片段(可能包含行号)" + } + }, + "changes": [ + { + "type": "变更类型(add/delete)", + "old_line": "原文件行号(删除时为整数,新增时为null)", + "new_line": "新文件行号(新增时为整数,删除时为null)", + "content": "变更内容(不含+/-前缀)" + } + // ... 
more changes in this file + ] + } + - `old_line`:该 `content` 在原文件中的行号,为 `null` 表示该行是新增的。 + - `new_line`:该 `content` 在新文件中的行号,为 `null` 表示该行是被删除的。 + - `context` 提供了变更区域附近的代码行,以帮助理解变更的背景。 + + # 示例输入与输出 (Few-shot Examples) + + ## 示例输入 1 (包含一个潜在问题) + ```json + { + "file_meta": { + "path": "service/user_service.py", + "old_path": null, + "lines_changed": "+4", + "context": { + "old": "def get_user_info(user_id):\n # Existing code\n pass", + "new": "def get_user_info(user_id):\n # Existing code\n conn = db.connect()\n cursor = conn.cursor()\n query = f\"SELECT * FROM users WHERE id = {user_id}\"\n cursor.execute(query)\n user_data = cursor.fetchone()\n conn.close()\n return user_data" + } + }, + "changes": [ + {"type": "add", "old_line": null, "new_line": 3, "content": " conn = db.connect()"}, + {"type": "add", "old_line": null, "new_line": 4, "content": " cursor = conn.cursor()"}, + {"type": "add", "old_line": null, "new_line": 5, "content": " query = f\"SELECT * FROM users WHERE id = {user_id}\""}, + {"type": "add", "old_line": null, "new_line": 6, "content": " cursor.execute(query)"}, + {"type": "add", "old_line": null, "new_line": 7, "content": " user_data = cursor.fetchone()"}, + {"type": "add", "old_line": null, "new_line": 8, "content": " conn.close()"} + ] + } + ``` + + ## 示例输出 1 (对应示例输入 1 的正确 JSON数组 输出) + [ + { + "file": "service/user_service.py", + "lines": { + "old": null, + "new": 5 + }, + "category": "安全性", + "severity": "critical", + "analysis": "直接将 user_id 拼接到 SQL 查询字符串中存在 SQL 注入风险。", + "suggestion": "query = \"SELECT * FROM users WHERE id = %s\"\ncursor.execute(query, (user_id,))" + } + ] + + ## 示例输入 2 (没有发现重要问题) + ```json + { + "file_meta": { + "path": "util/string_utils.py", + "old_path": null, + "lines_changed": "+3", + "context": { + "old": "def greet(name):\n return f\"Hello, {name}!\"", + "new": "def greet(name):\n # Add an exclamation mark\n greeting = f\"Hello, {name}!\"\n return greeting + \"!!\"" + } + }, + "changes": [ + {"type": "add", 
"old_line": null, "new_line": 2, "content": " # Add an exclamation mark"}, + {"type": "add", "old_line": null, "new_line": 3, "content": " greeting = f\"Hello, {name}!\""}, + {"type": "add", "old_line": null, "new_line": 4, "content": " return greeting + \"!!\""} + ] + } + ``` + + ## 示例输出 2 (对应示例输入 2 的正确 JSON数组 输出) + [] + + # 输出格式 + 你的输出必须严格按照以下 JSON数组 格式输出一个审查结果JSON数组。数组中的每个对象代表一个具体的审查意见。 + [ + { + "file": "string, 发生问题的文件的完整路径", + "lines": { + "old": "integer or null, 原文件行号。如果是针对新增代码或无法精确到原文件行,则为 null。", + "new": "integer or null, 新文件行号。如果是针对删除代码或无法精确到新文件行,则为 null。" + }, + "category": "string, 问题分类,从 [正确性, 安全性, 性能, 设计, 最佳实践] 中选择。", + "severity": "string, 严重程度,从 [critical, high, medium, low] 中选择。", + "analysis": "string, 结合代码上下文对问题进行的简短分析和审查意见。限制在 100 字以内,使用中文。", + "suggestion": "string, 针对该问题位置的纠正或改进建议代码。如果难以提供直接代码,可以提供文字说明。" + } + // ... more review comments + ] + + ** analysis 简短分析和审查意见风格** + - 保持 {{ style }} 风格,{% if style == 'professional' %}评论时请使用标准的工程术语,保持专业严谨。{% elif style == 'sarcastic' %}评论时请大胆使用讽刺性语言,但要确保技术指正准确。{% elif style == 'gentle' %}评论时请多用"建议"、"可以考虑"等温和措辞。{% elif style == 'humorous' %}评论时请: + 1. 在技术点评中加入适当幽默元素 + 2. 
合理使用相关Emoji(但不要过度): + - 🐛 表示bug + - 💥 表示严重问题 + - 🎯 表示改进建议 + - 🔍 表示需要仔细检查 + {% endif %} + + **行号处理规则强化:** + - 如果审查意见针对**新增**的代码行,请将 `lines.old` 设为 `null`,`lines.new` 设为该行在**新文件**中的对应行号 (务必与输入 `changes` 中的 `new_line` 精确匹配)。 + - 如果审查意见针对**删除**的代码行,请将 `lines.old` 设为该行在**原文件**中的对应行号 (务必与输入 `changes` 中的 `old_line` 精确匹配),`lines.new` 设为 `null`。 + - 如果审查意见是针对**修改**后的代码行(即涉及旧行和新行),请优先关联到**新文件**的行号:`lines.old` 设为 `null`,`lines.new` 设为修改后该行在**新文件**中的对应行号 (务必与输入 `changes` 中的 `new_line` 精确匹配)。 + - 如果审查意见针对整个文件、某个函数签名或无法精确到输入 `changes` 中的某一行,可以将 `lines` 设为 `{"old": null, "new": null}`。 + - **请再次确认:你输出的每个审查意见对象中的 `lines.old` 或 `lines.new` 至少有一个值必须与输入 `changes` 数组中某个元素的 `old_line` 或 `new_line` 精确匹配(除非是针对整个文件或无法精确到行的意见)。** + + **输出格式绝对禁止:** + - **不允许**在 JSON 数组前后或内部添加任何解释性文字、markdown 格式(如代码块标记 ```json ```)。 + - **不允许**输出任何注释。 + - **不允许**在 JSON数组 之外有任何其他内容。 + - **不允许**输出的 JSON 中存在其他key。 + + 如果提供的文件变更中没有发现任何需要反馈的问题(即没有达到 medium 或更高 severity 的问题),请返回一个**空的 JSON 数组**:`[]`。 + + 请根据上述指令和格式要求,审查我提供的代码变更输入,并输出严格符合格式要求的 JSON 数组。 + + user_prompt: |- + 以下是某位员工向代码库提交的代码,请以{{ style }}风格审查以下代码。 + + JSON 代码内容: + {json_content}