From d0bccc59c4e9fb4790d274e1ade4ebb8f58e67d7 Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 06:29:23 -0500 Subject: [PATCH 1/8] Bump tree-sitter version for Python3.13. --- suite/auto-sync/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/suite/auto-sync/pyproject.toml b/suite/auto-sync/pyproject.toml index ab605d6ccb..6fb92ae1c2 100644 --- a/suite/auto-sync/pyproject.toml +++ b/suite/auto-sync/pyproject.toml @@ -7,8 +7,8 @@ name = "autosync" version = "0.1.0" dependencies = [ "termcolor >= 2.3.0", - "tree_sitter == 0.22.3", - "tree-sitter-cpp == 0.22.3", + "tree_sitter == 0.24.0", + "tree-sitter-cpp == 0.23.4", "black >= 24.3.0", "usort >= 1.0.8", "setuptools >= 69.2.0", From fd6e21605fc5a732404b7a9ea6e3050077697dca Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 09:04:01 -0500 Subject: [PATCH 2/8] Refactor for tree-sitter-py 24.0. Remove usage of .text property of some nodes. Convert the result of query.captures() back to the 22.3 version. Fix the setup of the Parser. --- .../src/autosync/cpptranslator/Configurator.py | 3 +-- .../src/autosync/cpptranslator/CppTranslator.py | 7 ++++--- .../auto-sync/src/autosync/cpptranslator/Differ.py | 2 +- .../autosync/cpptranslator/TemplateCollector.py | 7 ++++--- .../autosync/cpptranslator/Tests/test_patches.py | 3 ++- .../cpptranslator/patches/BitCastStdArray.py | 12 ++++++------ .../cpptranslator/tree_sitter_compatibility.py | 14 ++++++++++++++ 7 files changed, 32 insertions(+), 16 deletions(-) create mode 100644 suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py diff --git a/suite/auto-sync/src/autosync/cpptranslator/Configurator.py b/suite/auto-sync/src/autosync/cpptranslator/Configurator.py index e8e81cba6d..0f8ecb4b5b 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/Configurator.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Configurator.py @@ -79,5 +79,4 @@ def ts_set_cpp_language(self) -> None: def init_parser(self) -> None: log.debug("Init parser") - self.parser = Parser() - self.parser.set_language(self.ts_cpp_lang) + self.parser = Parser(self.ts_cpp_lang) diff --git a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py index d98f8631a5..c780e960eb 100755 --- a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py +++ b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py @@ -91,6 +91,7 @@ run_clang_format, ) from autosync.cpptranslator.patches.isUInt import IsUInt +from autosync.cpptranslator.tree_sitter_compatibility import query_captures_22_3 class Translator: @@ -361,7 +362,7 @@ def init_patches(self): def parse(self, src_path: Path) -> None: self.read_src_file(src_path) log.debug("Parse source code") - self.tree = self.parser.parse(self.src, keep_text=True) + self.tree = self.parser.parse(self.src) def patch_src(self, p_list: [(bytes, Node)]) -> None: if len(p_list) == 0: @@ -391,7 +392,7 @@ def patch_src(self, p_list: [(bytes, Node)]) -> None: old_end_point=old_end_point, new_end_point=(old_end_point[0], old_end_point[1] + d), ) - self.tree = self.parser.parse(new_src, self.tree, keep_text=True) + self.tree = self.parser.parse(new_src, self.tree) def apply_patch(self, patch: Patch) -> bool: """Tests if the given patch should be applied for the current architecture or file.""" @@ -435,7 +436,7 @@ def translate(self) -> None: # Here we bundle these captures together. query: Query = self.ts_cpp_lang.query(pattern) captures_bundle: [[(Node, str)]] = list() - for q in query.captures(self.tree.root_node): + for q in query_captures_22_3(query, self.tree.root_node): if q[1] == patch.get_main_capture_name(): # The main capture the patch is looking for. captures_bundle.append([q]) diff --git a/suite/auto-sync/src/autosync/cpptranslator/Differ.py b/suite/auto-sync/src/autosync/cpptranslator/Differ.py index 1361ea08fc..5dfdd67f88 100755 --- a/suite/auto-sync/src/autosync/cpptranslator/Differ.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Differ.py @@ -345,7 +345,7 @@ def parse_file(self, file: Path) -> dict[str:Node]: with open(file) as f: content = bytes(f.read(), "utf8") - tree: Tree = self.parser.parse(content, keep_text=True) + tree: Tree = self.parser.parse(content) node_types_to_diff = [ n["node_type"] for n in self.conf_general["nodes_to_diff"] diff --git a/suite/auto-sync/src/autosync/cpptranslator/TemplateCollector.py b/suite/auto-sync/src/autosync/cpptranslator/TemplateCollector.py index 24470aad77..decb086671 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/TemplateCollector.py +++ b/suite/auto-sync/src/autosync/cpptranslator/TemplateCollector.py @@ -8,6 +8,7 @@ from tree_sitter import Language, Node, Parser, Query from autosync.cpptranslator.patches.Helper import get_text +from autosync.cpptranslator.tree_sitter_compatibility import query_captures_22_3 class TemplateRefInstance: @@ -105,7 +106,7 @@ def collect(self): src = x["content"] log.debug(f"Search for template references in {path}") - tree = self.parser.parse(src, keep_text=True) + tree = self.parser.parse(src) query: Query = self.lang_cpp.query(self.get_template_pattern()) capture_bundles = self.get_capture_bundles(query, tree) @@ -278,8 +279,8 @@ def read_files(self): @staticmethod def get_capture_bundles(query, tree): - captures_bundle: [[(Node, str)]] = list() - for q in query.captures(tree.root_node): + captures_bundle: list[list[tuple[Node, str]]] = list() + for q in query_captures_22_3(query, tree.root_node): if q[1] == "templ_ref": captures_bundle.append([q]) else: diff --git a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py index 9deeb99f9f..81d69e8b64 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py @@ -81,6 +81,7 @@ from autosync.cpptranslator.patches.BadConditionCode import BadConditionCode from autosync.Helper import get_path from autosync.cpptranslator.patches.isUInt import IsUInt +from autosync.cpptranslator.tree_sitter_compatibility import query_captures_22_3 class TestPatches(unittest.TestCase): @@ -101,7 +102,7 @@ def check_patching_result(self, patch, syntax, expected, filename=""): kwargs = self.translator.get_patch_kwargs(patch) query: Query = self.ts_cpp_lang.query(patch.get_search_pattern()) captures_bundle: [[(Node, str)]] = list() - for q in query.captures(self.parser.parse(syntax, keep_text=True).root_node): + for q in query_captures_22_3(query, self.parser.parse(syntax).root_node): if q[1] == patch.get_main_capture_name(): captures_bundle.append([q]) else: diff --git a/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py b/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py index 2491de114c..5c19c7568c 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py +++ b/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py @@ -46,14 +46,14 @@ def get_main_capture_name(self) -> str: return "array_bit_cast" def get_patch(self, captures: [(Node, str)], src: bytes, **kwargs) -> bytes: - arr_name: bytes = captures[1][0].text + c1 = captures[1][0] + c4 = captures[4][0] + arr_name: bytes = get_text(src, c1.start_byte, c1.end_byte) array_type: Node = captures[3][0] - cast_target: bytes = captures[4][0].text.strip(b"()") + cast_target: bytes = get_text(src, c4.start_byte, c4.end_byte).strip(b"()") + named_child = array_type.named_children[0].named_children[1].named_children[1] array_templ_args: bytes = ( - array_type.named_children[0] - .named_children[1] - .named_children[1] - .text.strip(b"<>") + get_text(src, named_child.start_byte, named_child.end_byte).strip(b"<>") ) arr_type = array_templ_args.split(b",")[0] arr_len = array_templ_args.split(b",")[1] diff --git a/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py new file mode 100644 index 0000000000..54196bbbb6 --- /dev/null +++ b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py @@ -0,0 +1,14 @@ +from tree_sitter import Node, Query + +# Queries for the given pattern and converts the query back to the tree-siter v22.3 format. +# Which is: A list of tuples where the first element is the +# Node of the capture and the second one is the name. +def query_captures_22_3(query: Query, node: Node) -> list[tuple[Node, str]]: + result = list() + captures = query.captures(node) + while len(captures) != 0: + for name, nodes in captures.items(): + node = nodes.pop(0) + result.append((node, name)) + captures = {k: l for k, l in captures.items() if len(l) != 0} + return result From 7e627c991f56ac884e71392db7dc79f2bcdb47b2 Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 09:13:06 -0500 Subject: [PATCH 3/8] Formatting --- .../src/autosync/cpptranslator/patches/BitCastStdArray.py | 6 +++--- .../src/autosync/cpptranslator/tree_sitter_compatibility.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py b/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py index 5c19c7568c..6e5a67e63d 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py +++ b/suite/auto-sync/src/autosync/cpptranslator/patches/BitCastStdArray.py @@ -52,9 +52,9 @@ def get_patch(self, captures: [(Node, str)], src: bytes, **kwargs) -> bytes: array_type: Node = captures[3][0] cast_target: bytes = get_text(src, c4.start_byte, c4.end_byte).strip(b"()") named_child = array_type.named_children[0].named_children[1].named_children[1] - array_templ_args: bytes = ( - get_text(src, named_child.start_byte, named_child.end_byte).strip(b"<>") - ) + array_templ_args: bytes = get_text( + src, named_child.start_byte, named_child.end_byte + ).strip(b"<>") arr_type = array_templ_args.split(b",")[0] arr_len = array_templ_args.split(b",")[1] return ( diff --git a/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py index 54196bbbb6..0b014bd151 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py +++ b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py @@ -1,5 +1,6 @@ from tree_sitter import Node, Query + # Queries for the given pattern and converts the query back to the tree-siter v22.3 format. # Which is: A list of tuples where the first element is the # Node of the capture and the second one is the name. From 7b571c963128d85d109efa8cf2526e6bd62e919c Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 10:35:40 -0500 Subject: [PATCH 4/8] Attempt to use new query.match() API --- .../src/autosync/cpptranslator/patches/StreamOperation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py b/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py index dd44e73892..fd4e7fb990 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py +++ b/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py @@ -110,16 +110,17 @@ def get_patch(self, captures: [(Node, str)], src: bytes, **kwargs) -> bytes: (declaration ( (primitive_type) @typ (init_declarator - (identifier) @ident (#eq? @ident "{last_op_text.decode('utf8')}") + (identifier) @ident (#eq? @ident "{last_op_text.decode("utf8")}") ) )) @decl """ query = kwargs["ts_cpp_lang"].query(queue_str) + query.end_byte_for_pattern(last_op.start_byte) root_node = kwargs["tree"].root_node query_result = list( filter( lambda x: "typ" in x[1], - query.matches(root_node, end_byte=last_op.start_byte), + query.matches(root_node), ) ) if len(query_result) == 0: From 80f35931eed87bcfdfad34ad3618e1a7458dcca1 Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 12:09:50 -0500 Subject: [PATCH 5/8] Add tree and ts language to default patch arguments. --- .../src/autosync/cpptranslator/CppTranslator.py | 11 ++++++----- .../src/autosync/cpptranslator/Tests/test_patches.py | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py index c780e960eb..1b8563acee 100755 --- a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py +++ b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py @@ -454,8 +454,6 @@ def translate(self) -> None: cb: [(Node, str)] for cb in captures_bundle: patch_kwargs = self.get_patch_kwargs(patch) - patch_kwargs["tree"] = self.tree - patch_kwargs["ts_cpp_lang"] = self.ts_cpp_lang bytes_patch: bytes = patch.get_patch(cb, self.src, **patch_kwargs) p_list.append((bytes_patch, cb[0][0])) self.patch_src(p_list) @@ -481,9 +479,12 @@ def collect_template_instances(self): self.template_collector.collect() def get_patch_kwargs(self, patch): - if isinstance(patch, Includes): - return {"filename": self.current_src_path_in.name} - return dict() + default_kwargs = dict() + default_kwargs ["tree"] = self.tree + default_kwargs ["ts_cpp_lang"] = self.ts_cpp_lang + if isinstance(patch, Includes) and self.current_src_path_in: + default_kwargs["filename"] = self.current_src_path_in.name + return default_kwargs def remark_manual_files(self) -> None: manual_edited = self.conf["manually_edited_files"] diff --git a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py index 81d69e8b64..520a7c8f4b 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py @@ -6,6 +6,7 @@ import unittest +from pathlib import Path from tree_sitter import Node, Query from autosync.cpptranslator import CppTranslator @@ -95,11 +96,11 @@ def setUpClass(cls): configurator.get_parser(), configurator.get_cpp_lang(), [], [] ) - def check_patching_result(self, patch, syntax, expected, filename=""): + def check_patching_result(self, patch, syntax, expected, filename: Path =None): + kwargs = self.translator.get_patch_kwargs(patch) if filename: - kwargs = {"filename": filename} - else: - kwargs = self.translator.get_patch_kwargs(patch) + kwargs["filename"] = filename + query: Query = self.ts_cpp_lang.query(patch.get_search_pattern()) captures_bundle: [[(Node, str)]] = list() for q in query_captures_22_3(query, self.parser.parse(syntax).root_node): @@ -370,7 +371,7 @@ def test_includes(self): b"#include \n" b"#include \n\n" b"test_output", - "filename", + filename=Path("filename"), ) def test_inlinetostaticinline(self): From 5c612f9d45332ad05cb16129e9af20f7ef60e1c6 Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 12:17:10 -0500 Subject: [PATCH 6/8] Fix StreamOperation patch with new ts 24.0 API. --- .../src/autosync/cpptranslator/Tests/test_patches.py | 9 +++++++-- .../autosync/cpptranslator/patches/StreamOperation.py | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py index 520a7c8f4b..009e330ff7 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py @@ -96,14 +96,16 @@ def setUpClass(cls): configurator.get_parser(), configurator.get_cpp_lang(), [], [] ) - def check_patching_result(self, patch, syntax, expected, filename: Path =None): + def check_patching_result(self, patch, syntax, expected, filename: Path = None, tree: dict = None): kwargs = self.translator.get_patch_kwargs(patch) if filename: kwargs["filename"] = filename query: Query = self.ts_cpp_lang.query(patch.get_search_pattern()) + tree = self.parser.parse(syntax) + kwargs["tree"] = tree captures_bundle: [[(Node, str)]] = list() - for q in query_captures_22_3(query, self.parser.parse(syntax).root_node): + for q in query_captures_22_3(query, tree.root_node): if q[1] == patch.get_main_capture_name(): captures_bundle.append([q]) else: @@ -570,6 +572,9 @@ def test_streamoperation(self): b'SStream_concat0(OS, "cccc");', ) + syntax = b"{ int y = 1; int x = 1; OS << x; }" + self.check_patching_result(patch, syntax, b"printInt32(OS, x);") + def test_templatedeclaration(self): patch = TemplateDeclaration(0, self.template_collector) syntax = b"template void tfunction();" diff --git a/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py b/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py index fd4e7fb990..3fdd67b839 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py +++ b/suite/auto-sync/src/autosync/cpptranslator/patches/StreamOperation.py @@ -126,8 +126,7 @@ def get_patch(self, captures: [(Node, str)], src: bytes, **kwargs) -> bytes: if len(query_result) == 0: res += b"SStream_concat0(" + s_name + b", " + last_op_text + b");" else: - cap = query_result[-1] - typ = get_text_from_node(src, cap[1]["typ"]) + typ = get_text_from_node(src, query_result[0][1]["typ"][-1]) match typ: case b"int": res += b"printInt32(" + s_name + b", " + last_op_text + b");" From d52cc383e2d4b658531de18fb80891ca158456ce Mon Sep 17 00:00:00 2001 From: Rot127 Date: Wed, 14 May 2025 12:17:59 -0500 Subject: [PATCH 7/8] Remove invalid test --- .../src/autosync/cpptranslator/CppTranslator.py | 4 ++-- .../src/autosync/cpptranslator/Tests/test_patches.py | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py index 1b8563acee..1dd31e59af 100755 --- a/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py +++ b/suite/auto-sync/src/autosync/cpptranslator/CppTranslator.py @@ -480,8 +480,8 @@ def collect_template_instances(self): def get_patch_kwargs(self, patch): default_kwargs = dict() - default_kwargs ["tree"] = self.tree - default_kwargs ["ts_cpp_lang"] = self.ts_cpp_lang + default_kwargs["tree"] = self.tree + default_kwargs["ts_cpp_lang"] = self.ts_cpp_lang if isinstance(patch, Includes) and self.current_src_path_in: default_kwargs["filename"] = self.current_src_path_in.name return default_kwargs diff --git a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py index 009e330ff7..e4bc39e127 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py +++ b/suite/auto-sync/src/autosync/cpptranslator/Tests/test_patches.py @@ -96,7 +96,9 @@ def setUpClass(cls): configurator.get_parser(), configurator.get_cpp_lang(), [], [] ) - def check_patching_result(self, patch, syntax, expected, filename: Path = None, tree: dict = None): + def check_patching_result( + self, patch, syntax, expected, filename: Path = None, tree: dict = None + ): kwargs = self.translator.get_patch_kwargs(patch) if filename: kwargs["filename"] = filename @@ -546,11 +548,6 @@ def test_stifeaturebits(self): b"ARCH_getFeatureBits(Inst->csh->mode, ARCH::FLAG)", ) - def test_stifeaturebits(self): - patch = SubtargetInfoParam(0) - syntax = b"void function(MCSubtargetInfo &STI);" - self.check_patching_result(patch, syntax, b"()") - def test_streamoperation(self): patch = StreamOperations(0) syntax = b"{ OS << 'a'; }" From 2f68c95d78515b8dfd5e9005b5d335490f4b504c Mon Sep 17 00:00:00 2001 From: Rot127 Date: Tue, 20 May 2025 07:00:24 -0500 Subject: [PATCH 8/8] Update sorting code which still segfaults occasionally. --- .../cpptranslator/tree_sitter_compatibility.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py index 0b014bd151..15b305899b 100644 --- a/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py +++ b/suite/auto-sync/src/autosync/cpptranslator/tree_sitter_compatibility.py @@ -7,9 +7,16 @@ def query_captures_22_3(query: Query, node: Node) -> list[tuple[Node, str]]: result = list() captures = query.captures(node) - while len(captures) != 0: - for name, nodes in captures.items(): + for name, nodes in captures.items(): + print(f"{name}: {len(nodes)}") + # Captures are no longer sorted by start point. + captures_sorted = dict() + nodes: list[Node] + for name, nodes in captures.items(): + captures_sorted[name] = sorted(nodes, key=lambda n: n.start_point) + while len(captures_sorted) != 0: + for name, nodes in captures_sorted.items(): node = nodes.pop(0) result.append((node, name)) - captures = {k: l for k, l in captures.items() if len(l) != 0} + captures_sorted = {k: l for k, l in captures_sorted.items() if len(l) != 0} return result