def getmembers_static(object, predicate=None):
    """
    Return all members of an object as (name, value) pairs sorted by name via `getattr_static`.
    Optionally, only return members that satisfy a given predicate.

    - A static version of the `getmembers` function at:
        https://github.com/python/cpython/blob/3.9/Lib/inspect.py#L326-L368
        https://github.com/python/cpython/blob/14ba761078b5ae83519e34d66ab883743912c45b/Lib/inspect.py#L444-L486
    - `getmembers` function (from the inspect module) triggers execution instead of doing static analysis.
    - This leads to errors, particularly on properties of classes in cindex.py, which causes segmentation errors or raises an Exception if a particular condition is not satisfied.
    - To curb this, we fetch the members statically. We define a custom function based on the one in the inspect module.

    Parameters:
        - object: The object (instance or class) whose members are listed
        - predicate (optional): Callable taking the (static) member value; only members for which it returns a truthy value are kept

    Returns:
        - results (list): `(name, value)` tuples sorted by name, each name appearing once
    """
    # Local import: the module-level imports of this file do not include `types`.
    import types

    names = dir(object)
    # Add any DynamicClassAttributes to the list of names if object is a class.
    # They are defined on the direct bases but are hidden from `dir()` by some
    # metaclasses (e.g. `enum.Enum.name` / `enum.Enum.value`).
    if inspect.isclass(object):
        for base in object.__bases__:
            names.extend(
                key
                for key, value in vars(base).items()
                if isinstance(value, types.DynamicClassAttribute)
            )

    results = []
    # `set` drops the duplicates the step above may introduce; iterating in
    # sorted order yields the result already sorted by name.
    for key in sorted(set(names)):
        try:
            value = inspect.getattr_static(object, key)
        except AttributeError:
            # `dir()` may report names that cannot be resolved statically
            # (e.g. provided by a custom `__dir__`); skip them.
            continue
        if not predicate or predicate(value):
            results.append((key, value))
    return results


class ClangUtils:
    """
    Clang's cindex class utilities.

    Supports the following objects:
        CursorKind:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L657
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L657
            - A CursorKind describes the kind of entity that a cursor points to.
        Cursor:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L1415
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L1415
            - The Cursor class represents a reference to an element within the AST. It acts as a kind of iterator.
        Type:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L2180
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L2180
            - The Type class represents the type of an element in the abstract syntax tree.
    """

    def __init__(self, object):
        """
        Evaluate every `is_*` function, `get_*` function and property of `object`.

        Parameters:
            - object: A `clang.CursorKind`, `clang.Cursor` or `clang.Type` instance

        Raises:
            - NotImplementedError: If `object` is of any other type
        """
        if not isinstance(object, (clang.CursorKind, clang.Cursor, clang.Type)):
            raise NotImplementedError(f"Not implemented for {object}")

        self.check_functions_dict = {}
        self.get_functions_dict = {}
        self.properties_dict = {}

        # The functions/properties that cause segmentation errors; never evaluated.
        ignore_list = (
            "mangled_name",
            "get_address_space",
            "get_typedef_name",
            "tls_kind",
        )

        # populate dicts
        for name, member in getmembers_static(object):
            if name in ignore_list:
                continue

            if inspect.isfunction(member):  # if function
                if name.startswith("is_"):
                    target = self.check_functions_dict
                elif name.startswith("get_"):
                    target = self.get_functions_dict
                else:
                    continue
                try:
                    # cindex.py's functions raise exceptions internally; calls
                    # that need extra arguments fail here too and are skipped.
                    target[name] = member(object)
                except Exception:
                    # `Exception` (not a bare except) so that KeyboardInterrupt
                    # and SystemExit still propagate.
                    continue
            elif isinstance(member, property):  # else, property
                try:
                    # cindex.py's property functions raise exceptions internally
                    self.properties_dict[name] = getattr(object, name)
                except Exception:
                    continue

    def get_check_functions_dict(self):
        """
        Returns: `check_functions_dict`:
            - functions that begin with "is_" i.e., checking functions
            - {function_name, function_result}
        """
        return self.check_functions_dict

    def get_get_functions_dict(self):
        """
        Returns: `get_functions_dict`:
            - functions that begin with "get_" i.e., getter functions
            - {function_name, function_result}
        """
        return self.get_functions_dict

    def get_properties_dict(self):
        """
        Returns: properties_dict
            - Properties
            - {property_name, property}
        """
        return self.properties_dict


class CompilationDatabase:
    """
    Build a compilation database from a given directory
    """

    def __init__(self, compilation_database_path):
        """
        Parameters:
            - compilation_database_path: The path to the directory containing `compile_commands.json`
        """
        self.compilation_database = clang.CompilationDatabase.fromDirectory(
            buildDir=compilation_database_path
        )

    def get_compilation_arguments(self, filename=None):
        """
        Returns the compilation commands extracted from the compilation database

        Parameters:
            - filename (optional): To get compilation commands of a file;
              when omitted, the commands of every file in the database are returned

        Returns:
            - compilation_arguments (dict): {filename: compiler arguments}
              (empty if the database has no command for `filename`)
        """

        if filename:
            # Get compilation commands from the compilation database for the given file
            compilation_commands = self.compilation_database.getCompileCommands(
                filename=filename
            )
        else:
            # Get all compilation commands from the compilation database
            compilation_commands = self.compilation_database.getAllCompileCommands()

        # libclang hands back None (not an empty iterable) when nothing matches.
        if not compilation_commands:
            return {}

        # {file: compiler arguments}
        # The 0th argument is the compiler name and the last one is the filename.
        return {
            command.filename: list(command.arguments)[1:-1]
            for command in compilation_commands
        }
sub_item["kind"] == "TYPE_REF": + if sub_item["cursor_kind"]["name"] == "TYPE_REF": # TODO: Will this case only apply to templates? # @TODO: Make more robust - type_ref = sub_item["name"].replace("struct ", "").replace("pcl::", "") + type_ref = ( + sub_item["cursor"]["spelling"] + .replace("struct ", "") + .replace("pcl::", "") + ) template_class_name = f"{self.name}<{type_ref}>" template_class_name_python = f"{self.name}_{type_ref}" base_class_list = [ - sub_item["name"] + sub_item["cursor"]["spelling"] for sub_item in self.members - if sub_item["kind"] == "CXX_BASE_SPECIFIER" + if sub_item["cursor_kind"]["name"] == "CXX_BASE_SPECIFIER" ] base_class_list_string = [ @@ -253,35 +261,35 @@ def handle_struct_decl(self) -> None: for sub_item in self.members: fields = self.get_fields_from_anonymous(sub_item) for field in fields: - if field["element_type"] == "ConstantArray": + if field["type"]["kind"] == "ConstantArray": # TODO: FIX: readwrite, not readonly self._linelist.append( - f'.def_property_readonly("{field["name"]}", []({self.name}& obj) {{return obj.{field["name"]}; }})' # float[ ' + f'obj.{sub_item["name"]}' + '.size()];} )' + f'.def_property_readonly("{field["cursor"]["spelling"]}", []({self.name}& obj) {{return obj.{field["cursor"]["spelling"]}; }})' # float[ ' + f'obj.{sub_item["cursor"]["spelling"]}' + '.size()];} )' ) else: self._linelist.append( - f'.def_readwrite("{field["name"]}", &{self.name}::{field["name"]})' + f'.def_readwrite("{field["cursor"]["spelling"]}", &{self.name}::{field["cursor"]["spelling"]})' ) for sub_item in self.members: # handle field declarations - if sub_item["kind"] == "FIELD_DECL": - if sub_item["element_type"] == "ConstantArray": + if sub_item["cursor_kind"]["name"] == "FIELD_DECL": + if sub_item["type"]["kind"] == "ConstantArray": self._linelist.append( - f'.def_property_readonly("{sub_item["name"]}", []({self.name}& obj) {{return obj.{sub_item["name"]}; }})' # float[ ' + f'obj.{sub_item["name"]}' + '.size()];} )' + 
def get_parm_types(self, item: Dict[str, Any]) -> str:
    """
    Return the C++ type string to use for a parameter in a `py::init<...>` list.

    Parameters:
        - item (dict): Parsed `PARM_DECL` node; reads `item["type"]["kind"]` and,
          for reference/elaborated types, the `cursor_kind`/`cursor` entries of
          `item["members"]`

    Returns:
        - The parameter's type as a string:
            - `"<type> &"` for lvalue references
            - `"<namespace>::<type>"` for elaborated types
            - the lower-cased kind for float / double / int
            - the raw type kind otherwise, or when no `TYPE_REF` member is found
    """
    kind = item["type"]["kind"]
    # The parser stores `TypeKind.name` (e.g. "LVALUEREFERENCE", "INT") while
    # older parsed info used the spelling (e.g. "LValueReference", "Int");
    # compare case-insensitively so both forms are recognised.
    kind_key = kind.upper()

    # Fallback, also used when a reference/elaborated type has no TYPE_REF
    # member (previously this path left the result unbound).
    parameter_type_list = f"{kind}"

    if kind_key == "LVALUEREFERENCE":
        for sub_item in item["members"]:
            if sub_item["cursor_kind"]["name"] == "TYPE_REF":
                # @TODO: Make more robust
                type_ref = (
                    sub_item["cursor"]["spelling"]
                    .replace("struct ", "")
                    .replace("pcl::", "")
                )
                parameter_type_list = f"{type_ref} &"
    elif kind_key == "ELABORATED":
        namespace_ref = ""
        for sub_item in item["members"]:
            member_kind = sub_item["cursor_kind"]["name"]
            if member_kind == "NAMESPACE_REF":
                namespace_ref += f'{sub_item["cursor"]["spelling"]}::'
            if member_kind == "TYPE_REF":
                parameter_type_list = (
                    f'{namespace_ref}{sub_item["cursor"]["spelling"]}'
                )
    elif kind_key in ("FLOAT", "DOUBLE", "INT"):
        parameter_type_list = kind.lower()

    return parameter_type_list
class Parse:
    """
    Class containing functions to generate an AST of a file and parse it to retrieve relevant information.
    """

    def __init__(self, file, compiler_arguments):
        """
        Parse `file` with clang and remember the root node of its AST.

        Parameters:
            - file: The source file to parse
            - compiler_arguments: The arguments to pass to the compiler
        """
        index = clang.Index.create()

        # Why parse using the option `PARSE_DETAILED_PROCESSING_RECORD`?
        #   - Indicates that the parser should construct a detailed preprocessing record,
        #     including all macro definitions and instantiations
        #   - Required to retrieve `CursorKind.INCLUSION_DIRECTIVE`
        source_ast = index.parse(
            path=file,
            args=compiler_arguments,
            options=clang.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD,
        )

        self.root_node = {
            "cursor": source_ast.cursor,
            "filename": source_ast.spelling,
            "depth": 0,
        }

    @staticmethod
    def _is_valid_child(parent_node, child_node):
        """
        Check whether the child's cursor is located in the parent's file.

        Returns:
            - bool: True only if the child has a file and its name equals `parent_node["filename"]`
        """
        child = child_node.get("cursor")
        parent_filename = parent_node.get("filename")

        child_file = child.location.file
        return bool(child_file) and child_file.name == parent_filename

    @staticmethod
    def _flatten_clang_values(parsed_node):
        """
        Replace the clang objects stored in `parsed_node` by plain strings, in place.

        Entries that are absent (ClangUtils drops a property whose getter
        raised) are left absent instead of raising a KeyError.

        Returns:
            - parsed_node (dict): The same dictionary, for convenience
        """
        replacements = (
            # get spelling from object
            ("cursor", "result_type", "spelling"),
            # replace `AccessSpecifier.value` with just `value`
            ("cursor", "access_specifier", "name"),
            # replace `TypeKind.value` with just `value`
            ("type", "kind", "name"),
        )
        for section, key, attribute in replacements:
            values = parsed_node[section]
            if key in values:
                values[key] = getattr(values[key], attribute)
        return parsed_node

    @staticmethod
    def get_parsed_node(node):
        """
        Build the parsed information of a single node (without its children).

        Parameters:
            - node (dict): The node in the AST; reads the keys `cursor` and `depth`

        Returns:
            - parsed_node (dict): Keys `depth`, `line`, `column`, `tokens`,
              `cursor_kind`, `cursor`, `type` and an empty `members` list
        """
        cursor = node.get("cursor")

        # Objects to get various kinds of checks available in cindex.py via clang_utils.py
        cursor_kind_utils = ClangUtils(cursor.kind)
        cursor_utils = ClangUtils(cursor)
        cursor_type_utils = ClangUtils(cursor.type)

        parsed_node = {
            "depth": node.get("depth"),
            "line": cursor.location.line,
            "column": cursor.location.column,
            "tokens": [x.spelling for x in cursor.get_tokens()],
            "cursor_kind": {
                **cursor_kind_utils.get_check_functions_dict(),  # Functions that begin with "is_" i.e., checking functions
                **cursor_kind_utils.get_get_functions_dict(),  # Functions that begin with "get_" i.e., getter functions
                **cursor_kind_utils.get_properties_dict(),  # Properties
            },
            "cursor": {
                **cursor_utils.get_check_functions_dict(),
                **cursor_utils.get_get_functions_dict(),
                **cursor_utils.get_properties_dict(),
            },
            "type": {
                **cursor_type_utils.get_check_functions_dict(),
                **cursor_type_utils.get_get_functions_dict(),
                **cursor_type_utils.get_properties_dict(),
            },
            "members": [],
        }

        # HACKY FIXES: turn the remaining clang objects into strings
        return Parse._flatten_clang_values(parsed_node)

    @classmethod
    def parse_node_recursive(cls, node):
        """
        Generates parsed information by recursively traversing the AST

        Parameters:
            - node (dict):
                - The node in the AST
                - Keys:
                    - cursor: The cursor pointing to a node
                    - filename:
                        - The file's name to check if the node belongs to it
                        - Needed to ensure that only symbols belonging to the file gets parsed, not the included files' symbols
                    - depth: The depth of the node (root=0)

        Returns:
            - parsed_info (dict):
                - Contains key-value pairs of various traits of a node
                - The key 'members' contains the node's children's `parsed_info`
        """

        cursor = node.get("cursor")
        filename = node.get("filename")
        depth = node.get("depth")

        parsed_info = cls.get_parsed_node(node)

        # Get cursor's children and recursively add their info to a dictionary, as members of the parent
        for child in cursor.get_children():
            child_node = {"cursor": child, "filename": filename, "depth": depth + 1}
            if cls._is_valid_child(node, child_node):
                parsed_info["members"].append(cls.parse_node_recursive(child_node))

        return parsed_info

    def get_parsed_info(self):
        """
        Returns the parsed information for a file by recursively traversing the AST

        Returns:
            - parsed_info (dict):
                - Contains key-value pairs of various traits of a node
                - The key 'members' contains the node's children's `parsed_info`
        """
        return self.parse_node_recursive(self.root_node)
source=str(source_path), - compilation_database_path=create_compilation_database( + compilation_database_path = get_compilation_database_path( tmp_path=tmp_path, filepath=source_path - ), - ) + ) + + compilation_database = CompilationDatabase( + compilation_database_path=compilation_database_path + ) + + compilation_arguments = compilation_database.get_compilation_arguments( + filename=source_path + ) + + compiler_arguments = compilation_arguments.get(source_path) + + parsed_info = Parse(source_path, compiler_arguments).get_parsed_info() return parsed_info @@ -46,27 +57,30 @@ def test_anonymous_decls(tmp_path): union_decl = parsed_info["members"][0] - assert union_decl["kind"] == "ANONYMOUS_UNION_DECL" - assert union_decl["name"] == "" + assert union_decl["cursor_kind"]["name"] == "UNION_DECL" + assert union_decl["cursor"]["is_anonymous"] == True + assert union_decl["cursor"]["spelling"] == "" struct_decl = union_decl["members"][0] - assert struct_decl["kind"] == "ANONYMOUS_STRUCT_DECL" - assert struct_decl["name"] == "" + assert struct_decl["cursor_kind"]["name"] == "STRUCT_DECL" + assert union_decl["cursor"]["is_anonymous"] == True + assert union_decl["cursor"]["spelling"] == "" enum_decl = struct_decl["members"][0] - assert enum_decl["kind"] == "ANONYMOUS_ENUM_DECL" - assert enum_decl["name"] == "" + assert enum_decl["cursor_kind"]["name"] == "ENUM_DECL" + assert union_decl["cursor"]["is_anonymous"] == True + assert union_decl["cursor"]["spelling"] == "" def test_translation_unit(tmp_path): file_contents = "" parsed_info = get_parsed_info(tmp_path=tmp_path, file_contents=file_contents) - assert parsed_info["kind"] == "TRANSLATION_UNIT" + assert parsed_info["cursor_kind"]["name"] == "TRANSLATION_UNIT" assert parsed_info["depth"] == 0 - assert parsed_info["name"] == str(tmp_path / "file.cpp") + assert parsed_info["cursor"]["spelling"] == str(tmp_path / "file.cpp") def test_namespace(tmp_path): @@ -75,8 +89,8 @@ def test_namespace(tmp_path): namespace = 
parsed_info["members"][0] - assert namespace["kind"] == "NAMESPACE" - assert namespace["name"] == "a_namespace" + assert namespace["cursor_kind"]["name"] == "NAMESPACE" + assert namespace["cursor"]["spelling"] == "a_namespace" def test_namespace_ref(tmp_path): @@ -88,14 +102,14 @@ def test_namespace_ref(tmp_path): inclusion_directive = parsed_info["members"][0] - assert inclusion_directive["kind"] == "INCLUSION_DIRECTIVE" - assert inclusion_directive["name"] == "ostream" + assert inclusion_directive["cursor_kind"]["name"] == "INCLUSION_DIRECTIVE" + assert inclusion_directive["cursor"]["spelling"] == "ostream" var_decl = parsed_info["members"][1] namespace_ref = var_decl["members"][0] - assert namespace_ref["kind"] == "NAMESPACE_REF" - assert namespace_ref["name"] == "std" + assert namespace_ref["cursor_kind"]["name"] == "NAMESPACE_REF" + assert namespace_ref["cursor"]["spelling"] == "std" def test_var_decl(tmp_path): @@ -104,9 +118,9 @@ def test_var_decl(tmp_path): var_decl = parsed_info["members"][0] - assert var_decl["kind"] == "VAR_DECL" - assert var_decl["element_type"] == "Int" - assert var_decl["name"] == "anInt" + assert var_decl["cursor_kind"]["name"] == "VAR_DECL" + assert var_decl["type"]["kind"] == "INT" + assert var_decl["cursor"]["spelling"] == "anInt" def test_field_decl(tmp_path): @@ -120,9 +134,9 @@ def test_field_decl(tmp_path): struct_decl = parsed_info["members"][0] field_decl = struct_decl["members"][0] - assert field_decl["kind"] == "FIELD_DECL" - assert field_decl["element_type"] == "Int" - assert field_decl["name"] == "aClassMember" + assert field_decl["cursor_kind"]["name"] == "FIELD_DECL" + assert field_decl["type"]["kind"] == "INT" + assert field_decl["cursor"]["spelling"] == "aClassMember" def test_parsed_info_structure(tmp_path): @@ -142,9 +156,9 @@ def test_function_decl_without_parameters(tmp_path): func_decl = parsed_info["members"][0] - assert func_decl["kind"] == "FUNCTION_DECL" - assert func_decl["name"] == "aFunction" - assert 
func_decl["result_type"] == "int" + assert func_decl["cursor_kind"]["name"] == "FUNCTION_DECL" + assert func_decl["cursor"]["spelling"] == "aFunction" + assert func_decl["cursor"]["result_type"] == "int" def test_function_decl_with_parameters(tmp_path): @@ -155,18 +169,18 @@ def test_function_decl_with_parameters(tmp_path): func_decl = parsed_info["members"][0] - assert func_decl["kind"] == "FUNCTION_DECL" - assert func_decl["name"] == "aFunction" - assert func_decl["result_type"] == "int" + assert func_decl["cursor_kind"]["name"] == "FUNCTION_DECL" + assert func_decl["cursor"]["spelling"] == "aFunction" + assert func_decl["cursor"]["result_type"] == "int" first_param = func_decl["members"][0] second_param = func_decl["members"][1] - assert first_param["name"] == "firstParam" - assert first_param["element_type"] == "Int" + assert first_param["cursor"]["spelling"] == "firstParam" + assert first_param["type"]["kind"] == "INT" - assert second_param["name"] == "secondParam" - assert second_param["element_type"] == "Double" + assert second_param["cursor"]["spelling"] == "secondParam" + assert second_param["type"]["kind"] == "DOUBLE" def test_simple_call_expr(tmp_path): @@ -181,10 +195,10 @@ def test_simple_call_expr(tmp_path): var_decl = parsed_info["members"][1] call_expr = var_decl["members"][0] - assert call_expr["kind"] == "CALL_EXPR" - assert call_expr["name"] == "aFunction" + assert call_expr["cursor_kind"]["name"] == "CALL_EXPR" + assert call_expr["cursor"]["spelling"] == "aFunction" - assert var_decl["name"] == "anInt" + assert var_decl["cursor"]["spelling"] == "anInt" def test_struct_decl(tmp_path): @@ -193,8 +207,8 @@ def test_struct_decl(tmp_path): struct_decl = parsed_info["members"][0] - assert struct_decl["kind"] == "STRUCT_DECL" - assert struct_decl["name"] == "AStruct" + assert struct_decl["cursor_kind"]["name"] == "STRUCT_DECL" + assert struct_decl["cursor"]["spelling"] == "AStruct" def test_public_inheritance(tmp_path): @@ -207,9 +221,9 @@ def 
test_public_inheritance(tmp_path): child_struct_decl = parsed_info["members"][1] cxx_base_specifier = child_struct_decl["members"][0] - assert cxx_base_specifier["kind"] == "CXX_BASE_SPECIFIER" - assert cxx_base_specifier["access_specifier"] == "PUBLIC" - assert cxx_base_specifier["name"] == "struct BaseStruct" + assert cxx_base_specifier["cursor_kind"]["name"] == "CXX_BASE_SPECIFIER" + assert cxx_base_specifier["cursor"]["access_specifier"] == "PUBLIC" + assert cxx_base_specifier["cursor"]["spelling"] == "struct BaseStruct" def test_member_function(tmp_path): @@ -223,9 +237,9 @@ def test_member_function(tmp_path): struct_decl = parsed_info["members"][0] cxx_method = struct_decl["members"][0] - assert cxx_method["kind"] == "CXX_METHOD" - assert cxx_method["result_type"] == "void" - assert cxx_method["name"] == "aMethod" + assert cxx_method["cursor_kind"]["name"] == "CXX_METHOD" + assert cxx_method["cursor"]["result_type"] == "void" + assert cxx_method["cursor"]["spelling"] == "aMethod" def test_type_ref(tmp_path): @@ -242,12 +256,12 @@ class AClass { cxx_method = class_decl["members"][0] parm_decl = cxx_method["members"][0] - assert parm_decl["name"] == "aParameter" + assert parm_decl["cursor"]["spelling"] == "aParameter" type_ref = parm_decl["members"][0] - assert type_ref["kind"] == "TYPE_REF" - assert type_ref["name"] == "struct SomeUsefulType" + assert type_ref["cursor_kind"]["name"] == "TYPE_REF" + assert type_ref["cursor"]["spelling"] == "struct SomeUsefulType" def test_simple_constructor(tmp_path): @@ -261,9 +275,9 @@ def test_simple_constructor(tmp_path): struct_decl = parsed_info["members"][0] constructor = struct_decl["members"][0] - assert constructor["kind"] == "CONSTRUCTOR" - assert constructor["access_specifier"] == "PUBLIC" - assert constructor["name"] == "AStruct" + assert constructor["cursor_kind"]["name"] == "CONSTRUCTOR" + assert constructor["cursor"]["access_specifier"] == "PUBLIC" + assert constructor["cursor"]["spelling"] == "AStruct" def 
test_unexposed_expr(tmp_path): @@ -279,12 +293,12 @@ class SimpleClassWithConstructor { constructor = struct_decl["members"][1] member_ref = constructor["members"][1] - assert member_ref["name"] == "aClassMember" + assert member_ref["cursor"]["spelling"] == "aClassMember" unexposed_expr = constructor["members"][2] - assert unexposed_expr["kind"] == "UNEXPOSED_EXPR" - assert unexposed_expr["name"] == "aConstructorParameter" + assert unexposed_expr["cursor_kind"]["name"] == "UNEXPOSED_EXPR" + assert unexposed_expr["cursor"]["spelling"] == "aConstructorParameter" # @TODO: Not sure how to reproduce. Maybe later. @@ -309,10 +323,10 @@ def test_decl_ref_expr(tmp_path): decl_ref_expr_1 = unexposed_expr_1["members"][0] decl_ref_expr_2 = unexposed_expr_2["members"][0] - assert decl_ref_expr_1["kind"] == "DECL_REF_EXPR" - assert decl_ref_expr_2["kind"] == "DECL_REF_EXPR" - assert decl_ref_expr_1["name"] == "secondFunctionParameter" - assert decl_ref_expr_2["name"] == "firstFunctionParameter" + assert decl_ref_expr_1["cursor_kind"]["name"] == "DECL_REF_EXPR" + assert decl_ref_expr_2["cursor_kind"]["name"] == "DECL_REF_EXPR" + assert decl_ref_expr_1["cursor"]["spelling"] == "secondFunctionParameter" + assert decl_ref_expr_2["cursor"]["spelling"] == "firstFunctionParameter" def test_member_ref(tmp_path): @@ -330,12 +344,12 @@ def test_member_ref(tmp_path): member_ref_1 = constructor["members"][2] member_ref_2 = constructor["members"][4] - assert member_ref_1["kind"] == "MEMBER_REF" - assert member_ref_2["kind"] == "MEMBER_REF" - assert member_ref_1["element_type"] == "Int" - assert member_ref_2["element_type"] == "Int" - assert member_ref_1["name"] == "firstMember" - assert member_ref_2["name"] == "secondMember" + assert member_ref_1["cursor_kind"]["name"] == "MEMBER_REF" + assert member_ref_2["cursor_kind"]["name"] == "MEMBER_REF" + assert member_ref_1["type"]["kind"] == "INT" + assert member_ref_2["type"]["kind"] == "INT" + assert member_ref_1["cursor"]["spelling"] == 
"firstMember" + assert member_ref_2["cursor"]["spelling"] == "secondMember" def test_class_template(tmp_path): @@ -347,14 +361,14 @@ def test_class_template(tmp_path): class_template = parsed_info["members"][0] - assert class_template["kind"] == "CLASS_TEMPLATE" - assert class_template["name"] == "AStruct" + assert class_template["cursor_kind"]["name"] == "CLASS_TEMPLATE" + assert class_template["cursor"]["spelling"] == "AStruct" template_type_parameter = class_template["members"][0] - assert template_type_parameter["kind"] == "TEMPLATE_TYPE_PARAMETER" - assert template_type_parameter["name"] == "T" - assert template_type_parameter["access_specifier"] == "PUBLIC" + assert template_type_parameter["cursor_kind"]["name"] == "TEMPLATE_TYPE_PARAMETER" + assert template_type_parameter["cursor"]["spelling"] == "T" + assert template_type_parameter["cursor"]["access_specifier"] == "PUBLIC" def test_template_non_type_parameter(tmp_path): @@ -366,14 +380,17 @@ def test_template_non_type_parameter(tmp_path): class_template = parsed_info["members"][0] - assert class_template["kind"] == "CLASS_TEMPLATE" - assert class_template["name"] == "AStruct" + assert class_template["cursor_kind"]["name"] == "CLASS_TEMPLATE" + assert class_template["cursor"]["spelling"] == "AStruct" template_non_type_parameter = class_template["members"][0] - assert template_non_type_parameter["kind"] == "TEMPLATE_NON_TYPE_PARAMETER" - assert template_non_type_parameter["element_type"] == "Int" - assert template_non_type_parameter["name"] == "N" + assert ( + template_non_type_parameter["cursor_kind"]["name"] + == "TEMPLATE_NON_TYPE_PARAMETER" + ) + assert template_non_type_parameter["type"]["kind"] == "INT" + assert template_non_type_parameter["cursor"]["spelling"] == "N" def test_function_template(tmp_path): @@ -385,15 +402,15 @@ def test_function_template(tmp_path): function_template = parsed_info["members"][0] - assert function_template["kind"] == "FUNCTION_TEMPLATE" - assert 
function_template["result_type"] == "void" - assert function_template["name"] == "aFunction" + assert function_template["cursor_kind"]["name"] == "FUNCTION_TEMPLATE" + assert function_template["cursor"]["result_type"] == "void" + assert function_template["cursor"]["spelling"] == "aFunction" template_type_parameter = function_template["members"][0] - assert template_type_parameter["kind"] == "TEMPLATE_TYPE_PARAMETER" - assert template_type_parameter["name"] == "T" - assert template_type_parameter["access_specifier"] == "PUBLIC" + assert template_type_parameter["cursor_kind"]["name"] == "TEMPLATE_TYPE_PARAMETER" + assert template_type_parameter["cursor"]["spelling"] == "T" + assert template_type_parameter["cursor"]["access_specifier"] == "PUBLIC" def test_template_type_parameter(tmp_path): @@ -409,16 +426,16 @@ def test_template_type_parameter(tmp_path): class_template = parsed_info["members"][0] template_type_parameter = class_template["members"][0] - assert template_type_parameter["kind"] == "TEMPLATE_TYPE_PARAMETER" - assert template_type_parameter["element_type"] == "Unexposed" - assert template_type_parameter["name"] == "T" + assert template_type_parameter["cursor_kind"]["name"] == "TEMPLATE_TYPE_PARAMETER" + assert template_type_parameter["type"]["kind"] == "UNEXPOSED" + assert template_type_parameter["cursor"]["spelling"] == "T" function_template = parsed_info["members"][1] template_type_parameter = function_template["members"][0] - assert template_type_parameter["kind"] == "TEMPLATE_TYPE_PARAMETER" - assert template_type_parameter["element_type"] == "Unexposed" - assert template_type_parameter["name"] == "P" + assert template_type_parameter["cursor_kind"]["name"] == "TEMPLATE_TYPE_PARAMETER" + assert template_type_parameter["type"]["kind"] == "UNEXPOSED" + assert template_type_parameter["cursor"]["spelling"] == "P" def test_default_delete_constructor(tmp_path): @@ -436,14 +453,14 @@ class aClass { default_constructor = class_decl["members"][0] - assert 
default_constructor["kind"] == "CONSTRUCTOR" - assert default_constructor["name"] == "aClass" - assert default_constructor["result_type"] == "void" - assert default_constructor["is_default_constructor"] + assert default_constructor["cursor_kind"]["name"] == "CONSTRUCTOR" + assert default_constructor["cursor"]["spelling"] == "aClass" + assert default_constructor["cursor"]["result_type"] == "void" + assert default_constructor["cursor"]["is_default_constructor"] delete_constructor = class_decl["members"][1] - assert delete_constructor["kind"] == "CONSTRUCTOR" - assert delete_constructor["name"] == "aClass" - assert delete_constructor["result_type"] == "void" + assert delete_constructor["cursor_kind"]["name"] == "CONSTRUCTOR" + assert delete_constructor["cursor"]["spelling"] == "aClass" + assert delete_constructor["cursor"]["result_type"] == "void" # no check available for deleted ctor analogous to `is_default_constructor`