Skip to content

Commit cc1ffd7

Browse files
authored
Merge pull request #6 from divmadan/polish-parse
Introduce clang_utils.py, compilation_database.py and related changes
2 parents 5e07663 + 6f4c13e commit cc1ffd7

File tree

7 files changed

+445
-407
lines changed

7 files changed

+445
-407
lines changed

.github/workflows/pytest.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on: [push, pull_request]
66
jobs:
77
Pytest:
88
# The type of runner that the job will run on
9-
runs-on: ubuntu-latest
9+
runs-on: ubuntu-20.04
1010
strategy:
1111
matrix:
1212
python-version: [3.6, 3.7, 3.8, 3.9]
@@ -34,12 +34,12 @@ jobs:
3434
- name: Add llvm keys
3535
run: |
3636
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
37-
echo 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' | sudo tee -a /etc/apt/sources.list
38-
echo 'deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' | sudo tee -a /etc/apt/sources.list
37+
echo 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-12 main' | sudo tee -a /etc/apt/sources.list
38+
echo 'deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-12 main' | sudo tee -a /etc/apt/sources.list
3939
- name: Install libclang and its python bindings
4040
run: |
4141
sudo apt-get update
42-
sudo apt-get install -y libclang-11-dev python3-clang-11
42+
sudo apt-get install -y libclang-12-dev python3-clang-12
4343
4444
# Add dist-package to path to enable apt installed python3-clang import
4545
- name: Add dist-packages to PYTHONPATH
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import inspect
2+
import clang.cindex as clang
3+
4+
5+
def getmembers_static(object, predicate=None):
    """
    Return all members of an object as (name, value) pairs sorted by name via `getattr_static`.
    Optionally, only return members that satisfy a given predicate.

    - A static version of `get_members` function at:
      https://github.com/python/cpython/blob/3.9/Lib/inspect.py#L326-L368
      https://github.com/python/cpython/blob/14ba761078b5ae83519e34d66ab883743912c45b/Lib/inspect.py#L444-L486
    - `getmembers` function (from the inspect module) triggers execution instead of doing static analysis.
    - This leads to errors, particularly on properties of classes in cindex.py, which causes segmentation errors or raises an Exception if a particular condition is not satisfied.
    - To curb this, we fetch the members statically. We define a custom function based on the one in the inspect module.

    Parameters:
        - object: The instance or class whose members are listed.
        - predicate (optional): Callable taking a member value; only members
          for which it returns a truthy value are kept.

    Returns:
        - results (list): (name, value) tuples sorted by name. Values are the
          raw objects found statically (e.g. `property` objects and plain
          functions), never the result of evaluating a descriptor.
    """
    # Imported locally so the function does not rely on a module-level import.
    import types

    names = dir(object)
    known_names = set(names)
    # Add any DynamicClassAttributes to the list of names if object is a class;
    # `dir()` may hide them (e.g. when a metaclass customises `__dir__`).
    # Names already reported by `dir()` are not added a second time.
    try:
        for base in object.__bases__:
            for key, value in base.__dict__.items():
                if (
                    isinstance(value, types.DynamicClassAttribute)
                    and key not in known_names
                ):
                    names.append(key)
                    known_names.add(key)
    except AttributeError:
        # Not a class (no `__bases__`), or a base without a `__dict__`.
        pass

    results = []
    for key in names:
        try:
            value = inspect.getattr_static(object, key)
        except AttributeError:
            # `dir()` reported a name that cannot be resolved statically
            # (e.g. a custom `__dir__`); discard it and move on.
            continue
        if not predicate or predicate(value):
            results.append((key, value))
    results.sort(key=lambda pair: pair[0])
    return results
38+
39+
40+
class ClangUtils:
    """
    Clang's cindex class utilities.

    Supports the following objects:
        CursorKind:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L657
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L657
            - A CursorKind describes the kind of entity that a cursor points to.
        Cursor:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L1415
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L1415
            - The Cursor class represents a reference to an element within the AST. It acts as a kind of iterator.
        Type:
            https://github.com/llvm/llvm-project/blob/release/12.x/clang/bindings/python/clang/cindex.py#L2180
            https://github.com/llvm/llvm-project/blob/1acd9a1a29ac30044ecefb6613485d5d168f66ca/clang/bindings/python/clang/cindex.py#L2180
            - The Type class represents the type of an element in the abstract syntax tree.

    On construction, every "is_*" function, "get_*" function and property of
    the wrapped object is evaluated once and the results are stored in three
    dictionaries, exposed through the getter methods below.
    """

    def __init__(self, object):
        """
        Evaluate and cache the checks, getters and properties of `object`.

        Parameters:
            - object: A `clang.cindex` CursorKind, Cursor or Type instance.

        Raises:
            - NotImplementedError: If `object` is not one of the supported types.
        """
        if not isinstance(object, (clang.CursorKind, clang.Cursor, clang.Type)):
            raise NotImplementedError(f"Not implemented for {object}")

        self.check_functions_dict = {}
        self.get_functions_dict = {}
        self.properties_dict = {}

        # Functions/properties that cause segmentation errors; never evaluated.
        ignore_list = {
            "mangled_name",
            "get_address_space",
            "get_typedef_name",
            "tls_kind",
        }

        # Populate the dicts. Members are discovered statically so that
        # nothing is evaluated before the ignore list has been applied.
        for name, member in getmembers_static(object):
            if name in ignore_list:
                continue

            if inspect.isfunction(member):
                # cindex.py's functions raise exceptions internally; a member
                # that fails (or needs extra arguments) is skipped.
                # `Exception` rather than a bare `except` so KeyboardInterrupt
                # and SystemExit still propagate.
                try:
                    if name.startswith("is_"):
                        self.check_functions_dict[name] = member(object)
                    elif name.startswith("get_"):
                        self.get_functions_dict[name] = member(object)
                except Exception:
                    continue
            elif isinstance(member, property):
                # cindex.py's property functions raise exceptions internally.
                try:
                    self.properties_dict[name] = getattr(object, name)
                except Exception:
                    continue

    def get_check_functions_dict(self):
        """
        Returns: `check_functions_dict`:
            - functions that begin with "is_" i.e., checking functions
            - {function_name: function_result}
        """
        return self.check_functions_dict

    def get_get_functions_dict(self):
        """
        Returns: `get_functions_dict`:
            - functions that begin with "get_" i.e., getter functions
            - {function_name: function_result}
        """
        return self.get_functions_dict

    def get_properties_dict(self):
        """
        Returns: `properties_dict`:
            - Properties
            - {property_name: property_value}
        """
        return self.properties_dict
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import clang.cindex as clang
2+
3+
4+
class CompilationDatabase:
    """
    Build a compilation database from a given directory
    """

    def __init__(self, compilation_database_path):
        """
        Load the compilation database.

        Parameters:
            - compilation_database_path: The directory passed to libclang as
              `buildDir`, i.e. the one containing `compile_commands.json`
        """
        self.compilation_database = clang.CompilationDatabase.fromDirectory(
            buildDir=compilation_database_path
        )

    def get_compilation_arguments(self, filename=None):
        """
        Returns the compilation commands extracted from the compilation database

        Parameters:
            - filename (optional): To get compilation commands of a single file;
              when omitted, the commands of every file in the database are used

        Returns:
            - compilation_arguments (dict): {filename: compiler arguments}
              The first and last argument of each command are dropped
              (presumably the compiler executable and the source file itself).
              Empty when the database has no commands for the request.
        """

        if filename:
            # Get compilation commands from the compilation database for the given file
            compilation_commands = self.compilation_database.getCompileCommands(
                filename=filename
            )
        else:
            # Get all compilation commands from the compilation database
            compilation_commands = self.compilation_database.getAllCompileCommands()

        # The lookup yields None when nothing matches (e.g. an unknown file);
        # treat that as "no commands" instead of failing while iterating None.
        # {file: compiler arguments}
        return {
            command.filename: list(command.arguments)[1:-1]
            for command in compilation_commands or ()
        }

bindings/python/scripts/generate.py

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,14 @@ def get_fields_from_anonymous(item: dict) -> list:
148148
fields = []
149149
for sub_item in item["members"]:
150150
# base condition
151-
if sub_item["kind"] == "FIELD_DECL":
151+
if sub_item["cursor_kind"]["name"] == "FIELD_DECL":
152152
fields.append(sub_item)
153153
# recurse
154-
elif sub_item["kind"] in ("ANONYMOUS_UNION_DECL", "ANONYMOUS_STRUCT_DECL"):
154+
# @TODO Fix this, `ANONYMOUS_kind` was removed, now test via `is_anonymous`
155+
elif sub_item["cursor_kind"]["name"] in (
156+
"ANONYMOUS_UNION_DECL",
157+
"ANONYMOUS_STRUCT_DECL",
158+
):
155159
fields += bind.get_fields_from_anonymous(item=sub_item)
156160
return fields
157161

@@ -173,8 +177,8 @@ def handle_node(self, item: dict) -> None:
173177
"""
174178

175179
self.item = item
176-
self.kind = self.item["kind"]
177-
self.name = self.item["name"]
180+
self.kind = self.item["cursor_kind"]["name"]
181+
self.name = self.item["cursor"]["spelling"]
178182
self.members = self.item["members"]
179183
self.depth = self.item["depth"]
180184

@@ -218,17 +222,21 @@ def handle_struct_decl(self) -> None:
218222
template_class_name = None
219223
template_class_name_python = None
220224
for sub_item in self.members:
221-
if sub_item["kind"] == "TYPE_REF":
225+
if sub_item["cursor_kind"]["name"] == "TYPE_REF":
222226
# TODO: Will this case only apply to templates?
223227
# @TODO: Make more robust
224-
type_ref = sub_item["name"].replace("struct ", "").replace("pcl::", "")
228+
type_ref = (
229+
sub_item["cursor"]["spelling"]
230+
.replace("struct ", "")
231+
.replace("pcl::", "")
232+
)
225233
template_class_name = f"{self.name}<{type_ref}>"
226234
template_class_name_python = f"{self.name}_{type_ref}"
227235

228236
base_class_list = [
229-
sub_item["name"]
237+
sub_item["cursor"]["spelling"]
230238
for sub_item in self.members
231-
if sub_item["kind"] == "CXX_BASE_SPECIFIER"
239+
if sub_item["cursor_kind"]["name"] == "CXX_BASE_SPECIFIER"
232240
]
233241

234242
base_class_list_string = [
@@ -253,35 +261,35 @@ def handle_struct_decl(self) -> None:
253261
for sub_item in self.members:
254262
fields = self.get_fields_from_anonymous(sub_item)
255263
for field in fields:
256-
if field["element_type"] == "ConstantArray":
264+
if field["type"]["kind"] == "ConstantArray":
257265
# TODO: FIX: readwrite, not readonly
258266
self._linelist.append(
259-
f'.def_property_readonly("{field["name"]}", []({self.name}& obj) {{return obj.{field["name"]}; }})' # float[ ' + f'obj.{sub_item["name"]}' + '.size()];} )'
267+
f'.def_property_readonly("{field["cursor"]["spelling"]}", []({self.name}& obj) {{return obj.{field["cursor"]["spelling"]}; }})' # float[ ' + f'obj.{sub_item["cursor"]["spelling"]}' + '.size()];} )'
260268
)
261269
else:
262270
self._linelist.append(
263-
f'.def_readwrite("{field["name"]}", &{self.name}::{field["name"]})'
271+
f'.def_readwrite("{field["cursor"]["spelling"]}", &{self.name}::{field["cursor"]["spelling"]})'
264272
)
265273

266274
for sub_item in self.members:
267275

268276
# handle field declarations
269-
if sub_item["kind"] == "FIELD_DECL":
270-
if sub_item["element_type"] == "ConstantArray":
277+
if sub_item["cursor_kind"]["name"] == "FIELD_DECL":
278+
if sub_item["type"]["kind"] == "ConstantArray":
271279
self._linelist.append(
272-
f'.def_property_readonly("{sub_item["name"]}", []({self.name}& obj) {{return obj.{sub_item["name"]}; }})' # float[ ' + f'obj.{sub_item["name"]}' + '.size()];} )'
280+
f'.def_property_readonly("{sub_item["cursor"]["spelling"]}", []({self.name}& obj) {{return obj.{sub_item["cursor"]["spelling"]}; }})' # float[ ' + f'obj.{sub_item["cursor"]["spelling"]}' + '.size()];} )'
273281
)
274282
else:
275283
self._linelist.append(
276-
f'.def_readwrite("{sub_item["name"]}", &{self.name}::{sub_item["name"]})'
284+
f'.def_readwrite("{sub_item["cursor"]["spelling"]}", &{self.name}::{sub_item["cursor"]["spelling"]})'
277285
)
278286

279287
# handle class methods
280-
elif sub_item["kind"] == "CXX_METHOD":
288+
elif sub_item["cursor_kind"]["name"] == "CXX_METHOD":
281289
# TODO: Add template args, currently blank
282-
if sub_item["name"] not in ("PCL_DEPRECATED"):
290+
if sub_item["cursor"]["spelling"] not in ("PCL_DEPRECATED"):
283291
self._linelist.append(
284-
f'.def("{sub_item["name"]}", py::overload_cast<>(&{self.name}::{sub_item["name"]}))'
292+
f'.def("{sub_item["cursor"]["spelling"]}", py::overload_cast<>(&{self.name}::{sub_item["cursor"]["spelling"]}))'
285293
)
286294

287295
def handle_function(self) -> None:
@@ -293,8 +301,8 @@ def handle_function(self) -> None:
293301
parameter_type_list = []
294302
details = self._state_stack[-1]
295303
for sub_item in self.members:
296-
if sub_item["kind"] == "PARM_DECL":
297-
parameter_type_list.append(f'"{sub_item["name"]}"_a')
304+
if sub_item["cursor_kind"]["name"] == "PARM_DECL":
305+
parameter_type_list.append(f'"{sub_item["cursor"]["spelling"]}"_a')
298306

299307
parameter_type_list = ",".join(parameter_type_list)
300308
if parameter_type_list:
@@ -317,7 +325,7 @@ def handle_constructor(self) -> None:
317325

318326
# generate parameter type list
319327
for sub_item in self.members:
320-
if sub_item["kind"] == "PARM_DECL":
328+
if sub_item["cursor_kind"]["name"] == "PARM_DECL":
321329
parameter_type_list.append(self.get_parm_types(sub_item))
322330
parameter_type_list = ",".join(parameter_type_list)
323331

@@ -326,25 +334,29 @@ def handle_constructor(self) -> None:
326334
self._linelist.append(f".def(py::init<{parameter_type_list}>())")
327335

328336
def get_parm_types(self, item: Dict[str, Any]) -> str:
    """
    Build the C++ type string of a single `PARM_DECL` node.

    Parameters:
        - item (dict): Parsed parameter node. Reads `item["type"]["kind"]`
          and, for reference/elaborated types, the `cursor_kind` name and
          `cursor` spelling of each entry in `item["members"]`.

    Returns:
        - parameter_type (str): The type spelling to place in `py::init<...>`.
          - "LValueReference": "<type> &", with "struct " and "pcl::" removed
            from the referenced type's spelling.
          - "Elaborated": the namespace-qualified type, "<ns>::<type>".
          - "Float" / "Double" / "Int": the lower-cased kind.
          - Anything else, or a reference/elaborated type without a
            `TYPE_REF` child: the type kind unchanged.
    """
    type_kind = item["type"]["kind"]
    # Default used by unknown kinds, and by reference/elaborated types that
    # have no TYPE_REF child (which previously left the result unbound).
    parameter_type = f"{type_kind}"

    if type_kind == "LValueReference":
        for sub_item in item["members"]:
            if sub_item["cursor_kind"]["name"] == "TYPE_REF":
                # @TODO: Make more robust
                type_ref = (
                    sub_item["cursor"]["spelling"]
                    .replace("struct ", "")
                    .replace("pcl::", "")
                )
                parameter_type = f"{type_ref} &"
    elif type_kind == "Elaborated":
        namespace_ref = ""
        for sub_item in item["members"]:
            sub_kind = sub_item["cursor_kind"]["name"]
            if sub_kind == "NAMESPACE_REF":
                # Namespaces accumulate in member order and prefix the type.
                namespace_ref += f'{sub_item["cursor"]["spelling"]}::'
            elif sub_kind == "TYPE_REF":
                parameter_type = f'{namespace_ref}{sub_item["cursor"]["spelling"]}'
    elif type_kind in ("Float", "Double", "Int"):
        parameter_type = type_kind.lower()

    return parameter_type
349361

350362
def handle_inclusion_directive(self) -> None:
@@ -417,7 +429,7 @@ def combine_lines() -> list or Exception:
417429
if parsed_info:
418430
bind_object = bind(root=parsed_info, module_name=module_name)
419431
# Extract filename from parsed_info (TRANSLATION_UNIT's name contains the filepath)
420-
filename = "pcl" + parsed_info["name"].rsplit("pcl")[-1]
432+
filename = "pcl" + parsed_info["cursor"]["spelling"].rsplit("pcl")[-1]
421433
return combine_lines()
422434
else:
423435
raise Exception("Empty dict: parsed_info")

0 commit comments

Comments
 (0)