8
8
#
9
9
10
10
import attr
11
+ from collections import Counter
12
+
11
13
from commoncode .cliutils import PluggableCommandLineOption
12
14
from commoncode .cliutils import POST_SCAN_GROUP
13
15
from license_expression import Licensing
14
16
from plugincode .post_scan import PostScanPlugin
15
17
from plugincode .post_scan import post_scan_impl
16
18
19
+ from licensedcode .detection import LicenseDetection
20
+
17
21
# Set to True to enable debug tracing
18
22
TRACE = False
19
23
@@ -39,7 +43,11 @@ class LicensesReference(PostScanPlugin):
39
43
"""
40
44
Add a reference list of all licenses data and text.
41
45
"""
42
- codebase_attributes = dict (licenses_reference = attr .ib (default = attr .Factory (list )))
46
+ codebase_attributes = dict (
47
+ license_references = attr .ib (default = attr .Factory (list )),
48
+ licensedb_references = attr .ib (default = attr .Factory (list )),
49
+ license_detection_references = attr .ib (default = attr .Factory (list ))
50
+ )
43
51
44
52
sort_order = 500
45
53
@@ -55,30 +63,187 @@ def is_enabled(self, licenses_reference, **kwargs):
55
63
return licenses_reference
56
64
57
65
def process_codebase(self, codebase, licenses_reference, **kwargs):
    """
    Walk every resource of the `codebase`, gather its license expressions
    and license detections, and store the derived reference lists in the
    codebase-level `license_references`, `licensedb_references` and
    `license_detection_references` attributes.
    """
    all_expressions = []
    all_licensedb_data = []
    detections_by_path = {}

    for resource in codebase.walk():
        # License expressions come from both license and package detections.
        resource_expressions = getattr(resource, 'license_expressions', []) or []
        resource_packages = getattr(resource, 'package_data', []) or []
        # TODO: license_expression attribute name is changing soon
        package_expressions = [
            package['license_expression'] for package in resource_packages
        ]
        all_expressions.extend(resource_expressions + package_expressions)

        # License matches currently come from license detections only.
        # TODO: report license detections (with license matches) for packages
        resource_detections = getattr(resource, 'licenses', []) or []
        all_licensedb_data.extend(
            get_license_db_reference_data(licence_detections=resource_detections)
        )

        detections_by_path[resource.path] = resource_detections
        codebase.save_resource(resource)

    codebase.attributes.license_references.extend(
        get_license_references(license_expressions=all_expressions)
    )

    codebase.attributes.licensedb_references.extend(
        get_licensedb_references(license_db_data=all_licensedb_data)
    )

    codebase.attributes.license_detection_references.extend(
        get_license_detection_references(detections_by_path)
    )
101
+
102
+
103
def get_license_references(license_expressions, licensing=Licensing()):
    """
    Return a list of license data mappings, one for each unique license key
    used in the `license_expressions` list of license expression strings.
    Empty expressions are skipped and the result is sorted by license key.
    """
    from licensedcode.cache import get_licenses_db

    unique_keys = set()
    for licexp in filter(None, license_expressions):
        unique_keys.update(licensing.license_keys(licexp))

    licenses_by_key = get_licenses_db()
    return [
        licenses_by_key[license_key].to_dict(
            include_ignorables=False,
            include_text=True,
        )
        for license_key in sorted(unique_keys)
    ]
123
+
124
+
125
def get_licensedb_references(license_db_data):
    """
    Return a list of unique LicenseDB reference mappings from the
    `license_db_data` list of mappings.

    Only the first mapping seen for each "licensedb_identifier" value is
    kept, and the input order is preserved.
    """
    seen_ids = set()
    licensedb_references = []

    for licdb_ref in license_db_data:
        licdb_id = licdb_ref['licensedb_identifier']
        if licdb_id in seen_ids:
            continue

        # Use add() and not update(): update() treats a string as an iterable
        # and would store its individual characters, so the membership check
        # above would never find the whole identifier.
        seen_ids.add(licdb_id)
        licensedb_references.append(licdb_ref)

    return licensedb_references
139
+
140
+
141
def get_license_db_reference_data(licence_detections):
    """
    Return a list of LicenseDB reference data mappings built from the matches
    of the `licence_detections` list of license detection mappings, with one
    mapping for each unique "licensedb_identifier" (the first one seen wins).

    Each match mapping is modified in place: the rule-level fields copied to
    the reference data (and the "licenses" entry) are removed from the match,
    while "license_expression" and "licensedb_identifier" are kept. This
    happens for every match, including the ones whose identifier was already
    seen.
    """
    # Match fields that are moved from the match to the reference data.
    moved_keys = (
        'referenced_filenames',
        'is_license_text',
        'is_license_notice',
        'is_license_reference',
        'is_license_tag',
        'is_license_intro',
        'rule_length',
        'rule_relevance',
    )

    seen_ids = set()
    license_db_reference_data = []

    for detection in licence_detections:
        for match in detection['matches']:
            licdb_id = match['licensedb_identifier']

            ref_data = {
                'license_expression': match['license_expression'],
                'licensedb_identifier': licdb_id,
            }
            for key in moved_keys:
                ref_data[key] = match.pop(key)

            # The matched text is optional in a match.
            if 'matched_text' in match:
                ref_data['matched_text'] = match.pop('matched_text')

            # Dropped from the match and not carried in the reference data.
            match.pop('licenses')

            if licdb_id not in seen_ids:
                # add() stores the whole identifier; update() would store the
                # characters of the string and break the de-duplication.
                seen_ids.add(licdb_id)
                license_db_reference_data.append(ref_data)

    return license_db_reference_data
176
+
177
+
178
def get_license_detection_references(license_detections_by_path):
    """
    Return a list of UniqueDetection built from `license_detections_by_path`,
    a mapping of resource path to a list of license detection mappings.

    Each detection mapping is modified in place: its "matches" and
    "combination_reasons" entries are removed and an "id" entry is set to the
    identifier of the corresponding LicenseDetection object.
    """
    collected = []

    for resource_path, resource_detections in license_detections_by_path.items():
        for detection_mapping in resource_detections:
            # Build the object first: the entries removed below are still
            # present in the mapping at this point.
            license_detection = LicenseDetection.from_mapping(
                detection=detection_mapping,
            )
            detection_mapping.pop('matches')
            detection_mapping.pop('combination_reasons')

            license_detection.file_region = license_detection.get_file_region(
                path=resource_path,
            )
            detection_mapping["id"] = license_detection.identifier

            collected.append(license_detection)

    return UniqueDetection.get_unique_detections(collected)
196
+
197
+
198
@attr.s
class UniqueDetection:
    """
    A unique License Detection, with the file regions of all the detections
    sharing the same identifier.
    """
    # NOTE(review): declared as int but populated from `detection.identifier`
    # in get_unique_detections() -- confirm the actual identifier type.
    unique_identifier = attr.ib(type=int)
    # The first detection seen for this identifier, as a mapping.
    license_detection = attr.ib()
    # The `file_region` of every detection having this identifier.
    files = attr.ib(factory=list)

    @classmethod
    def get_unique_detections(cls, license_detections):
        """
        Return a list of UniqueDetection, one for each distinct `identifier`
        found in the `license_detections` iterable of LicenseDetection, in
        order of first occurrence.

        Each detection is expected to have an `identifier` and a
        `file_region` attribute.
        """
        # Group the detections by identifier in a single pass rather than
        # rescanning the whole list once for each identifier. A dict keeps
        # the identifiers in first-seen order.
        detections_by_identifier = {}
        for detection in license_detections:
            detections_by_identifier.setdefault(
                detection.identifier, []
            ).append(detection)

        return [
            cls(
                files=[detection.file_region for detection in detections],
                # The first detection seen stands for the whole group.
                license_detection=attr.asdict(detections[0]),
                unique_identifier=identifier,
            )
            for identifier, detections in detections_by_identifier.items()
        ]
+
240
+
241
def get_identifiers(license_detections):
    """
    Return a lazy iterable of the `identifier` attribute of each detection in
    the `license_detections` iterable, in input order.
    """
    return (
        license_detection.identifier for license_detection in license_detections
    )
0 commit comments