35
35
LICENSE_INDEX_FILENAME = 'index_cache'
36
36
LICENSE_LOCKFILE_NAME = 'scancode_license_index_lockfile'
37
37
LICENSE_CHECKSUM_FILE = 'scancode_license_index_tree_checksums'
38
+ CACHED_DIRECTORIES_FILENAME = 'cached_directories'
38
39
39
40
40
41
@attr .s (slots = True )
@@ -58,6 +59,7 @@ def load_or_build(
58
59
timeout = LICENSE_INDEX_LOCK_TIMEOUT ,
59
60
licenses_data_dir = None ,
60
61
rules_data_dir = None ,
62
+ additional_directories = None ,
61
63
):
62
64
"""
63
65
Load or build and save and return a LicenseCache object.
@@ -66,7 +68,8 @@ def load_or_build(
66
68
On the side, we load cached or build license db, SPDX symbols and other
67
69
license-related data structures.
68
70
69
- - If the cache exists, it is returned unless corrupted or ``force`` is True.
71
+ - If the cache exists, it is returned unless corrupted, ``force`` is True, or if we pass in additional
72
+ directories that differ from those recorded alongside the existing cache.
70
73
- If the cache does not exist, a new index is built and cached.
71
74
- If ``index_all_languages`` is True, include texts in all languages when
72
75
building the license index. Otherwise, only include the English license \
@@ -75,11 +78,30 @@ def load_or_build(
75
78
idx_cache_dir = os .path .join (licensedcode_cache_dir , LICENSE_INDEX_DIR )
76
79
create_dir (idx_cache_dir )
77
80
cache_file = os .path .join (idx_cache_dir , LICENSE_INDEX_FILENAME )
81
+ cached_directories_file = os .path .join (idx_cache_dir , CACHED_DIRECTORIES_FILENAME )
78
82
79
83
has_cache = os .path .exists (cache_file ) and os .path .getsize (cache_file )
84
+ has_cached_directories = os .path .exists (cached_directories_file ) and os .path .getsize (cached_directories_file )
85
+ should_rebuild_cache = False
86
+
87
+ if has_cached_directories :
88
+ # if we have cached additional directories of licenses, check if those directories are equal to the additional
89
+ # directories passed in
90
+ with open (cached_directories_file , 'rb' ) as file :
91
+ cached_additional_directories = pickle .load (file )
92
+
93
+ # we need to rebuild the cache if the list of additional directories we passed in is different than
94
+ # the set of additional directories currently included in the index cache
95
+ should_rebuild_cache = additional_directories is not None \
96
+ and sorted (additional_directories ) != sorted (cached_additional_directories )
97
+ else :
98
+ # otherwise, we don't have a file of cached directories. If there are additional directories passed in,
99
+ # we know we need to make a new cache file.
100
+ if additional_directories :
101
+ should_rebuild_cache = True
80
102
81
103
# bypass build if cache exists
82
- if has_cache and not force :
104
+ if has_cache and not force and not should_rebuild_cache :
83
105
try :
84
106
return load_cache_file (cache_file )
85
107
except Exception as e :
@@ -92,6 +114,8 @@ def load_or_build(
92
114
from licensedcode .models import licenses_data_dir as ldd
93
115
from licensedcode .models import rules_data_dir as rdd
94
116
from licensedcode .models import load_licenses
117
+ from licensedcode .models import load_licenses_from_multiple_dirs
118
+ from licensedcode .models import get_license_dirs
95
119
from scancode import lockfile
96
120
97
121
licenses_data_dir = licenses_data_dir or ldd
@@ -106,13 +130,21 @@ def load_or_build(
106
130
# Here, the cache is either stale or non-existing: we need to
107
131
# rebuild all cached data (e.g. mostly the index) and cache it
108
132
109
- licenses_db = load_licenses (licenses_data_dir = licenses_data_dir )
133
+ if additional_directories :
134
+ additional_license_dirs = get_license_dirs (additional_dirs = additional_directories )
135
+ combined_directories = [licenses_data_dir ] + additional_license_dirs
136
+ licenses_db = load_licenses_from_multiple_dirs (license_directories = combined_directories )
137
+ else :
138
+ licenses_db = load_licenses (licenses_data_dir = licenses_data_dir )
110
139
140
+ # create a single merged index containing license data from licenses_data_dir
141
+ # and data from additional directories
111
142
index = build_index (
112
143
licenses_db = licenses_db ,
113
144
licenses_data_dir = licenses_data_dir ,
114
145
rules_data_dir = rules_data_dir ,
115
146
index_all_languages = index_all_languages ,
147
+ additional_directories = additional_directories ,
116
148
)
117
149
118
150
spdx_symbols = build_spdx_symbols (licenses_db = licenses_db )
@@ -131,6 +163,10 @@ def load_or_build(
131
163
with open (cache_file , 'wb' ) as fn :
132
164
pickle .dump (license_cache , fn , protocol = PICKLE_PROTOCOL )
133
165
166
+ # save the list of additional directories included in the cache
167
+ with open (cached_directories_file , 'wb' ) as file :
168
+ pickle .dump (additional_directories , file , protocol = PICKLE_PROTOCOL )
169
+
134
170
return license_cache
135
171
136
172
except lockfile .LockTimeout :
@@ -143,27 +179,50 @@ def build_index(
143
179
licenses_data_dir = None ,
144
180
rules_data_dir = None ,
145
181
index_all_languages = False ,
182
+ additional_directories = None ,
146
183
):
147
184
"""
148
185
Return an index built from rules and licenses directories
149
186
150
187
If ``index_all_languages`` is True, include texts and rules in all languages.
151
188
Otherwise, only include the English license texts and rules (the default)
189
+ If ``additional_directories`` is not None, we will include licenses and rules
190
+ from these additional directories in the returned index.
152
191
"""
153
192
from licensedcode .index import LicenseIndex
193
+ from licensedcode .models import get_license_dirs
194
+ from licensedcode .models import get_rule_dirs
154
195
from licensedcode .models import get_rules
196
+ from licensedcode .models import get_rules_from_multiple_dirs
155
197
from licensedcode .models import get_all_spdx_key_tokens
156
198
from licensedcode .models import get_license_tokens
157
199
from licensedcode .models import licenses_data_dir as ldd
158
200
from licensedcode .models import rules_data_dir as rdd
159
201
from licensedcode .models import load_licenses
202
+ from licensedcode .models import load_licenses_from_multiple_dirs
160
203
from licensedcode .legalese import common_license_words
161
204
162
205
licenses_data_dir = licenses_data_dir or ldd
163
206
rules_data_dir = rules_data_dir or rdd
164
207
165
- licenses_db = licenses_db or load_licenses (licenses_data_dir = licenses_data_dir )
166
- rules = get_rules (licenses_db = licenses_db , rules_data_dir = rules_data_dir )
208
+ if not licenses_db :
209
+ if additional_directories :
210
+ # combine the licenses in these additional directories with the licenses in the original DB
211
+ additional_license_dirs = get_license_dirs (additional_dirs = additional_directories )
212
+ combined_license_directories = [licenses_data_dir ] + additional_license_dirs
213
+ # generate a single combined license db with all licenses
214
+ licenses_db = load_licenses_from_multiple_dirs (license_dirs = combined_license_directories )
215
+ else :
216
+ licenses_db = load_licenses (licenses_data_dir = licenses_data_dir )
217
+
218
+ if additional_directories :
219
+ # if we have additional directories, extract the rules from them
220
+ additional_rule_dirs = get_rule_dirs (additional_dirs = additional_directories )
221
+ # then combine the rules in these additional directories with the rules in the original rules directory
222
+ combined_rule_directories = [rules_data_dir ] + additional_rule_dirs
223
+ rules = get_rules_from_multiple_dirs (licenses_db = licenses_db , rule_directories = combined_rule_directories )
224
+ else :
225
+ rules = get_rules (licenses_db = licenses_db , rules_data_dir = rules_data_dir )
167
226
168
227
legalese = common_license_words
169
228
spdx_tokens = set (get_all_spdx_key_tokens (licenses_db ))
@@ -299,20 +358,20 @@ def build_unknown_spdx_symbol(licenses_db=None):
299
358
return LicenseSymbolLike (licenses_db ['unknown-spdx' ])
300
359
301
360
302
- def get_cache (force = False , index_all_languages = False ):
361
+ def get_cache (force = False , index_all_languages = False , additional_directories = None ):
303
362
"""
304
363
Return a LicenseCache either rebuilt, cached or loaded from disk.
305
364
306
365
If ``index_all_languages`` is True, include texts in all languages when
307
366
building the license index. Otherwise, only include the English license \
308
367
texts and rules (the default)
309
368
"""
310
- populate_cache (force = force , index_all_languages = index_all_languages )
369
+ populate_cache (force = force , index_all_languages = index_all_languages , additional_directories = additional_directories )
311
370
global _LICENSE_CACHE
312
371
return _LICENSE_CACHE
313
372
314
373
315
- def populate_cache (force = False , index_all_languages = False ):
374
+ def populate_cache (force = False , index_all_languages = False , additional_directories = None ):
316
375
"""
317
376
Load or build and cache a LicenseCache. Return None.
318
377
"""
@@ -325,6 +384,7 @@ def populate_cache(force=False, index_all_languages=False):
325
384
index_all_languages = index_all_languages ,
326
385
# used for testing only
327
386
timeout = LICENSE_INDEX_LOCK_TIMEOUT ,
387
+ additional_directories = additional_directories ,
328
388
)
329
389
330
390
@@ -346,11 +406,15 @@ def load_cache_file(cache_file):
346
406
raise Exception (msg ) from e
347
407
348
408
349
- def get_index (force = False , index_all_languages = False ):
409
+ def get_index (force = False , index_all_languages = False , additional_directories = None ):
350
410
"""
351
411
Return a LicenseIndex, building and caching it first if needed.
352
412
"""
353
- return get_cache (force = force , index_all_languages = index_all_languages ).index
413
+ return get_cache (
414
+ force = force ,
415
+ index_all_languages = index_all_languages ,
416
+ additional_directories = additional_directories
417
+ ).index
354
418
355
419
356
420
get_cached_index = get_index
0 commit comments