13
13
# GNU General Public License for more details.
14
14
#
15
15
# You should have received a copy of the GNU General Public License
16
- # along with this program; if not, write to the Free Software
17
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
17
#
19
18
# Authors:
20
19
# Nishchith Shetty <[email protected] >
30
29
get_unique_repository ,
31
30
get_files_at_time )
32
31
from .utils import fix_field_date
32
+ from ..elastic_mapping import Mapping as BaseMapping
33
33
34
34
from grimoirelab_toolkit .datetime import datetime_utcnow
35
35
from grimoire_elk .elastic import ElasticSearch
39
39
logger = logging .getLogger (__name__ )
40
40
41
41
42
class Mapping(BaseMapping):
    """Elasticsearch mapping passed to the output index of the CoLic study."""

    @staticmethod
    def get_elastic_mappings(es_major):
        """Return the Elasticsearch mapping for the study items.

        The mapping keeps dynamic mapping enabled and explicitly types the
        identifier/origin fields as keywords, the file counters and the
        interval as longs, and the study creation timestamp as a date.

        :param es_major: major version of Elasticsearch, as string
            (not consulted here: the same mapping is returned for
            every version)
        :returns: dictionary with a key, 'items', with the mapping
            as a JSON string
        """

        items_mapping = '''
        {
            "dynamic":true,
            "properties": {
                "id" : {
                    "type" : "keyword"
                },
                "interval_months" : {
                    "type" : "long"
                },
                "origin" : {
                    "type" : "keyword"
                },
                "study_creation_date" : {
                    "type" : "date"
                },
                "total_files": {
                    "type": "long"
                },
                "licensed_files": {
                    "type": "long"
                },
                "copyrighted_files": {
                    "type": "long"
                }
            }
        }
        '''

        return dict(items=items_mapping)
84
+
85
+
42
86
class ColicEnrich (Enrich ):
43
87
44
88
def __init__ (self , db_sortinghat = None , db_projects_map = None , json_projects_map = None ,
@@ -167,8 +211,8 @@ def extract_modules(self, file_path):
167
211
return modules
168
212
169
213
@metadata
170
- def get_rich_item (self , file_analysis ):
171
- # TODO: requires adjustments regarding category of backend used
214
+ def __get_rich_scancode (self , file_analysis ):
215
+ # Scancode and Scancode-CLI Implementation
172
216
173
217
eitem = {}
174
218
eitem ["file_path" ] = file_analysis ["file_path" ]
@@ -192,14 +236,44 @@ def get_rich_item(self, file_analysis):
192
236
193
237
return eitem
194
238
239
@metadata
def __get_rich_nomossa(self, file_analysis):
    """Build the enriched item for a file analysed with NOMOS.

    :param file_analysis: dict with (at least) the keys "file_path" and
        "licenses". "licenses" is presumably a list of license names
        (TODO confirm against the NOMOS analyzer output); a bare string
        is also accepted and treated as a single name.
    :returns: dict with the keys "file_path", "modules", "licenses",
        "license_name", "has_license", "copyrights" and "has_copyright"
    """
    # Marker used in the analysis when no license was detected.
    no_license_marker = "No_license_found"

    found = file_analysis["licenses"]
    # A bare string would otherwise be iterated character by character.
    if isinstance(found, str):
        found = [found]
    # Drop the marker whether it arrives alone or inside a list, so it is
    # never indexed as if it were a real license name.
    license_names = [name for name in found if name != no_license_marker]

    eitem = {}
    eitem["file_path"] = file_analysis["file_path"]
    eitem["modules"] = self.extract_modules(eitem["file_path"])
    eitem["licenses"] = license_names
    # Separate list object, as the two fields were built independently.
    eitem["license_name"] = list(license_names)
    eitem["has_license"] = 1 if license_names else 0

    # NOMOS doesn't provide copyright information.
    eitem["copyrights"] = []
    eitem["has_copyright"] = 0

    return eitem
261
+
195
262
def get_rich_items (self , item ):
196
- # The real data
197
- entry = item ['data' ]
263
+ """
264
+ :category: code_license_scancode_cli(default)
265
+ """
198
266
267
+ if item ["category" ] == "code_license_nomos" :
268
+ get_rich_item = self .__get_rich_nomossa
269
+ else :
270
+ get_rich_item = self .__get_rich_scancode
271
+
272
+ entry = item ['data' ]
199
273
enriched_items = []
200
274
201
275
for file_analysis in entry ["analysis" ]:
202
- eitem = self . get_rich_item (file_analysis )
276
+ eitem = get_rich_item (file_analysis )
203
277
204
278
for f in self .RAW_FIELDS_COPY :
205
279
if f in item :
@@ -208,13 +282,14 @@ def get_rich_items(self, item):
208
282
eitem [f ] = None
209
283
210
284
# common attributes
211
- eitem ['commit_sha' ] = entry ['commit' ]
212
285
eitem ['author' ] = entry ['Author' ]
213
- eitem ['committer' ] = entry ['Commit' ]
214
- eitem ['commit' ] = entry ['commit' ]
215
- eitem ['message' ] = entry ['message' ]
216
286
eitem ['author_date' ] = fix_field_date (entry ['AuthorDate' ])
287
+ eitem ["category" ] = item ["category" ]
288
+ eitem ['commit' ] = entry ['commit' ]
289
+ eitem ['committer' ] = entry ['Commit' ]
217
290
eitem ['commit_date' ] = fix_field_date (entry ['CommitDate' ])
291
+ eitem ['commit_sha' ] = entry ['commit' ]
292
+ eitem ['message' ] = entry ['message' ]
218
293
219
294
if self .prjs_map :
220
295
eitem .update (self .get_item_project (eitem ))
@@ -280,7 +355,7 @@ def enrich_colic_analysis(self, ocean_backend, enrich_backend, no_incremental=Fa
280
355
ins_items = 0
281
356
282
357
for repository_url in repositories :
283
- es_out = ElasticSearch (enrich_backend .elastic .url , out_index )
358
+ es_out = ElasticSearch (enrich_backend .elastic .url , out_index , mappings = Mapping )
284
359
evolution_items = []
285
360
286
361
for interval in interval_months :
0 commit comments