@@ -138,12 +138,14 @@ def __add_derived_metrics(self, file_analysis, eitem):
         """ Add derived metrics fields """

         # TODO: Fix Logic: None rather than 1
-        if None not in [eitem["loc"], eitem["comments"], eitem["num_funs"]]:
-            eitem["loc_per_comment_lines"] = eitem["loc"] / max(eitem["comments"], 1)
-            eitem["loc_per_blank_lines"] = eitem["loc"] / max(eitem["blanks"], 1)
-            eitem["loc_per_function"] = eitem["loc"] / max(eitem["num_funs"], 1)
+        if eitem["loc"] is not None and eitem["comments"] is not None and eitem["num_funs"] is not None:
+            eitem["comments_per_loc"] = round(eitem["comments"] / max(eitem["loc"], 1), 2)
+            eitem["blanks_per_loc"] = round(eitem["blanks"] / max(eitem["loc"], 1), 2)
+            eitem["loc_per_function"] = round(eitem["loc"] / max(eitem["num_funs"], 1), 2)
         else:
-            eitem["loc_per_comment_lines"] = eitem["loc_per_blank_lines"] = eitem["loc_per_function"] = None
+            eitem["comments_per_loc"] = None
+            eitem["blanks_per_loc"] = None
+            eitem["loc_per_function"] = None

         return eitem

@@ -176,7 +178,7 @@ def enrich_items(self, ocean_backend, events=False):
         return num_items

     def enrich_repo_analysis(self, ocean_backend, enrich_backend, no_incremental=False,
-                             out_index="cocom_enrich_graal_repo", interval_months=3,
+                             out_index="cocom_enrich_graal_repo", interval_months=[3],
                              date_field="grimoire_creation_date"):

         logger.info("Doing enrich_repository_analysis study for index {}"
@@ -185,71 +187,76 @@ def enrich_repo_analysis(self, ocean_backend, enrich_backend, no_incremental=Fal
         es_in = ES([enrich_backend.elastic_url], retry_on_timeout=True, timeout=100,
                    verify_certs=self.elastic.requests.verify, connection_class=RequestsHttpConnection)
         in_index = enrich_backend.elastic.index
+        interval_months = list(map(int, interval_months))

         unique_repos = es_in.search(
             index=in_index,
             body=get_unique_repository())

         repositories = [repo['key'] for repo in unique_repos['aggregations']['unique_repos'].get('buckets', [])]
+        current_month = datetime_utcnow().replace(day=1, hour=0, minute=0, second=0)
         num_items = 0
         ins_items = 0

         for repository_url in repositories:
             es_out = ElasticSearch(enrich_backend.elastic.url, out_index)
             evolution_items = []

-            to_month = get_to_date(es_in, in_index, out_index, repository_url)
-            to_month = to_month.replace(day=1, hour=0, minute=0, second=0)
-            current_month = datetime_utcnow().replace(day=1, hour=0, minute=0, second=0)
-
-            while to_month < current_month:
-                files_at_time = es_in.search(
-                    index=in_index,
-                    body=get_files_at_time(repository_url, to_month.isoformat())
-                )['aggregations']['file_stats'].get("buckets", [])
-
-                if not len(files_at_time):
-                    to_month = to_month + relativedelta(months=+interval_months)
-                    continue
-
-                repository_name = repository_url.split("/")[-1]
-                evolution_item = {
-                    "id": "{}_{}_{}".format(to_month.isoformat(), repository_name, interval_months),
-                    "origin": repository_url,
-                    "interval_months": interval_months,
-                    "study_creation_date": to_month.isoformat(),
-                    "total_files": len(files_at_time)
-                }
-
-                for file_ in files_at_time:
-                    file_details = file_["1"]["hits"]["hits"][0]["_source"]
-
-                    for metric in self.metrics:
-                        total_metric = "total_" + metric
-                        evolution_item[total_metric] = evolution_item.get(total_metric, 0)
-                        evolution_item[total_metric] += file_details[metric] if file_details[metric] is not None else 0
-
-                # TODO: Fix Logic: None rather than 1
-                evolution_item["total_loc_per_comment_lines"] = evolution_item["total_loc"] / \
-                    max(evolution_item["total_comments"], 1)
-                evolution_item["total_loc_per_blank_lines"] = evolution_item["total_loc"] / max(evolution_item["total_blanks"], 1)
-                evolution_item["total_loc_per_function"] = evolution_item["total_loc"] / max(evolution_item["total_num_funs"], 1)
-
-                evolution_items.append(evolution_item)
-
-                if len(evolution_items) >= self.elastic.max_items_bulk:
-                    num_items += len(evolution_items)
-                    ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())
-                    evolution_items = []
+            for interval in interval_months:

-                to_month = to_month + relativedelta(months=+interval_months)
+                to_month = get_to_date(es_in, in_index, out_index, repository_url, interval)
+                to_month = to_month.replace(month=int(interval), day=1, hour=0, minute=0, second=0)

-            if len(evolution_items) > 0:
-                num_items += len(evolution_items)
-                ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())
+                while to_month < current_month:
+                    files_at_time = es_in.search(
+                        index=in_index,
+                        body=get_files_at_time(repository_url, to_month.isoformat())
+                    )['aggregations']['file_stats'].get("buckets", [])

-        if num_items != ins_items:
-            missing = num_items - ins_items
-            logger.error("%s/%s missing items for Graal CoCom Analysis Study", str(missing), str(num_items))
-        else:
-            logger.info("%s items inserted for Graal CoCom Analysis Study", str(num_items))
+                    if not len(files_at_time):
+                        to_month = to_month + relativedelta(months=+interval)
+                        continue
+
+                    repository_name = repository_url.split("/")[-1]
+                    evolution_item = {
+                        "id": "{}_{}_{}".format(to_month.isoformat(), repository_name, interval),
+                        "origin": repository_url,
+                        "interval_months": interval,
+                        "study_creation_date": to_month.isoformat(),
+                        "total_files": len(files_at_time)
+                    }
+
+                    for file_ in files_at_time:
+                        file_details = file_["1"]["hits"]["hits"][0]["_source"]
+
+                        for metric in self.metrics:
+                            total_metric = "total_" + metric
+                            evolution_item[total_metric] = evolution_item.get(total_metric, 0)
+                            evolution_item[total_metric] += file_details[metric] if file_details[metric] is not None else 0
+
+                    # TODO: Fix Logic: None rather than 1
+                    evolution_item["total_comments_per_loc"] = round(
+                        evolution_item["total_comments"] / max(evolution_item["total_loc"], 1), 2)
+                    evolution_item["total_blanks_per_loc"] = round(
+                        evolution_item["total_blanks"] / max(evolution_item["total_loc"], 1), 2)
+                    evolution_item["total_loc_per_function"] = round(
+                        evolution_item["total_loc"] / max(evolution_item["total_num_funs"], 1), 2)
+
+                    evolution_items.append(evolution_item)
+
+                    if len(evolution_items) >= self.elastic.max_items_bulk:
+                        num_items += len(evolution_items)
+                        ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())
+                        evolution_items = []
+
+                    to_month = to_month + relativedelta(months=+interval)
+
+                if len(evolution_items) > 0:
+                    num_items += len(evolution_items)
+                    ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())
+
+        if num_items != ins_items:
+            missing = num_items - ins_items
+            logger.error("%s/%s missing items for Graal CoCom Analysis Study", str(missing), str(num_items))
+        else:
+            logger.info("%s items inserted for Graal CoCom Analysis Study", str(num_items))