@@ -139,8 +139,8 @@ def __add_derived_metrics(self, file_analysis, eitem):
139
139
140
140
# TODO: Fix Logic: None rather than 1
141
141
if None not in [eitem ["loc" ], eitem ["comments" ], eitem ["num_funs" ]]:
142
- eitem ["loc_per_comment_lines " ] = eitem ["loc " ] / max (eitem ["comments " ], 1 )
143
- eitem ["loc_per_blank_lines " ] = eitem ["loc " ] / max (eitem ["blanks " ], 1 )
142
+ eitem ["comments_per_loc " ] = eitem ["comments " ] / max (eitem ["loc " ], 1 )
143
+ eitem ["blanks_per_loc " ] = eitem ["blanks " ] / max (eitem ["loc " ], 1 )
144
144
eitem ["loc_per_function" ] = eitem ["loc" ] / max (eitem ["num_funs" ], 1 )
145
145
else :
146
146
eitem ["loc_per_comment_lines" ] = eitem ["loc_per_blank_lines" ] = eitem ["loc_per_function" ] = None
@@ -176,7 +176,7 @@ def enrich_items(self, ocean_backend, events=False):
176
176
return num_items
177
177
178
178
def enrich_repo_analysis(self, ocean_backend, enrich_backend, no_incremental=False,
                         out_index="cocom_enrich_graal_repo", interval_months=(3,),
                         date_field="grimoire_creation_date"):
    """Build per-repository evolution items and upload them to `out_index`.

    The unique repositories are read from the enriched index
    (`enrich_backend.elastic.index`). For every repository and every
    interval, the study walks from a start month up to (but excluding) the
    current month in steps of `interval` months. At each step it queries the
    files known at that time, sums every metric listed in `self.metrics`
    over the first hit of each file bucket, derives three ratios and queues
    the resulting item for bulk upload.

    :param ocean_backend: not used by this study
    :param enrich_backend: backend providing `elastic_url`, `elastic.url`
        and `elastic.index` for the input data
    :param no_incremental: not used by this study
    :param out_index: name of the index the evolution items are written to
    :param interval_months: step sizes in months; an iterable of values
        accepted by `int()`. A single int or str is also accepted and
        treated as a one-element list.
    :param date_field: not used by this study
    :raises ValueError: if an interval is outside 1..12 (the interval is
        also used as a calendar month when aligning the start date)
    """
    # NOTE(review): the argument passed to .format() lives on lines that were
    # elided between two diff hunks; it is reconstructed here -- confirm
    # against the full file.
    logger.info("Doing enrich_repository_analysis study for index {}"
                .format(self.elastic.anonymize_url(self.elastic.index_url)))

    es_in = ES([enrich_backend.elastic_url], retry_on_timeout=True, timeout=100,
               verify_certs=self.elastic.requests.verify, connection_class=RequestsHttpConnection)
    in_index = enrich_backend.elastic.index

    # A bare scalar must be wrapped first: map(int, 3) raises and
    # map(int, "12") would silently yield [1, 2].
    if isinstance(interval_months, (int, str)):
        interval_months = [interval_months]
    interval_months = [int(months) for months in interval_months]

    # The interval doubles as the `month` argument of datetime.replace()
    # below, so reject unusable values before any query is sent.
    for interval in interval_months:
        if not 1 <= interval <= 12:
            raise ValueError("interval_months values must be between 1 and 12, got {}"
                             .format(interval))

    unique_repos = es_in.search(
        index=in_index,
        body=get_unique_repository())

    repositories = [repo['key'] for repo in unique_repos['aggregations']['unique_repos'].get('buckets', [])]
    current_month = datetime_utcnow().replace(day=1, hour=0, minute=0, second=0)

    # Counters are cumulative over all repositories and intervals.
    num_items = 0
    ins_items = 0

    for repository_url in repositories:
        es_out = ElasticSearch(enrich_backend.elastic.url, out_index)
        evolution_items = []
        repository_name = repository_url.split("/")[-1]

        for interval in interval_months:
            to_month = get_to_date(es_in, in_index, out_index, repository_url, interval)
            # Align the first sample to the first day of month number
            # `interval` in the year returned by get_to_date.
            to_month = to_month.replace(month=interval, day=1, hour=0, minute=0, second=0)

            while to_month < current_month:
                files_at_time = es_in.search(
                    index=in_index,
                    body=get_files_at_time(repository_url, to_month.isoformat())
                )['aggregations']['file_stats'].get("buckets", [])

                if not files_at_time:
                    # Nothing recorded at this point in time; move on.
                    to_month = to_month + relativedelta(months=+interval)
                    continue

                evolution_item = {
                    "id": "{}_{}_{}".format(to_month.isoformat(), repository_name, interval),
                    "origin": repository_url,
                    "interval_months": interval,
                    "study_creation_date": to_month.isoformat(),
                    "total_files": len(files_at_time)
                }

                for file_ in files_at_time:
                    file_details = file_["1"]["hits"]["hits"][0]["_source"]

                    for metric in self.metrics:
                        total_metric = "total_" + metric
                        evolution_item[total_metric] = evolution_item.get(total_metric, 0)
                        # A metric stored as None contributes nothing to the total.
                        evolution_item[total_metric] += \
                            file_details[metric] if file_details[metric] is not None else 0

                # TODO: Fix Logic: None rather than 1
                # Denominators are clamped to 1 to avoid division by zero.
                evolution_item["total_comments_per_loc"] = evolution_item["total_comments"] / \
                    max(evolution_item["total_loc"], 1)
                evolution_item["total_blanks_per_loc"] = evolution_item["total_blanks"] / \
                    max(evolution_item["total_loc"], 1)
                evolution_item["total_loc_per_function"] = evolution_item["total_loc"] / \
                    max(evolution_item["total_num_funs"], 1)

                evolution_items.append(evolution_item)

                if len(evolution_items) >= self.elastic.max_items_bulk:
                    num_items += len(evolution_items)
                    ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())
                    evolution_items = []

                to_month = to_month + relativedelta(months=+interval)

        # Flush the remainder once per repository, after all intervals, so
        # that no queued item is uploaded (and counted) more than once.
        if evolution_items:
            num_items += len(evolution_items)
            ins_items += es_out.bulk_upload(evolution_items, self.get_field_unique_id())

    if num_items != ins_items:
        missing = num_items - ins_items
        logger.error("%s/%s missing items for Graal CoCom Analysis Study", str(missing), str(num_items))
    else:
        logger.info("%s items inserted for Graal CoCom Analysis Study", str(num_items))
0 commit comments