Skip to content

Commit b9426a5

Browse files
committed
[colic] Add support of scancode_cli and its corresponding tests to the colic backend
A faster version of scancode is now added to the colic backend.
scancode_cli usage: colic https://github.com/chaoss/grimoirelab-toolkit --git-path /tmp/scancode_cli --exec-path /home/scancode-toolkit/etc/scripts/scancli.py --category code_license_scancode_cli
Added tests for the scancode_cli analyzer, which also required updating the tests for the in-place implementation of the colic backend.
Signed-off-by: inishchith <[email protected]>
1 parent 1915529 commit b9426a5

File tree

5 files changed

+199
-29
lines changed

5 files changed

+199
-29
lines changed

graal/backends/core/analyzers/scancode.py

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,44 @@
2222

2323
import json
2424
import subprocess
25-
2625
from graal.graal import (GraalError,
2726
GraalRepository)
2827
from .analyzer import Analyzer
2928

3029

30+
SCANCODE_CLI_EXEC = "etc/scripts/scancli.py"
31+
CONFIGURE_EXEC = 'configure'
32+
33+
3134
class ScanCode(Analyzer):
3235
"""A wrapper for nexB/scancode-toolkit.
3336
3437
This class allows to call scancode-toolkit over a file, parses
3538
the result of the analysis and returns it as a dict.
3639
3740
:param exec_path: path of the scancode executable
41+
:param cli: True, if scancode_cli is used
3842
"""
39-
version = '0.1.0'
43+
version = '0.2.0'
4044

41-
def __init__(self, exec_path):
45+
def __init__(self, exec_path, cli=False):
4246
if not GraalRepository.exists(exec_path):
4347
raise GraalError(cause="executable path %s not valid" % exec_path)
4448

4549
self.exec_path = exec_path
50+
self.cli = cli
4651

47-
def analyze(self, **kwargs):
48-
"""Add information about license
52+
if self.cli:
53+
exec_path = self.exec_path.replace(SCANCODE_CLI_EXEC, CONFIGURE_EXEC)
54+
_ = subprocess.check_output([exec_path]).decode("utf-8")
4955

50-
:param file_path: file path
56+
def __analyze_scancode(self, file_path):
57+
"""Add information about license using scancode
5158
52-
:returns result: dict of the results of the analysis
59+
:param file_path: file path (in case of scancode)
60+
:param file_paths: file paths (in case of scancode_cli for concurrent execution on files)
5361
"""
5462
result = {'licenses': []}
55-
file_path = kwargs['file_path']
56-
5763
try:
5864
msg = subprocess.check_output([self.exec_path, '--json-pp', '-', '--license', file_path]).decode("utf-8")
5965
except subprocess.CalledProcessError as e:
@@ -62,9 +68,61 @@ def analyze(self, **kwargs):
6268
subprocess._cleanup()
6369

6470
licenses_raw = json.loads(msg)
65-
if 'files' not in licenses_raw:
66-
return result
71+
if 'files' in licenses_raw:
72+
result['licenses'] = licenses_raw['files'][0]['licenses']
6773

68-
result['licenses'] = licenses_raw['files'][0]['licenses']
74+
return result
75+
76+
def __analyze_scancode_cli(self, file_paths):
77+
"""Add information about license using scancode-cli
78+
79+
:param file_paths: file paths
80+
:returns result: dict of the results of the analysis
81+
"""
82+
result = {'files': []}
83+
84+
try:
85+
cmd_scancli = ['python3', self.exec_path]
86+
cmd_scancli.extend(file_paths)
87+
msg = subprocess.check_output(cmd_scancli).decode("utf-8")
88+
except subprocess.CalledProcessError as e:
89+
raise GraalError(cause="Scancode failed at %s, %s" % (file_paths,
90+
e.output.decode("utf-8")))
91+
finally:
92+
subprocess._cleanup()
93+
94+
output_content = ''
95+
outputs_json = []
96+
for line in msg.split('\n'):
97+
if line == '':
98+
if output_content:
99+
output_json = json.loads(output_content)[1:]
100+
outputs_json.append(output_json)
101+
output_content = ''
102+
else:
103+
output_content += line
104+
105+
if output_content:
106+
output_json = json.loads(output_content)[1:]
107+
outputs_json.append(output_json)
108+
109+
for output_json in outputs_json:
110+
file_info = output_json[0]['files'][0]
111+
result['files'].append(file_info)
112+
113+
return result
114+
115+
def analyze(self, **kwargs):
116+
"""Add information about license
117+
118+
:param file_path: file path
119+
:param file_paths: file paths ( in case of scancode_cli for concurrent execution on files )
120+
121+
:returns result: dict of the results of the analysis
122+
"""
123+
if self.cli:
124+
result = self.__analyze_scancode_cli(kwargs['file_paths'])
125+
else:
126+
result = self.__analyze_scancode(kwargs['file_path'])
69127

70128
return result

graal/backends/core/colic.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@
3333

3434
NOMOS = 'nomos'
3535
SCANCODE = 'scancode'
36+
SCANCODE_CLI = 'scancode_cli'
3637

3738
CATEGORY_COLIC_NOMOS = 'code_license_' + NOMOS
3839
CATEGORY_COLIC_SCANCODE = 'code_license_' + SCANCODE
40+
CATEGORY_COLIC_SCANCODE_CLI = 'code_license_' + SCANCODE_CLI
3941

4042
logger = logging.getLogger(__name__)
4143

@@ -44,7 +46,7 @@ class CoLic(Graal):
4446
"""CoLic backend.
4547
4648
This class extends the Graal backend. It gathers license information
47-
using Nomos
49+
using Nomos, Scancode or Scancode-cli
4850
4951
:param uri: URI of the Git repository
5052
:param git_path: path to the repository or to the log file
@@ -59,9 +61,11 @@ class CoLic(Graal):
5961
:raises RepositoryError: raised when there was an error cloning or
6062
updating the repository.
6163
"""
62-
version = '0.4.0'
64+
version = '0.5.0'
6365

64-
CATEGORIES = [CATEGORY_COLIC_NOMOS, CATEGORY_COLIC_SCANCODE]
66+
CATEGORIES = [CATEGORY_COLIC_NOMOS,
67+
CATEGORY_COLIC_SCANCODE,
68+
CATEGORY_COLIC_SCANCODE_CLI]
6569

6670
def __init__(self, uri, git_path, exec_path, worktreepath=DEFAULT_WORKTREE_PATH,
6771
entrypoint=None, in_paths=None, out_paths=None,
@@ -84,6 +88,8 @@ def fetch(self, category=CATEGORY_COLIC_NOMOS, paths=None,
8488

8589
if category == CATEGORY_COLIC_SCANCODE:
8690
self.analyzer_kind = SCANCODE
91+
elif category == CATEGORY_COLIC_SCANCODE_CLI:
92+
self.analyzer_kind = SCANCODE_CLI
8793
elif category == CATEGORY_COLIC_NOMOS:
8894
self.analyzer_kind = NOMOS
8995
else:
@@ -101,13 +107,17 @@ def fetch(self, category=CATEGORY_COLIC_NOMOS, paths=None,
101107
def metadata_category(item):
102108
"""Extracts the category from a Code item.
103109
104-
This backend generates two types of item which can be:
105-
'code_license_nomos' or 'code_license_scancode'.
110+
This backend generates the following types of item:
111+
- 'code_license_nomos'
112+
- 'code_license_scancode'
113+
- 'code_license_scancode_cli'
106114
"""
107115
if item['analyzer'] == NOMOS:
108116
return CATEGORY_COLIC_NOMOS
109117
elif item['analyzer'] == SCANCODE:
110118
return CATEGORY_COLIC_SCANCODE
119+
elif item['analyzer'] == SCANCODE_CLI:
120+
return CATEGORY_COLIC_SCANCODE_CLI
111121
else:
112122
raise GraalError(cause="Unknown analyzer %s" % item['analyzer'])
113123

@@ -135,6 +145,7 @@ def _analyze(self, commit):
135145
:param commit: a Perceval commit item
136146
"""
137147
analysis = []
148+
files_to_process = []
138149

139150
for committed_file in commit['files']:
140151

@@ -148,9 +159,18 @@ def _analyze(self, commit):
148159
if not GraalRepository.exists(local_path):
149160
continue
150161

151-
license_info = self.analyzer.analyze(local_path)
152-
license_info.update({'file_path': file_path})
153-
analysis.append(license_info)
162+
if self.analyzer_kind == NOMOS or self.analyzer_kind == SCANCODE:
163+
license_info = self.analyzer.analyze(local_path)
164+
license_info.update({'file_path': file_path})
165+
analysis.append(license_info)
166+
elif self.analyzer_kind == SCANCODE_CLI:
167+
files_to_process.append((file_path, local_path))
168+
169+
if files_to_process:
170+
local_paths = [path[1] for path in files_to_process]
171+
analysis = self.analyzer.analyze(local_paths)
172+
for i in range(len(analysis['files'])):
173+
analysis['files'][i]['file_path'] = files_to_process[i][0]
154174

155175
return analysis
156176

@@ -170,17 +190,20 @@ class LicenseAnalyzer:
170190
"""Class to analyse the content of files
171191
172192
:param exec_path: path of the license analyzer executable
173-
:param kind: the analyzer kind (e.g., NOMOS, SCANCODE)
193+
:param kind: the analyzer kind (e.g., NOMOS, SCANCODE, SCANCODE_CLI)
174194
"""
175195

176196
def __init__(self, exec_path, kind=NOMOS):
197+
self.kind = kind
177198
if kind == SCANCODE:
178199
self.analyzer = ScanCode(exec_path)
200+
elif kind == SCANCODE_CLI:
201+
self.analyzer = ScanCode(exec_path, cli=True)
179202
else:
180203
self.analyzer = Nomos(exec_path)
181204

182205
def analyze(self, file_path):
183-
"""Analyze the content of a file using Nomos
206+
"""Analyze the content of a file using Nomos/Scancode
184207
185208
:param file_path: file path
186209
@@ -189,7 +212,11 @@ def analyze(self, file_path):
189212
'licenses': [..]
190213
}
191214
"""
192-
kwargs = {'file_path': file_path}
215+
if self.kind == SCANCODE_CLI:
216+
kwargs = {'file_paths': file_path}
217+
else:
218+
kwargs = {'file_path': file_path}
219+
193220
analysis = self.analyzer.analyze(**kwargs)
194221

195222
return analysis

tests/test_colic.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,18 @@
3333
from graal.backends.core.analyzers.scancode import ScanCode
3434
from graal.backends.core.colic import (CATEGORY_COLIC_NOMOS,
3535
CATEGORY_COLIC_SCANCODE,
36+
CATEGORY_COLIC_SCANCODE_CLI,
3637
NOMOS,
3738
SCANCODE,
39+
SCANCODE_CLI,
3840
CoLic,
3941
LicenseAnalyzer,
4042
CoLicCommand)
4143
from perceval.utils import DEFAULT_DATETIME
4244
from test_graal import TestCaseGraal
4345
from base_analyzer import (ANALYZER_TEST_FILE,
4446
TestCaseAnalyzer)
45-
from utils import NOMOS_PATH, SCANCODE_PATH
47+
from utils import NOMOS_PATH, SCANCODE_PATH, SCANCODE_CLI_PATH
4648

4749

4850
class TestCoLicBackend(TestCaseGraal):
@@ -122,7 +124,7 @@ def test_fetch_nomossa(self):
122124

123125
cl = CoLic('http://example.com', self.git_path, NOMOS_PATH, self.worktree_path,
124126
in_paths=['perceval/backends/core/github.py'])
125-
commits = [commit for commit in cl.fetch()]
127+
commits = [commit for commit in cl.fetch(category=CATEGORY_COLIC_NOMOS)]
126128

127129
self.assertEqual(len(commits), 1)
128130
self.assertFalse(os.path.exists(cl.worktreepath))
@@ -159,6 +161,36 @@ def test_fetch_scancode(self):
159161
self.assertFalse('parents' in commit['data'])
160162
self.assertFalse('refs' in commit['data'])
161163

164+
def test_fetch_scancode_cli(self):
165+
"""Test whether commits are properly processed"""
166+
167+
cl = CoLic('http://example.com', self.git_path, SCANCODE_CLI_PATH, self.worktree_path,
168+
in_paths=['perceval/backends/core/github.py'])
169+
commits = [commit for commit in cl.fetch(category=CATEGORY_COLIC_SCANCODE_CLI)]
170+
171+
self.assertEqual(len(commits), 1)
172+
self.assertFalse(os.path.exists(cl.worktreepath))
173+
174+
for commit in commits:
175+
self.assertEqual(commit['backend_name'], 'CoLic')
176+
self.assertEqual(commit['category'], CATEGORY_COLIC_SCANCODE_CLI)
177+
self.assertEqual(commit['data']['analysis']['files'][0]['file_path'],
178+
'perceval/backends/core/github.py')
179+
self.assertTrue('Author' in commit['data'])
180+
self.assertTrue('Commit' in commit['data'])
181+
self.assertFalse('files' in commit['data'])
182+
self.assertFalse('parents' in commit['data'])
183+
self.assertFalse('refs' in commit['data'])
184+
185+
def test_fetch_unknown(self):
186+
"""Test whether commits are properly processed"""
187+
188+
cl = CoLic('http://example.com', self.git_path, SCANCODE_CLI_PATH, self.worktree_path,
189+
in_paths=['perceval/backends/core/github.py'])
190+
191+
with self.assertRaises(GraalError):
192+
_ = cl.fetch(category="unknown")
193+
162194
def test_metadata_category(self):
163195
"""Test metadata_category"""
164196

@@ -186,6 +218,18 @@ def test_metadata_category(self):
186218
}
187219
self.assertEqual(CoLic.metadata_category(item), CATEGORY_COLIC_NOMOS)
188220

221+
item = {
222+
"Author": "Valerio Cosentino <[email protected]>",
223+
"AuthorDate": "Fri May 18 18:26:48 2018 +0200",
224+
"Commit": "Valerio Cosentino <[email protected]>",
225+
"CommitDate": "Fri May 18 18:26:48 2018 +0200",
226+
"analysis": [],
227+
"analyzer": "scancode_cli",
228+
"commit": "075f0c6161db5a3b1c8eca45e08b88469bb148b9",
229+
"message": "[perceval] first commit"
230+
}
231+
self.assertEqual(CoLic.metadata_category(item), CATEGORY_COLIC_SCANCODE_CLI)
232+
189233
item = {
190234
"Author": "Valerio Cosentino <[email protected]>",
191235
"AuthorDate": "Fri May 18 18:26:48 2018 +0200",
@@ -218,6 +262,10 @@ def test_init(self):
218262
self.assertIsInstance(license_analyzer, LicenseAnalyzer)
219263
self.assertIsInstance(license_analyzer.analyzer, ScanCode)
220264

265+
license_analyzer = LicenseAnalyzer(SCANCODE_CLI_PATH, SCANCODE_CLI)
266+
self.assertIsInstance(license_analyzer, LicenseAnalyzer)
267+
self.assertIsInstance(license_analyzer.analyzer, ScanCode)
268+
221269
with self.assertRaises(GraalError):
222270
_ = LicenseAnalyzer("/tmp/analyzer", SCANCODE)
223271

@@ -236,6 +284,12 @@ def test_analyze(self):
236284

237285
self.assertIn('licenses', analysis)
238286

287+
file_paths = [os.path.join(self.tmp_data_path, ANALYZER_TEST_FILE)]
288+
license_analyzer = LicenseAnalyzer(SCANCODE_CLI_PATH, kind=SCANCODE_CLI)
289+
analysis = license_analyzer.analyze(file_paths)
290+
291+
self.assertIn('licenses', analysis['files'][0])
292+
239293

240294
class TestCoLicCommand(unittest.TestCase):
241295
"""CoLicCommand tests"""

0 commit comments

Comments
 (0)