Skip to content

Commit d198c34

Browse files
committed
feat(fossid-webapp): Map FossID snippets to the ScanSummary
When FossID identifies a file matching snippets, it is a pending file. An operator needs to log in to the FossID UI and either use the license of a snippet or manually enter different license information. Then the file is marked as "identified". Currently, the FossID scanner in ORT returns the list of all pending files in `ScanSummary` issues, with a severity of `HINT`. This commit maps the snippets of pending files using the newly created snippet data model. The pending files are still listed as issues: This will be removed in a future commit as it is a breaking change. Signed-off-by: Nicolas Nobelis <[email protected]>
1 parent ee0014e commit d198c34

File tree

3 files changed

+217
-5
lines changed

3 files changed

+217
-5
lines changed

scanner/src/main/kotlin/scanners/fossid/FossId.kt

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ import kotlin.time.Duration.Companion.minutes
2727
import kotlin.time.Duration.Companion.seconds
2828
import kotlin.time.measureTimedValue
2929

30+
import kotlinx.coroutines.Dispatchers
31+
import kotlinx.coroutines.async
32+
import kotlinx.coroutines.awaitAll
3033
import kotlinx.coroutines.delay
3134
import kotlinx.coroutines.runBlocking
3235
import kotlinx.coroutines.withTimeoutOrNull
@@ -48,6 +51,7 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
4851
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
4952
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
5053
import org.ossreviewtoolkit.clients.fossid.listScansForProject
54+
import org.ossreviewtoolkit.clients.fossid.listSnippets
5155
import org.ossreviewtoolkit.clients.fossid.model.Project
5256
import org.ossreviewtoolkit.clients.fossid.model.Scan
5357
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
@@ -56,28 +60,40 @@ import org.ossreviewtoolkit.clients.fossid.model.status.DownloadStatus
5660
import org.ossreviewtoolkit.clients.fossid.model.status.ScanStatus
5761
import org.ossreviewtoolkit.clients.fossid.runScan
5862
import org.ossreviewtoolkit.downloader.VersionControlSystem
63+
import org.ossreviewtoolkit.model.ArtifactProvenance
64+
import org.ossreviewtoolkit.model.Hash
5965
import org.ossreviewtoolkit.model.Issue
66+
import org.ossreviewtoolkit.model.LicenseFinding
6067
import org.ossreviewtoolkit.model.Package
68+
import org.ossreviewtoolkit.model.PackageProvider
6169
import org.ossreviewtoolkit.model.Provenance
70+
import org.ossreviewtoolkit.model.RemoteArtifact
6271
import org.ossreviewtoolkit.model.RepositoryProvenance
6372
import org.ossreviewtoolkit.model.ScanResult
6473
import org.ossreviewtoolkit.model.ScanSummary
6574
import org.ossreviewtoolkit.model.ScannerDetails
6675
import org.ossreviewtoolkit.model.Severity
76+
import org.ossreviewtoolkit.model.TextLocation
6777
import org.ossreviewtoolkit.model.UnknownProvenance
6878
import org.ossreviewtoolkit.model.VcsType
6979
import org.ossreviewtoolkit.model.config.DownloaderConfiguration
7080
import org.ossreviewtoolkit.model.config.Options
7181
import org.ossreviewtoolkit.model.config.ScannerConfiguration
7282
import org.ossreviewtoolkit.model.createAndLogIssue
83+
import org.ossreviewtoolkit.model.utils.PurlType
84+
import org.ossreviewtoolkit.model.utils.Snippet
85+
import org.ossreviewtoolkit.model.utils.SnippetFinding
7386
import org.ossreviewtoolkit.scanner.AbstractScannerWrapperFactory
7487
import org.ossreviewtoolkit.scanner.PackageScannerWrapper
7588
import org.ossreviewtoolkit.scanner.ProvenanceScannerWrapper
7689
import org.ossreviewtoolkit.scanner.ScanContext
7790
import org.ossreviewtoolkit.scanner.ScannerCriteria
91+
import org.ossreviewtoolkit.utils.common.collectMessages
7892
import org.ossreviewtoolkit.utils.common.enumSetOf
7993
import org.ossreviewtoolkit.utils.common.replaceCredentialsInUri
8094
import org.ossreviewtoolkit.utils.ort.showStackTrace
95+
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
96+
import org.ossreviewtoolkit.utils.spdx.toSpdx
8197

8298
/**
8399
* A wrapper for [FossID](https://fossid.com/).
@@ -746,7 +762,23 @@ class FossId internal constructor(
746762
"${pendingFiles.size} pending files have been returned for scan '$scanCode'."
747763
}
748764

749-
return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles)
765+
val snippets = runBlocking(Dispatchers.IO) {
766+
pendingFiles.map {
767+
async {
768+
logger.info { "Listing snippet for $it..." }
769+
val snippetResponse = service.listSnippets(config.user, config.apiKey, scanCode, it)
770+
.checkResponse("list snippets")
771+
val snippets = checkNotNull(snippetResponse.data) {
772+
"Snippet could not be listed. Response was ${snippetResponse.message}."
773+
}
774+
logger.info { "${snippets.size} snippets." }
775+
776+
it to snippets.toSet()
777+
}
778+
}.awaitAll().toMap()
779+
}
780+
781+
return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles, snippets)
750782
}
751783

752784
/**
@@ -760,10 +792,61 @@ class FossId internal constructor(
760792
scanId: String
761793
): ScanResult {
762794
// TODO: Maybe get issues from FossID (see has_failed_scan_files, get_failed_files and maybe get_scan_log).
795+
796+
// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
763797
val issues = rawResults.listPendingFiles.mapTo(mutableListOf()) {
764798
Issue(source = name, message = "Pending identification for '$it'.", severity = Severity.HINT)
765799
}
766800

801+
val snippetFindings = mutableSetOf<SnippetFinding>()
802+
val fakeLocation = TextLocation(".", TextLocation.UNKNOWN_LINE)
803+
snippetFindings += rawResults.listSnippets.flatMap { (file, rawSnippets) ->
804+
val snippets = rawSnippets.map {
805+
val license = it.artifactLicense?.let {
806+
runCatching {
807+
LicenseFinding.createAndMap(
808+
it,
809+
fakeLocation,
810+
detectedLicenseMapping = scannerConfig.detectedLicenseMapping
811+
).license
812+
}.onFailure { spdxException ->
813+
issues += FossId.createAndLogIssue(
814+
source = "FossId",
815+
message = "Failed to parse license '$it' as an SPDX expression:" +
816+
" ${spdxException.collectMessages()}"
817+
)
818+
}.getOrNull()
819+
} ?: SpdxConstants.NOASSERTION.toSpdx()
820+
821+
// FossID does not return the hash of the remote artifact. Instead, it returns the MD5 hash of the
822+
// matched file in the remote artifact as part of the "match_file_id" property.
823+
val snippetProvenance = it.url?.let { url ->
824+
ArtifactProvenance(RemoteArtifact(url, Hash.NONE))
825+
} ?: UnknownProvenance
826+
val purlType = it.url?.let { url -> urlToPackageType(url, issues)?.toString() } ?: "generic"
827+
828+
// TODO: FossID doesn't return the line numbers of the match, only the character range. One must use
829+
// another call "getMatchedLine" to retrieve the matched line numbers. Unfortunately, this is a
830+
// call per snippet which is too expensive. When it is available for a batch of snippets, it can
831+
// be used here.
832+
Snippet(
833+
it.score.toFloat(),
834+
TextLocation(it.file, TextLocation.UNKNOWN_LINE),
835+
snippetProvenance,
836+
"pkg:$purlType/${it.author}/${it.artifact}@${it.version}",
837+
license
838+
)
839+
}
840+
841+
val sourceLocation = TextLocation(file, TextLocation.UNKNOWN_LINE)
842+
snippets.map {
843+
SnippetFinding(
844+
sourceLocation,
845+
it
846+
)
847+
}
848+
}
849+
767850
val ignoredFiles = rawResults.listIgnoredFiles.associateBy { it.path }
768851

769852
val (licenseFindings, copyrightFindings) = rawResults.markedAsIdentifiedFiles.ifEmpty {
@@ -776,6 +859,7 @@ class FossId internal constructor(
776859
packageVerificationCode = "",
777860
licenseFindings = licenseFindings.toSortedSet(),
778861
copyrightFindings = copyrightFindings.toSortedSet(),
862+
snippetFindings = snippetFindings,
779863
issues = issues
780864
)
781865

@@ -786,4 +870,32 @@ class FossId internal constructor(
786870
mapOf(SCAN_CODE_KEY to scanCode, SCAN_ID_KEY to scanId, SERVER_URL_KEY to config.serverUrl)
787871
)
788872
}
873+
874+
/**
 * Map the given [url] to a [PurlType] based on the [PackageProvider] that [PackageProvider.get] determines for it.
 * If the provider has no corresponding [PurlType] (which includes the case that no provider was determined), add an
 * issue to [issues] and return null.
 */
private fun urlToPackageType(url: String, issues: MutableList<Issue>): PurlType? {
    val provider = PackageProvider.get(url)

    // Providers without an entry here deliberately fall through to null and are reported as an issue below.
    val purlType = when (provider) {
        PackageProvider.COCOAPODS -> PurlType.COCOAPODS
        PackageProvider.CRATES_IO -> PurlType.CARGO
        PackageProvider.DEBIAN -> PurlType.DEBIAN
        PackageProvider.GITHUB -> PurlType.GITHUB
        PackageProvider.GITLAB -> PurlType.GITLAB
        PackageProvider.GOLANG -> PurlType.GOLANG
        PackageProvider.MAVEN_CENTRAL, PackageProvider.MAVEN_GOOGLE -> PurlType.MAVEN
        PackageProvider.NPM_JS -> PurlType.NPM
        PackageProvider.NUGET -> PurlType.NUGET
        PackageProvider.PACKAGIST -> PurlType.COMPOSER
        PackageProvider.PYPI -> PurlType.PYPI
        PackageProvider.RUBYGEMS -> PurlType.GEM
        else -> null
    }

    if (purlType == null) {
        issues += FossId.createAndLogIssue(
            source = "FossId",
            message = "Cannot determine PURL type for url '$url' and provider '$provider'."
        )
    }

    return purlType
}
789901
}

scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner.scanners.fossid
2222
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
2323
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
2424
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
25+
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
2526
import org.ossreviewtoolkit.clients.fossid.model.summary.Summarizable
2627
import org.ossreviewtoolkit.model.CopyrightFinding
2728
import org.ossreviewtoolkit.model.Issue
@@ -37,7 +38,8 @@ internal data class RawResults(
3738
val identifiedFiles: List<IdentifiedFile>,
3839
val markedAsIdentifiedFiles: List<MarkedAsIdentifiedFile>,
3940
val listIgnoredFiles: List<IgnoredFile>,
40-
val listPendingFiles: List<String>
41+
val listPendingFiles: List<String>,
42+
val listSnippets: Map<String, Set<Snippet>>
4143
)
4244

4345
/**

scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,16 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
6262
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
6363
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
6464
import org.ossreviewtoolkit.clients.fossid.listScansForProject
65+
import org.ossreviewtoolkit.clients.fossid.listSnippets
6566
import org.ossreviewtoolkit.clients.fossid.model.Scan
6667
import org.ossreviewtoolkit.clients.fossid.model.identification.common.LicenseMatchType
6768
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
6869
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
6970
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.License
7071
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.LicenseFile
7172
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
73+
import org.ossreviewtoolkit.clients.fossid.model.result.MatchType
74+
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
7275
import org.ossreviewtoolkit.clients.fossid.model.rules.IgnoreRule
7376
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
7477
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleType
@@ -78,23 +81,29 @@ import org.ossreviewtoolkit.clients.fossid.model.status.UnversionedScanDescripti
7881
import org.ossreviewtoolkit.clients.fossid.runScan
7982
import org.ossreviewtoolkit.downloader.VersionControlSystem
8083
import org.ossreviewtoolkit.downloader.vcs.Git
84+
import org.ossreviewtoolkit.model.ArtifactProvenance
8185
import org.ossreviewtoolkit.model.CopyrightFinding
86+
import org.ossreviewtoolkit.model.Hash
8287
import org.ossreviewtoolkit.model.Identifier
8388
import org.ossreviewtoolkit.model.Issue
8489
import org.ossreviewtoolkit.model.LicenseFinding
8590
import org.ossreviewtoolkit.model.Package
8691
import org.ossreviewtoolkit.model.PackageType
92+
import org.ossreviewtoolkit.model.RemoteArtifact
8793
import org.ossreviewtoolkit.model.ScanResult
8894
import org.ossreviewtoolkit.model.Severity
8995
import org.ossreviewtoolkit.model.TextLocation
9096
import org.ossreviewtoolkit.model.VcsInfo
9197
import org.ossreviewtoolkit.model.VcsType
9298
import org.ossreviewtoolkit.model.config.ScannerConfiguration
99+
import org.ossreviewtoolkit.model.utils.Snippet as OrtSnippet
100+
import org.ossreviewtoolkit.model.utils.SnippetFinding
93101
import org.ossreviewtoolkit.scanner.ScanContext
94102
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_CODE_KEY
95103
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_ID_KEY
96104
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SERVER_URL_KEY
97105
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.convertGitUrlToProjectName
106+
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
98107

99108
@Suppress("LargeClass")
100109
class FossIdTest : WordSpec({
@@ -314,6 +323,7 @@ class FossIdTest : WordSpec({
314323
summary.licenseFindings shouldContainExactlyInAnyOrder expectedLicenseFindings
315324
}
316325

326+
// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
317327
"report pending files as issues" {
318328
val projectCode = projectCode(PROJECT)
319329
val scanCode = scanCode(PROJECT, null)
@@ -328,19 +338,57 @@ class FossIdTest : WordSpec({
328338
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
329339
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
330340
.expectDownload(scanCode)
331-
.mockFiles(scanCode, pendingRange = 4..5)
341+
.mockFiles(scanCode, pendingRange = 4..5, snippetRange = 1..5)
332342

333343
val fossId = createFossId(config)
334344

335345
val summary = fossId.scan(createPackage(pkgId, vcsInfo)).summary
336346

337-
val expectedIssues = listOf(createPendingFile(4), createPendingFile(5)).map {
347+
val pendingFilesIssues = listOf(createPendingFile(4), createPendingFile(5)).map {
338348
Issue(Instant.EPOCH, "FossId", "Pending identification for '$it'.", Severity.HINT)
339349
}
350+
val urlMappingIssues = (1..5).map {
351+
Issue(
352+
Instant.EPOCH,
353+
"FossId",
354+
"Cannot determine PURL type for url 'url$it' and provider 'null'.",
355+
Severity.ERROR
356+
)
357+
}
358+
// Add the mapping issues from the snippet fake URLs: 5 issues for each pending file.
359+
val expectedIssues = pendingFilesIssues + urlMappingIssues + urlMappingIssues
340360

341361
summary.issues.map { it.copy(timestamp = Instant.EPOCH) } shouldBe expectedIssues
342362
}
343363

364+
"report pending files as snippets" {
365+
val projectCode = projectCode(PROJECT)
366+
val scanCode = scanCode(PROJECT, null)
367+
val config = createConfig(deltaScans = false)
368+
val vcsInfo = createVcsInfo()
369+
val scan = createScan(vcsInfo.url, "${vcsInfo.revision}_other", scanCode)
370+
val pkgId = createIdentifier(index = 42)
371+
372+
FossIdRestService.create(config.serverUrl)
373+
.expectProjectRequest(projectCode)
374+
.expectListScans(projectCode, listOf(scan))
375+
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
376+
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
377+
.expectDownload(scanCode)
378+
.mockFiles(scanCode, pendingRange = 1..5, snippetRange = 1..5)
379+
380+
val fossId = createFossId(config)
381+
382+
val summary = fossId.scan(createPackage(pkgId, vcsInfo)).summary
383+
384+
val expectedPendingFile = (1..5).map(::createPendingFile).toSet()
385+
val expectedSnippetFindings = (1..5).map(::createSnippetFindings).flatten()
386+
387+
summary.snippetFindings shouldHaveSize expectedPendingFile.size * 5
388+
summary.snippetFindings.map { it.sourceLocation.path }.toSet() shouldBe expectedPendingFile
389+
summary.snippetFindings shouldBe expectedSnippetFindings
390+
}
391+
344392
"create a new project if none exists yet" {
345393
val projectCode = projectCode(PROJECT)
346394
val scanCode = scanCode(PROJECT, null)
@@ -1238,6 +1286,52 @@ private fun createIgnoredFile(index: Int): IgnoredFile =
12381286
*/
12391287
private fun createPendingFile(index: Int): String = "/pending/file/$index"
12401288

1289+
/**
 * Create a FossID [Snippet] test fixture whose properties are all derived from the given [index], except for the
 * match type, which is always [MatchType.PARTIAL], and one license value, which is always "MIT".
 */
private fun createSnippet(index: Int): Snippet {
    // All arguments are passed positionally, so their order must match the [Snippet] constructor exactly.
    return Snippet(
        index,
        "created$index",
        index,
        index,
        index,
        MatchType.PARTIAL,
        "reason$index",
        // Coordinates of the matched artifact.
        "author$index",
        "artifact$index",
        "version$index",
        // Presumably the artifact license that the scanner maps to the snippet license - TODO confirm.
        "MIT",
        "releaseDate$index",
        "mirror$index",
        "file$index",
        "fileLicense$index",
        "url$index",
        "hits$index",
        index,
        "updated$index",
        "cpe$index",
        // Presumably the score, which the scanner converts to a float - TODO confirm.
        "$index",
        "matchField$index",
        "classification$index",
        "highlighting$index"
    )
}
1318+
1319+
/**
 * Generate the set of ORT snippet findings expected for the pending file with the given [index]. The set contains one
 * finding for each of the snippet indices 1 to 5, and all findings share the pending file as their source location.
 */
private fun createSnippetFindings(index: Int): Set<SnippetFinding> {
    // Reuse the pending file helper so that the expected source path cannot diverge from the mocked pending files.
    val sourceLocation = TextLocation(createPendingFile(index), TextLocation.UNKNOWN_LINE)

    return (1..5).mapTo(mutableSetOf<SnippetFinding>()) { snippetIndex ->
        SnippetFinding(
            sourceLocation,
            OrtSnippet(
                snippetIndex.toFloat(),
                TextLocation("file$snippetIndex", TextLocation.UNKNOWN_LINE),
                ArtifactProvenance(RemoteArtifact("url$snippetIndex", Hash.NONE)),
                // The fake snippet URLs cannot be mapped to a package provider, hence the "generic" PURL type.
                "pkg:generic/author$snippetIndex/artifact$snippetIndex@version$snippetIndex",
                SpdxExpression.parse("MIT")
            )
        )
    }
}
1334+
12411335
/**
12421336
* Prepare this service mock to answer a request for a project with the given [projectCode]. Return a response with
12431337
* the given [status] and [error].
@@ -1348,12 +1442,14 @@ private fun FossIdServiceWithVersion.mockFiles(
13481442
identifiedRange: IntRange = IntRange.EMPTY,
13491443
markedRange: IntRange = IntRange.EMPTY,
13501444
ignoredRange: IntRange = IntRange.EMPTY,
1351-
pendingRange: IntRange = IntRange.EMPTY
1445+
pendingRange: IntRange = IntRange.EMPTY,
1446+
snippetRange: IntRange = IntRange.EMPTY
13521447
): FossIdServiceWithVersion {
13531448
val identifiedFiles = identifiedRange.map(::createIdentifiedFile)
13541449
val markedFiles = markedRange.map(::createMarkedIdentifiedFile)
13551450
val ignoredFiles = ignoredRange.map(::createIgnoredFile)
13561451
val pendingFiles = pendingRange.map(::createPendingFile)
1452+
val snippets = snippetRange.map(::createSnippet)
13571453

13581454
coEvery { listIdentifiedFiles(USER, API_KEY, scanCode) } returns
13591455
PolymorphicResponseBody(
@@ -1367,6 +1463,8 @@ private fun FossIdServiceWithVersion.mockFiles(
13671463
PolymorphicResponseBody(status = 1, data = PolymorphicList(ignoredFiles))
13681464
coEvery { listPendingFiles(USER, API_KEY, scanCode) } returns
13691465
PolymorphicResponseBody(status = 1, data = PolymorphicList(pendingFiles))
1466+
coEvery { listSnippets(USER, API_KEY, scanCode, any()) } returns
1467+
PolymorphicResponseBody(status = 1, data = PolymorphicList(snippets))
13701468

13711469
return this
13721470
}

0 commit comments

Comments
 (0)