Skip to content

Commit 7932272

Browse files
committed
feat(fossid-webapp): Map FossID snippets to the ScanSummary
When FossID identifies a file matching snippets, it is a pending file. An operator needs to log in to the FossID UI and either use the license of a snippet or manually enter different license information. Then the file is marked as "identified". Currently, the FossID scanner in ORT returns the list of all pending files in `ScanSummary` issues, with a severity of `HINT`. This commit maps the snippets of pending files using the newly-created snippet data model. The pending files are still listed as issues: This will be removed in a future commit as it is a breaking change. Signed-off-by: Nicolas Nobelis <[email protected]>
1 parent 9eda92f commit 7932272

File tree

3 files changed

+207
-4
lines changed

3 files changed

+207
-4
lines changed

scanner/src/main/kotlin/scanners/fossid/FossId.kt

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ import kotlin.time.Duration.Companion.minutes
2727
import kotlin.time.Duration.Companion.seconds
2828
import kotlin.time.measureTimedValue
2929

30+
import kotlinx.coroutines.Dispatchers
31+
import kotlinx.coroutines.async
32+
import kotlinx.coroutines.awaitAll
3033
import kotlinx.coroutines.delay
3134
import kotlinx.coroutines.runBlocking
3235
import kotlinx.coroutines.withTimeoutOrNull
@@ -48,6 +51,7 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
4851
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
4952
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
5053
import org.ossreviewtoolkit.clients.fossid.listScansForProject
54+
import org.ossreviewtoolkit.clients.fossid.listSnippets
5155
import org.ossreviewtoolkit.clients.fossid.model.Project
5256
import org.ossreviewtoolkit.clients.fossid.model.Scan
5357
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
@@ -56,28 +60,40 @@ import org.ossreviewtoolkit.clients.fossid.model.status.DownloadStatus
5660
import org.ossreviewtoolkit.clients.fossid.model.status.ScanStatus
5761
import org.ossreviewtoolkit.clients.fossid.runScan
5862
import org.ossreviewtoolkit.downloader.VersionControlSystem
63+
import org.ossreviewtoolkit.model.ArtifactProvenance
64+
import org.ossreviewtoolkit.model.Hash
5965
import org.ossreviewtoolkit.model.Issue
66+
import org.ossreviewtoolkit.model.LicenseFinding
6067
import org.ossreviewtoolkit.model.Package
68+
import org.ossreviewtoolkit.model.PackageProvider
6169
import org.ossreviewtoolkit.model.Provenance
70+
import org.ossreviewtoolkit.model.RemoteArtifact
6271
import org.ossreviewtoolkit.model.RepositoryProvenance
6372
import org.ossreviewtoolkit.model.ScanResult
6473
import org.ossreviewtoolkit.model.ScanSummary
6574
import org.ossreviewtoolkit.model.ScannerDetails
6675
import org.ossreviewtoolkit.model.Severity
76+
import org.ossreviewtoolkit.model.TextLocation
6777
import org.ossreviewtoolkit.model.UnknownProvenance
6878
import org.ossreviewtoolkit.model.VcsType
6979
import org.ossreviewtoolkit.model.config.DownloaderConfiguration
7080
import org.ossreviewtoolkit.model.config.Options
7181
import org.ossreviewtoolkit.model.config.ScannerConfiguration
7282
import org.ossreviewtoolkit.model.createAndLogIssue
83+
import org.ossreviewtoolkit.model.utils.PurlType
84+
import org.ossreviewtoolkit.model.utils.Snippet
85+
import org.ossreviewtoolkit.model.utils.SnippetFinding
7386
import org.ossreviewtoolkit.scanner.AbstractScannerWrapperFactory
7487
import org.ossreviewtoolkit.scanner.PackageScannerWrapper
7588
import org.ossreviewtoolkit.scanner.ProvenanceScannerWrapper
7689
import org.ossreviewtoolkit.scanner.ScanContext
7790
import org.ossreviewtoolkit.scanner.ScannerCriteria
91+
import org.ossreviewtoolkit.utils.common.collectMessages
7892
import org.ossreviewtoolkit.utils.common.enumSetOf
7993
import org.ossreviewtoolkit.utils.common.replaceCredentialsInUri
8094
import org.ossreviewtoolkit.utils.ort.showStackTrace
95+
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
96+
import org.ossreviewtoolkit.utils.spdx.toSpdx
8197

8298
/**
8399
* A wrapper for [FossID](https://fossid.com/).
@@ -746,7 +762,23 @@ class FossId internal constructor(
746762
"${pendingFiles.size} pending files have been returned for scan '$scanCode'."
747763
}
748764

749-
return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles)
765+
val snippets = runBlocking(Dispatchers.IO) {
766+
pendingFiles.map {
767+
async {
768+
logger.info { "Listing snippet for $it..." }
769+
val snippetResponse = service.listSnippets(config.user, config.apiKey, scanCode, it)
770+
.checkResponse("list snippets")
771+
val snippets = requireNotNull(snippetResponse.data) {
772+
"Snippet could not be listed. Response was ${snippetResponse.message}."
773+
}
774+
logger.info { "${snippets.size} snippets." }
775+
776+
it to snippets.toSet()
777+
}
778+
}.awaitAll().toMap()
779+
}
780+
781+
return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles, snippets)
750782
}
751783

752784
/**
@@ -760,10 +792,61 @@ class FossId internal constructor(
760792
scanId: String
761793
): ScanResult {
762794
// TODO: Maybe get issues from FossID (see has_failed_scan_files, get_failed_files and maybe get_scan_log).
795+
796+
// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
763797
val issues = rawResults.listPendingFiles.mapTo(mutableListOf()) {
764798
Issue(source = name, message = "Pending identification for '$it'.", severity = Severity.HINT)
765799
}
766800

801+
val snippetFindings = mutableSetOf<SnippetFinding>()
802+
val fakeLocation = TextLocation(".", TextLocation.UNKNOWN_LINE)
803+
snippetFindings += rawResults.listSnippets.flatMap { (file, rawSnippets) ->
804+
val snippets = rawSnippets.map {
805+
val license = it.artifactLicense?.let {
806+
runCatching {
807+
LicenseFinding.createAndMap(
808+
it,
809+
fakeLocation,
810+
detectedLicenseMapping = scannerConfig.detectedLicenseMapping
811+
).license
812+
}.onFailure { spdxException ->
813+
issues += FossId.createAndLogIssue(
814+
source = "FossId",
815+
message = "Failed to parse license '$it' as an SPDX expression:" +
816+
" ${spdxException.collectMessages()}"
817+
)
818+
}.getOrNull()
819+
} ?: SpdxConstants.NOASSERTION.toSpdx()
820+
821+
// FossID does not return the hash of the remote artifact: it returns instead, in the property
822+
// "match_file_id", the MD5 hash of the matched file IN the remote artifact.
823+
val snippetProvenance = it.url?.let { url ->
824+
ArtifactProvenance(RemoteArtifact(url, Hash.NONE))
825+
} ?: UnknownProvenance
826+
val purlType = it.url?.let { url -> urlToPackageType(url, issues)?.toString() } ?: "generic"
827+
828+
// TODO: FossId doesn't return the line numbers of the match, only the character range. One must use
829+
// another call "getMatchedLine" to retrieve the matched line numbers. Unfortunately, this is a call
830+
// per snippet which is too expensive. When it is available for a batch of snippets, it can be used
831+
// here.
832+
Snippet(
833+
it.score.toFloat(),
834+
TextLocation(it.file, TextLocation.UNKNOWN_LINE),
835+
snippetProvenance,
836+
"pkg:$purlType/${it.author}/${it.artifact}@${it.version}",
837+
license
838+
)
839+
}
840+
841+
val sourceLocation = TextLocation(file, TextLocation.UNKNOWN_LINE)
842+
snippets.map {
843+
SnippetFinding(
844+
sourceLocation,
845+
it
846+
)
847+
}
848+
}
849+
767850
val ignoredFiles = rawResults.listIgnoredFiles.associateBy { it.path }
768851

769852
val (licenseFindings, copyrightFindings) = rawResults.markedAsIdentifiedFiles.ifEmpty {
@@ -776,6 +859,7 @@ class FossId internal constructor(
776859
packageVerificationCode = "",
777860
licenseFindings = licenseFindings.toSortedSet(),
778861
copyrightFindings = copyrightFindings.toSortedSet(),
862+
snippetFindings = snippetFindings,
779863
issues = issues
780864
)
781865

@@ -786,4 +870,33 @@ class FossId internal constructor(
786870
mapOf(SCAN_CODE_KEY to scanCode, SCAN_ID_KEY to scanId, SERVER_URL_KEY to config.serverUrl)
787871
)
788872
}
873+
874+
/**
 * Map the given [url] to a [PurlType] based on the [PackageProvider] returned by [PackageProvider.get]. Return null
 * if no provider is returned for the [url], or if the returned provider has no mapping to a [PurlType]. Only in the
 * latter case an issue is added to [issues].
 */
private fun urlToPackageType(url: String, issues: MutableList<Issue>): PurlType? {
    // A URL for which no provider is found yields null without recording an issue.
    val provider = PackageProvider.get(url) ?: return null

    return when (provider) {
        PackageProvider.COCOAPODS -> PurlType.COCOAPODS
        PackageProvider.CRATES_IO -> PurlType.CARGO
        PackageProvider.DEBIAN -> PurlType.DEBIAN
        PackageProvider.GITHUB -> PurlType.GITHUB
        PackageProvider.GITLAB -> PurlType.GITLAB
        PackageProvider.GOLANG -> PurlType.GOLANG
        PackageProvider.MAVEN_CENTRAL, PackageProvider.MAVEN_GOOGLE -> PurlType.MAVEN
        PackageProvider.NPM_JS -> PurlType.NPM
        PackageProvider.NUGET -> PurlType.NUGET
        PackageProvider.PACKAGIST -> PurlType.COMPOSER
        PackageProvider.PYPI -> PurlType.PYPI
        PackageProvider.RUBYGEMS -> PurlType.GEM

        else -> {
            // The provider is known but has no PURL type mapping above: report it and fall back to null.
            issues += FossId.createAndLogIssue(
                source = "FossId",
                message = "Cannot determine PURL type for url '$url' and provider '$provider'."
            )

            null
        }
    }
}
789902
}

scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner.scanners.fossid
2222
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
2323
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
2424
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
25+
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
2526
import org.ossreviewtoolkit.clients.fossid.model.summary.Summarizable
2627
import org.ossreviewtoolkit.model.CopyrightFinding
2728
import org.ossreviewtoolkit.model.Issue
@@ -37,7 +38,8 @@ internal data class RawResults(
3738
val identifiedFiles: List<IdentifiedFile>,
3839
val markedAsIdentifiedFiles: List<MarkedAsIdentifiedFile>,
3940
val listIgnoredFiles: List<IgnoredFile>,
40-
val listPendingFiles: List<String>
41+
val listPendingFiles: List<String>,
42+
val listSnippets: Map<String, Set<Snippet>>
4143
)
4244

4345
/**

scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,16 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
6262
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
6363
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
6464
import org.ossreviewtoolkit.clients.fossid.listScansForProject
65+
import org.ossreviewtoolkit.clients.fossid.listSnippets
6566
import org.ossreviewtoolkit.clients.fossid.model.Scan
6667
import org.ossreviewtoolkit.clients.fossid.model.identification.common.LicenseMatchType
6768
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
6869
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
6970
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.License
7071
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.LicenseFile
7172
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
73+
import org.ossreviewtoolkit.clients.fossid.model.result.MatchType
74+
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
7275
import org.ossreviewtoolkit.clients.fossid.model.rules.IgnoreRule
7376
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
7477
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleType
@@ -78,23 +81,29 @@ import org.ossreviewtoolkit.clients.fossid.model.status.UnversionedScanDescripti
7881
import org.ossreviewtoolkit.clients.fossid.runScan
7982
import org.ossreviewtoolkit.downloader.VersionControlSystem
8083
import org.ossreviewtoolkit.downloader.vcs.Git
84+
import org.ossreviewtoolkit.model.ArtifactProvenance
8185
import org.ossreviewtoolkit.model.CopyrightFinding
86+
import org.ossreviewtoolkit.model.Hash
8287
import org.ossreviewtoolkit.model.Identifier
8388
import org.ossreviewtoolkit.model.Issue
8489
import org.ossreviewtoolkit.model.LicenseFinding
8590
import org.ossreviewtoolkit.model.Package
8691
import org.ossreviewtoolkit.model.PackageType
92+
import org.ossreviewtoolkit.model.RemoteArtifact
8793
import org.ossreviewtoolkit.model.ScanResult
8894
import org.ossreviewtoolkit.model.Severity
8995
import org.ossreviewtoolkit.model.TextLocation
9096
import org.ossreviewtoolkit.model.VcsInfo
9197
import org.ossreviewtoolkit.model.VcsType
9298
import org.ossreviewtoolkit.model.config.ScannerConfiguration
99+
import org.ossreviewtoolkit.model.utils.Snippet as OrtSnippet
100+
import org.ossreviewtoolkit.model.utils.SnippetFinding
93101
import org.ossreviewtoolkit.scanner.ScanContext
94102
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_CODE_KEY
95103
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_ID_KEY
96104
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SERVER_URL_KEY
97105
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.convertGitUrlToProjectName
106+
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
98107

99108
@Suppress("LargeClass")
100109
class FossIdTest : WordSpec({
@@ -314,6 +323,7 @@ class FossIdTest : WordSpec({
314323
summary.licenseFindings shouldContainExactlyInAnyOrder expectedLicenseFindings
315324
}
316325

326+
// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
317327
"report pending files as issues" {
318328
val projectCode = projectCode(PROJECT)
319329
val scanCode = scanCode(PROJECT, null)
@@ -328,7 +338,7 @@ class FossIdTest : WordSpec({
328338
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
329339
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
330340
.expectDownload(scanCode)
331-
.mockFiles(scanCode, pendingRange = 4..5)
341+
.mockFiles(scanCode, pendingRange = 4..5, snippetRange = 1..5)
332342

333343
val fossId = createFossId(config)
334344

@@ -341,6 +351,34 @@ class FossIdTest : WordSpec({
341351
summary.issues.map { it.copy(timestamp = Instant.EPOCH) } shouldBe expectedIssues
342352
}
343353

354+
"report pending files as snippets" {
355+
val projectCode = projectCode(PROJECT)
356+
val scanCode = scanCode(PROJECT, null)
357+
val config = createConfig(deltaScans = false)
358+
val vcsInfo = createVcsInfo()
359+
val scan = createScan(vcsInfo.url, "${vcsInfo.revision}_other", scanCode)
360+
val pkgId = createIdentifier(index = 42)
361+
362+
FossIdRestService.create(config.serverUrl)
363+
.expectProjectRequest(projectCode)
364+
.expectListScans(projectCode, listOf(scan))
365+
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
366+
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
367+
.expectDownload(scanCode)
368+
.mockFiles(scanCode, pendingRange = 1..5, snippetRange = 1..5)
369+
370+
val fossId = createFossId(config)
371+
372+
val summary = fossId.scan(createPackage(pkgId, vcsInfo)).summary
373+
374+
val expectedPendingFile = (1..5).map(::createPendingFile).toSet()
375+
val expectedSnippetFindings = (1..5).map(::createSnippetFindings).flatten()
376+
377+
summary.snippetFindings shouldHaveSize expectedPendingFile.size * 5
378+
summary.snippetFindings.map { it.sourceLocation.path }.toSet() shouldBe expectedPendingFile
379+
summary.snippetFindings shouldBe expectedSnippetFindings
380+
}
381+
344382
"create a new project if none exists yet" {
345383
val projectCode = projectCode(PROJECT)
346384
val scanCode = scanCode(PROJECT, null)
@@ -1238,6 +1276,52 @@ private fun createIgnoredFile(index: Int): IgnoredFile =
12381276
*/
12391277
private fun createPendingFile(index: Int): String = "/pending/file/$index"
12401278

1279+
/**
 * Build a FossID [Snippet] fixture for tests. Every constructor argument is derived from the given [index], except
 * for the fixed [MatchType.PARTIAL] and the literal "MIT" string.
 */
private fun createSnippet(index: Int): Snippet {
    // One argument is the plain decimal rendering of the index without any prefix.
    val indexAsString = index.toString()

    return Snippet(
        index,
        "created$index",
        index,
        index,
        index,
        MatchType.PARTIAL,
        "reason$index",
        "author$index",
        "artifact$index",
        "version$index",
        "MIT",
        "releaseDate$index",
        "mirror$index",
        "file$index",
        "fileLicense$index",
        "url$index",
        "hits$index",
        index,
        "updated$index",
        "cpe$index",
        indexAsString,
        "matchField$index",
        "classification$index",
        "highlighting$index"
    )
}
1308+
1309+
/**
 * Generate the set of ORT snippet findings for the pending file with the given [index]: one finding per snippet
 * index from 1 to 5, all sharing the source location "/pending/file/[index]".
 */
private fun createSnippetFindings(index: Int): Set<SnippetFinding> {
    // All findings of one pending file point to the same source location.
    val sourceLocation = TextLocation("/pending/file/$index", TextLocation.UNKNOWN_LINE)

    return (1..5).mapTo(mutableSetOf<SnippetFinding>()) { snippetIndex ->
        val snippet = OrtSnippet(
            snippetIndex.toFloat(),
            TextLocation("file$snippetIndex", TextLocation.UNKNOWN_LINE),
            ArtifactProvenance(RemoteArtifact("url$snippetIndex", Hash.NONE)),
            "pkg:generic/author$snippetIndex/artifact$snippetIndex@version$snippetIndex",
            SpdxExpression.parse("MIT")
        )

        SnippetFinding(sourceLocation, snippet)
    }
}
1324+
12411325
/**
12421326
* Prepare this service mock to answer a request for a project with the given [projectCode]. Return a response with
12431327
* the given [status] and [error].
@@ -1348,12 +1432,14 @@ private fun FossIdServiceWithVersion.mockFiles(
13481432
identifiedRange: IntRange = IntRange.EMPTY,
13491433
markedRange: IntRange = IntRange.EMPTY,
13501434
ignoredRange: IntRange = IntRange.EMPTY,
1351-
pendingRange: IntRange = IntRange.EMPTY
1435+
pendingRange: IntRange = IntRange.EMPTY,
1436+
snippetRange: IntRange = IntRange.EMPTY
13521437
): FossIdServiceWithVersion {
13531438
val identifiedFiles = identifiedRange.map(::createIdentifiedFile)
13541439
val markedFiles = markedRange.map(::createMarkedIdentifiedFile)
13551440
val ignoredFiles = ignoredRange.map(::createIgnoredFile)
13561441
val pendingFiles = pendingRange.map(::createPendingFile)
1442+
val snippets = snippetRange.map(::createSnippet)
13571443

13581444
coEvery { listIdentifiedFiles(USER, API_KEY, scanCode) } returns
13591445
PolymorphicResponseBody(
@@ -1367,6 +1453,8 @@ private fun FossIdServiceWithVersion.mockFiles(
13671453
PolymorphicResponseBody(status = 1, data = PolymorphicList(ignoredFiles))
13681454
coEvery { listPendingFiles(USER, API_KEY, scanCode) } returns
13691455
PolymorphicResponseBody(status = 1, data = PolymorphicList(pendingFiles))
1456+
coEvery { listSnippets(USER, API_KEY, scanCode, any()) } returns
1457+
PolymorphicResponseBody(status = 1, data = PolymorphicList(snippets))
13701458

13711459
return this
13721460
}

0 commit comments

Comments
 (0)