-
-
Notifications
You must be signed in to change notification settings - Fork 608
Description
Background
I am currently working on copyleft license compatibility rules for ORT. For this I created a test repository containing `GPL-2.0-or-later` and `GPL-3.0` (later I added some CC licenses as well, but those are not relevant to the issue). Both license texts were copied from spdx.org.
ORT issue: oss-review-toolkit/ort#5967
Description of Bug
When scanning said test repository with ORT, ScanCode detects both `GPL-2.0-only` and `GPL-2.0-or-later`, even though only `GPL-2.0-or-later` was added to the repo:
- license: "GPL-2.0-only"
  location:
    path: "license-gpl-2.0-or-later.txt"
    start_line: 3
    end_line: 110
  score: 98.02
- license: "GPL-2.0-or-later"
  location:
    path: "license-gpl-2.0-or-later.txt"
    start_line: 110
    end_line: 114
  score: 100.0
As you can see, ScanCode gives `GPL-2.0-only` a score of 98.02, even though that match ignores the last 4 lines of the license text, while `GPL-2.0-or-later` is detected only in the last four lines.
Later I ran a scan directly with scancode, getting the same results:
complete json
{
"headers": [
{
"tool_name": "scancode-toolkit",
"tool_version": "31.2.1",
"options": {
"input": [
"."
],
"--copyright": true,
"--info": true,
"--json-pp": "report-scancode.json",
"--license": true,
"--package": true,
"--verbose": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"start_timestamp": "2022-10-19T140338.813472",
"end_timestamp": "2022-10-19T140353.061902",
"output_format_version": "2.0.0",
"duration": 14.248444318771362,
"message": null,
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.15.0-50-generic-x86_64-with-glibc2.35",
"platform_version": "#56-Ubuntu SMP Tue Sep 20 13:23:26 UTC 2022",
"python_version": "3.10.6 (main, Aug 10 2022, 11:40:04) [GCC 11.3.0]"
},
"spdx_license_list_version": "3.17",
"files_count": 5
}
}
],
"dependencies": [],
"packages": [],
"files": [
{
"path": "compatibility_test",
"type": "directory",
"name": "compatibility_test",
"base_name": "compatibility_test",
"extension": "",
"size": 0,
"date": null,
"sha1": null,
"md5": null,
"sha256": null,
"mime_type": null,
"file_type": null,
"programming_language": null,
"is_binary": false,
"is_text": false,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [],
"license_expressions": [],
"percentage_of_license_text": 0,
"copyrights": [],
"holders": [],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 5,
"dirs_count": 0,
"size_count": 94469,
"scan_errors": []
},
{
"path": "compatibility_test/CC-BY-NC-SA-3.0.txt",
"type": "file",
"name": "CC-BY-NC-SA-3.0.txt",
"base_name": "CC-BY-NC-SA-3.0",
"extension": ".txt",
"size": 21448,
"date": "2022-10-18",
"sha1": "7295cb93cd11ad9912bbc495a3ef6d7a91cdb44c",
"md5": "666f6d1f58d456548a3156f86e7a3146",
"sha256": "0abe2645856e5e739bf858bbedc527bb58d053f628880c9e9a26ef85d2f7c713",
"mime_type": "text/plain",
"file_type": "ASCII text, with very long lines",
"programming_language": null,
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [
{
"key": "cc-by-nc-sa-3.0",
"score": 100.0,
"name": "Creative Commons Attribution Non-Commercial Share Alike License 3.0",
"short_name": "CC-BY-NC-SA-3.0",
"category": "Source-available",
"is_exception": false,
"is_unknown": false,
"owner": "Creative Commons",
"homepage_url": "http://creativecommons.org/licenses/by-nc-sa/3.0/",
"text_url": "http://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
"reference_url": "https://scancode-licensedb.aboutcode.org/cc-by-nc-sa-3.0",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/cc-by-nc-sa-3.0.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/cc-by-nc-sa-3.0.yml",
"spdx_license_key": "CC-BY-NC-SA-3.0",
"spdx_url": "https://spdx.org/licenses/CC-BY-NC-SA-3.0",
"start_line": 3,
"end_line": 63,
"matched_rule": {
"identifier": "cc-by-nc-sa-3.0_47.RULE",
"license_expression": "cc-by-nc-sa-3.0",
"licenses": [
"cc-by-nc-sa-3.0"
],
"referenced_filenames": [],
"is_license_text": true,
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"has_unknown": false,
"matcher": "1-hash",
"rule_length": 3360,
"matched_length": 3360,
"match_coverage": 100.0,
"rule_relevance": 100
}
}
],
"license_expressions": [
"cc-by-nc-sa-3.0"
],
"percentage_of_license_text": 100.0,
"copyrights": [],
"holders": [],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 0,
"dirs_count": 0,
"size_count": 0,
"scan_errors": []
},
{
"path": "compatibility_test/CC-BY-NC-SA-4.0.txt",
"type": "file",
"name": "CC-BY-NC-SA-4.0.txt",
"base_name": "CC-BY-NC-SA-4.0",
"extension": ".txt",
"size": 19066,
"date": "2022-10-18",
"sha1": "c7b58be452219bd8b74710dd9e6e7ed449517da0",
"md5": "4a206eed80a3482b8fbf26350ead4538",
"sha256": "f5992db54c7473dcda6f16a40679d305aa31440489fdcb1eaca84d7da6290ee4",
"mime_type": "text/plain",
"file_type": "UTF-8 Unicode text, with very long lines",
"programming_language": null,
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [
{
"key": "cc-by-nc-sa-4.0",
"score": 100.0,
"name": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License",
"short_name": "CC-BY-NC-SA-4.0",
"category": "Source-available",
"is_exception": false,
"is_unknown": false,
"owner": "Creative Commons",
"homepage_url": "http://creativecommons.org/licenses/by-nc-sa/4.0/",
"text_url": "http://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
"reference_url": "https://scancode-licensedb.aboutcode.org/cc-by-nc-sa-4.0",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/cc-by-nc-sa-4.0.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/cc-by-nc-sa-4.0.yml",
"spdx_license_key": "CC-BY-NC-SA-4.0",
"spdx_url": "https://spdx.org/licenses/CC-BY-NC-SA-4.0",
"start_line": 3,
"end_line": 117,
"matched_rule": {
"identifier": "cc-by-nc-sa-4.0_23.RULE",
"license_expression": "cc-by-nc-sa-4.0",
"licenses": [
"cc-by-nc-sa-4.0"
],
"referenced_filenames": [],
"is_license_text": true,
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"has_unknown": false,
"matcher": "1-hash",
"rule_length": 2861,
"matched_length": 2861,
"match_coverage": 100.0,
"rule_relevance": 100
}
}
],
"license_expressions": [
"cc-by-nc-sa-4.0"
],
"percentage_of_license_text": 100.0,
"copyrights": [],
"holders": [],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 0,
"dirs_count": 0,
"size_count": 0,
"scan_errors": []
},
{
"path": "compatibility_test/license-gpl-2.0-or-later.txt",
"type": "file",
"name": "license-gpl-2.0-or-later.txt",
"base_name": "license-gpl-2.0-or-later",
"extension": ".txt",
"size": 18229,
"date": "2022-10-18",
"sha1": "040cfa18ce31bbc2748537fe3f8aedfb25af6165",
"md5": "d736fb04757076bd90e1677e9aa37230",
"sha256": "1cc9cfdb6b5d3737e3c672d938c0e1ed7070ed1ed631b4e881076e1eabaafccf",
"mime_type": "text/plain",
"file_type": "ASCII text, with very long lines",
"programming_language": null,
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [
{
"key": "gpl-2.0",
"score": 98.02,
"name": "GNU General Public License 2.0",
"short_name": "GPL 2.0",
"category": "Copyleft",
"is_exception": false,
"is_unknown": false,
"owner": "Free Software Foundation (FSF)",
"homepage_url": "http://www.gnu.org/licenses/gpl-2.0.html",
"text_url": "http://www.gnu.org/licenses/gpl-2.0.txt",
"reference_url": "https://scancode-licensedb.aboutcode.org/gpl-2.0",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-2.0.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-2.0.yml",
"spdx_license_key": "GPL-2.0-only",
"spdx_url": "https://spdx.org/licenses/GPL-2.0-only",
"start_line": 3,
"end_line": 110,
"matched_rule": {
"identifier": "gpl-2.0.LICENSE",
"license_expression": "gpl-2.0",
"licenses": [
"gpl-2.0"
],
"referenced_filenames": [],
"is_license_text": true,
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"has_unknown": false,
"matcher": "3-seq",
"rule_length": 2931,
"matched_length": 2873,
"match_coverage": 98.02,
"rule_relevance": 100
}
},
{
"key": "gpl-2.0-plus",
"score": 100.0,
"name": "GNU General Public License 2.0 or later",
"short_name": "GPL 2.0 or later",
"category": "Copyleft",
"is_exception": false,
"is_unknown": false,
"owner": "Free Software Foundation (FSF)",
"homepage_url": "http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
"text_url": "http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
"reference_url": "https://scancode-licensedb.aboutcode.org/gpl-2.0-plus",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-2.0-plus.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-2.0-plus.yml",
"spdx_license_key": "GPL-2.0-or-later",
"spdx_url": "https://spdx.org/licenses/GPL-2.0-or-later",
"start_line": 110,
"end_line": 114,
"matched_rule": {
"identifier": "gpl-2.0-plus_420.RULE",
"license_expression": "gpl-2.0-plus",
"licenses": [
"gpl-2.0-plus"
],
"referenced_filenames": [],
"is_license_text": false,
"is_license_notice": true,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"has_unknown": false,
"matcher": "2-aho",
"rule_length": 113,
"matched_length": 113,
"match_coverage": 100.0,
"rule_relevance": 100
}
}
],
"license_expressions": [
"gpl-2.0",
"gpl-2.0-plus"
],
"percentage_of_license_text": 98.94,
"copyrights": [
{
"copyright": "Copyright (c) 1989, 1991 Free Software Foundation, Inc.",
"start_line": 6,
"end_line": 6
},
{
"copyright": "copyrighted by the Free Software Foundation",
"start_line": 69,
"end_line": 69
}
],
"holders": [
{
"holder": "Free Software Foundation, Inc.",
"start_line": 6,
"end_line": 6
},
{
"holder": "the Free Software Foundation",
"start_line": 69,
"end_line": 69
}
],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 0,
"dirs_count": 0,
"size_count": 0,
"scan_errors": []
},
{
"path": "compatibility_test/license-gpl-3.0.txt",
"type": "file",
"name": "license-gpl-3.0.txt",
"base_name": "license-gpl-3.0",
"extension": ".txt",
"size": 35405,
"date": "2022-10-18",
"sha1": "579b08f7066f9491391a5eb2e9f238a71f4d4981",
"md5": "2103bb15c50dc81f64b2315ab249ad3d",
"sha256": "44d90a331f505bd19b626cef0e3ae59da08830cf8b9efbde5f71025faa3c463b",
"mime_type": "text/plain",
"file_type": "UTF-8 Unicode text, with very long lines",
"programming_language": null,
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [
{
"key": "gpl-3.0",
"score": 99.96,
"name": "GNU General Public License 3.0",
"short_name": "GPL 3.0",
"category": "Copyleft",
"is_exception": false,
"is_unknown": false,
"owner": "Free Software Foundation (FSF)",
"homepage_url": "http://www.gnu.org/licenses/gpl-3.0.html",
"text_url": "http://www.gnu.org/licenses/gpl-3.0-standalone.html",
"reference_url": "https://scancode-licensedb.aboutcode.org/gpl-3.0",
"scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-3.0.LICENSE",
"scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/gpl-3.0.yml",
"spdx_license_key": "GPL-3.0-only",
"spdx_url": "https://spdx.org/licenses/GPL-3.0-only",
"start_line": 3,
"end_line": 211,
"matched_rule": {
"identifier": "gpl-3.0_466.RULE",
"license_expression": "gpl-3.0",
"licenses": [
"gpl-3.0"
],
"referenced_filenames": [],
"is_license_text": true,
"is_license_notice": false,
"is_license_reference": false,
"is_license_tag": false,
"is_license_intro": false,
"has_unknown": false,
"matcher": "3-seq",
"rule_length": 5612,
"matched_length": 5610,
"match_coverage": 99.96,
"rule_relevance": 100
}
}
],
"license_expressions": [
"gpl-3.0"
],
"percentage_of_license_text": 99.95,
"copyrights": [
{
"copyright": "Copyright (c) 2007 Free Software Foundation, Inc. <https://fsf.org/>",
"start_line": 6,
"end_line": 6
}
],
"holders": [
{
"holder": "Free Software Foundation, Inc.",
"start_line": 6,
"end_line": 6
}
],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 0,
"dirs_count": 0,
"size_count": 0,
"scan_errors": []
},
{
"path": "compatibility_test/pipeline.txt",
"type": "file",
"name": "pipeline.txt",
"base_name": "pipeline",
"extension": ".txt",
"size": 321,
"date": "2022-10-18",
"sha1": "1901cb60d897ee69d8dd4aba036b4927e5b2ae41",
"md5": "3c20a2c7f064715557f4706f5ba997ab",
"sha256": "ce1a94fde580caa420d2a6517cf0ed4197a8a16d123f30afc6391bd86ca8ae64",
"mime_type": "text/plain",
"file_type": "ASCII text",
"programming_language": null,
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_media": false,
"is_source": false,
"is_script": false,
"licenses": [],
"license_expressions": [],
"percentage_of_license_text": 0,
"copyrights": [],
"holders": [],
"authors": [],
"package_data": [],
"for_packages": [],
"files_count": 0,
"dirs_count": 0,
"size_count": 0,
"scan_errors": []
}
]
}
How To Reproduce
$ cd /path/to/compatibility_test
$ scancode --license --package --copyright --verbose --info --json-pp report-scancode.json .
System configuration
- Linux
- scancode-toolkit-31.2.1
- Python 3.10.6
- installed via pip in an otherwise clean venv
This also occurs with ORT, which uses ScanCode 30.1.0 or older (the ORT image we use in the pipeline is older than the current ORT master).