diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index a69fe1629..be1e838b0 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -22,7 +22,6 @@ from vulnerabilities.importers import github_osv from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql @@ -41,10 +40,10 @@ from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer +from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ - nvd.NVDImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ nginx_importer.NginxImporterPipeline, gitlab_importer.GitLabImporterPipeline, github_importer.GitHubAPIImporterPipeline, + nvd_importer.NVDImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/vulnerability_status.py b/vulnerabilities/improvers/vulnerability_status.py index 0157db557..353cca54c 100644 --- a/vulnerabilities/improvers/vulnerability_status.py +++ b/vulnerabilities/improvers/vulnerability_status.py @@ -14,7 +14,6 @@ from django.db.models.query import QuerySet from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.nvd import NVDImporter from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory @@ -22,6 +21,7 @@ from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityChangeLog from vulnerabilities.models import VulnerabilityStatusType +from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline from 
vulnerabilities.utils import fetch_response
 from vulnerabilities.utils import get_item
 
@@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver):
     @property
     def interesting_advisories(self) -> QuerySet:
         return (
-            Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name))
+            Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id))
             .distinct("aliases")
             .paginated()
         )
diff --git a/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py
new file mode 100644
index 000000000..2a91f55ee
--- /dev/null
+++ b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py
@@ -0,0 +1,36 @@
+# Generated by Django 4.2.15 on 2024-09-27 19:38
+
+"""
+Update the created_by field on NVD advisories from the old importer
+qualified_name to the new pipeline_id.
+"""
+
+from django.db import migrations
+
+# Hardcoded rather than imported from the pipeline module: a migration must
+# keep working even if the pipeline class is later renamed, moved or removed.
+OLD_CREATED_BY = "vulnerabilities.importers.nvd.NVDImporter"
+NEW_CREATED_BY = "nvd_importer"
+
+
+def update_created_by(apps, schema_editor):
+    """Relabel advisories created by the old NVD importer with the pipeline id."""
+    Advisory = apps.get_model("vulnerabilities", "Advisory")
+    Advisory.objects.filter(created_by=OLD_CREATED_BY).update(created_by=NEW_CREATED_BY)
+
+
+def reverse_update_created_by(apps, schema_editor):
+    """Restore the old importer qualified name on NVD pipeline advisories."""
+    Advisory = apps.get_model("vulnerabilities", "Advisory")
+    Advisory.objects.filter(created_by=NEW_CREATED_BY).update(created_by=OLD_CREATED_BY)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("vulnerabilities", "0067_update_github_advisory_created_by"),
+    ]
+
+    operations = [
+        migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by),
+    ]
diff --git a/vulnerabilities/importers/nvd.py b/vulnerabilities/pipelines/nvd_importer.py
similarity index 88%
rename from vulnerabilities/importers/nvd.py
rename to vulnerabilities/pipelines/nvd_importer.py
index 
1a6048dfd..38800eb62 100644
--- a/vulnerabilities/importers/nvd.py
+++ b/vulnerabilities/pipelines/nvd_importer.py
@@ -9,7 +9,10 @@
 
 import gzip
 import json
+import logging
 from datetime import date
+from traceback import format_exc as traceback_format_exc
+from typing import Iterable
 
 import attr
 import requests
@@ -17,14 +20,18 @@
 
 from vulnerabilities import severity_systems
 from vulnerabilities.importer import AdvisoryData
-from vulnerabilities.importer import Importer
 from vulnerabilities.importer import Reference
 from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
 from vulnerabilities.utils import get_cwe_id
 from vulnerabilities.utils import get_item
 
 
-class NVDImporter(Importer):
+class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline):
+    """Collect advisories from NVD."""
+
+    pipeline_id = "nvd_importer"
+
     # See https://github.com/nexB/vulnerablecode/issues/665 for follow up
     spdx_license_expression = (
         "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou"
@@ -61,19 +68,61 @@ class NVDImporter(Importer):
     """
     importer_name = "NVD Importer"
 
-    def advisory_data(self):
-        for _year, cve_data in fetch_cve_data_1_1():
+    @classmethod
+    def steps(cls):
+        """Return the tuple of pipeline step callables for this importer."""
+        return (
+            cls.collect_and_store_advisories,
+            cls.import_new_advisories,
+        )
+
+    def advisories_count(self):
+        """
+        Return the total number of CVEs reported by the NVD REST API, or 0 if
+        the count cannot be fetched.
+        """
+        url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1"
+
+        try:
+            # Without a timeout, requests can block forever on a stalled connection.
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError) as err:
+            # RequestException also covers connection failures and timeouts, and
+            # ValueError covers an undecodable JSON body; HTTPError alone lets both escape.
+            self.log(
+                f"Failed to fetch NVD advisories count: {err} \n {traceback_format_exc()}",
+                level=logging.ERROR,
+            )
+            return 0
+
+        return data.get("totalResults", 0)
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        """Yield AdvisoryData built from each yearly NVD CVE 1.1 JSON feed."""
+        for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
             yield from to_advisories(cve_data=cve_data)
 
 
 # Isolating network calls for simplicity of testing
-def fetch(url):
+def fetch(url, logger=None):
+    """
+    Return the parsed JSON content of the gzipped JSON document at ``url``.
+    Call ``logger`` with a progress message when provided.
+    Raise requests.HTTPError on an unsuccessful HTTP response.
+    """
+    if logger:
+        logger(f"Fetching `{url}`")
-    gz_file = requests.get(url)
+    # Bound the wait on a stalled connection and fail on HTTP errors rather
+    # than trying to gunzip an error page.
+    gz_file = requests.get(url, timeout=60)
+    gz_file.raise_for_status()
     data = gzip.decompress(gz_file.content)
     return json.loads(data)
 
 
-def fetch_cve_data_1_1(starting_year=2002):
+def fetch_cve_data_1_1(starting_year=2002, logger=None):
     """
     Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
     year since ``starting_year`` defaulting to 2002.
@@ -82,7 +131,7 @@ def fetch_cve_data_1_1(starting_year=2002):
     # NVD json feeds start from 2002.
     for year in range(starting_year, current_year + 1):
         download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
-        yield year, fetch(url=download_url)
+        yield year, fetch(url=download_url, logger=logger)
 
 
 def to_advisories(cve_data):
diff --git a/vulnerabilities/tests/test_nvd.py b/vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py
similarity index 84%
rename from vulnerabilities/tests/test_nvd.py
rename to vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py
index 702faa7f4..549a269bb 100644
--- a/vulnerabilities/tests/test_nvd.py
+++ b/vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py
@@ -8,14 +8,12 @@
 #
 
 import json
-import os
+from pathlib import Path
 
-from vulnerabilities.importers import nvd
+from vulnerabilities.pipelines import nvd_importer
 from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-TEST_DATA = os.path.join(BASE_DIR, "test_data/nvd/nvd_test.json")
-REJECTED_CVE = os.path.join(BASE_DIR, "test_data/nvd/rejected_nvd.json")
+TEST_DATA = Path(__file__).parent.parent / "test_data" / "nvd"
 
 
 def load_test_data(file):
@@ -37,10 +35,11 @@ def sorted_advisory_data(advisory_data):
 
 
 def test_to_advisories_skips_hardware(regen=REGEN):
-    expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-expected.json")
+    expected_file = TEST_DATA / "nvd-expected.json"
 
-    test_data = 
load_test_data(file=TEST_DATA) - result = [data.to_dict() for data in nvd.to_advisories(test_data)] + test_file = TEST_DATA / "nvd_test.json" + test_data = load_test_data(file=test_file) + result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)] result = sorted_advisory_data(result) if regen: @@ -56,10 +55,11 @@ def test_to_advisories_skips_hardware(regen=REGEN): def test_to_advisories_marks_rejected_cve(regen=REGEN): - expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-rejected-expected.json") + expected_file = TEST_DATA / "nvd-rejected-expected.json" - test_data = load_test_data(file=REJECTED_CVE) - result = [data.to_dict() for data in nvd.to_advisories(test_data)] + test_file = TEST_DATA / "rejected_nvd.json" + test_data = load_test_data(file=test_file) + result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)] result = sorted_advisory_data(result) if regen: @@ -168,14 +168,16 @@ def test_CveItem_cpes(): "cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*", ] - found_cpes = nvd.CveItem(cve_item=get_test_cve_item()).cpes + found_cpes = nvd_importer.CveItem(cve_item=get_test_cve_item()).cpes assert found_cpes == expected_cpes def test_is_related_to_hardware(): - assert nvd.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*") - assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*") - assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*") + assert nvd_importer.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*") + assert not nvd_importer.is_related_to_hardware( + "cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*" + ) + assert not nvd_importer.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*") def test_CveItem_summary_with_single_summary(): @@ -186,7 +188,7 @@ def test_CveItem_summary_with_single_summary(): "be allocated than expected." 
)
 
-    assert nvd.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
+    assert nvd_importer.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
 
 
 def test_CveItem_reference_urls():
@@ -195,4 +197,4 @@ def test_CveItem_reference_urls():
         "http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/",
     ]
 
-    assert nvd.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
+    assert nvd_importer.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py
index fcad0a1d4..d43755980 100644
--- a/vulnerabilities/tests/test_data_migrations.py
+++ b/vulnerabilities/tests/test_data_migrations.py
@@ -802,3 +802,46 @@ def test_removal_of_duped_purls(self):
             adv.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").count() == 0
         )
         assert adv.filter(created_by="github_importer").count() == 1
+
+
+class TestUpdateNVDAdvisoryCreatedByField(TestMigrations):
+    """Check that migration 0068 relabels NVD advisories with the new pipeline id."""
+
+    app_name = "vulnerabilities"
+    migrate_from = "0067_update_github_advisory_created_by"
+    migrate_to = "0068_update_nvd_advisory_created_by"
+
+    advisory_data1 = AdvisoryData(
+        aliases=["CVE-2020-13371337"],
+        summary="vulnerability description here",
+        affected_packages=[
+            AffectedPackage(
+                package=PackageURL(type="pypi", name="foobar"),
+                affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"),
+            )
+        ],
+        references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
+        date_published=timezone.now(),
+        url="https://test.com",
+    )
+
+    def setUpBeforeMigration(self, apps):
+        """Create one advisory labelled with the legacy NVD importer name."""
+        Advisory = apps.get_model("vulnerabilities", "Advisory")
+        Advisory.objects.create(
+            aliases=self.advisory_data1.aliases,
+            summary=self.advisory_data1.summary,
+            affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages],
+            references=[ref.to_dict() for ref in self.advisory_data1.references],
+            url=self.advisory_data1.url,
+            created_by="vulnerabilities.importers.nvd.NVDImporter",
+            date_collected=timezone.now(),
+        )
+
+    def test_nvd_advisory_created_by_is_updated(self):
+        """The legacy created_by value is gone and the pipeline id replaces it."""
+        Advisory = apps.get_model("vulnerabilities", "Advisory")
+        adv = Advisory.objects.all()
+
+        assert adv.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").count() == 0
+        assert adv.filter(created_by="nvd_importer").count() == 1
diff --git a/vulnerabilities/tests/test_vulnerability_status_improver.py b/vulnerabilities/tests/test_vulnerability_status_improver.py
index 5bad2f498..f2eb5ce0f 100644
--- a/vulnerabilities/tests/test_vulnerability_status_improver.py
+++ b/vulnerabilities/tests/test_vulnerability_status_improver.py
@@ -13,13 +13,12 @@
 
 import pytest
 
-from vulnerabilities.importers.nvd import NVDImporter
 from vulnerabilities.improvers.vulnerability_status import VulnerabilityStatusImprover
-from vulnerabilities.improvers.vulnerability_status import get_status_from_api
 from vulnerabilities.models import Advisory
 from vulnerabilities.models import Alias
 from vulnerabilities.models import Vulnerability
 from vulnerabilities.models import VulnerabilityStatusType
+from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -34,13 +33,13 @@ def test_interesting_advisories():
 
     Advisory.objects.create(
         aliases=["CVE-1"],
-        created_by=NVDImporter.qualified_name,
+        created_by=NVDImporterPipeline.pipeline_id,
         summary="1",
         date_collected=datetime.now(),
     )
     Advisory.objects.create(
         aliases=["CVE-1"],
-        created_by=NVDImporter.qualified_name,
+        created_by=NVDImporterPipeline.pipeline_id,
         summary="2",
         date_collected=datetime.now(),
     )
@@ -55,7 +54,7 @@ def test_improver_end_to_end(mock_response):
     mock_response.return_value = response
     adv = Advisory.objects.create(
         aliases=["CVE-2023-35866"],
-        created_by=NVDImporter.qualified_name,
+        created_by=NVDImporterPipeline.pipeline_id,
         summary="1",
         date_collected=datetime.now(),
     )