Skip to content

Commit

Permalink
Merge pull request #1587 from aboutcode-org/nvd-importer-pipeline
Browse files Browse the repository at this point in the history
Migrate NVD importer to aboutcode pipeline
  • Loading branch information
keshav-space authored Sep 27, 2024
2 parents 1ea270a + 2c2dfff commit 021b568
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 33 deletions.
4 changes: 2 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from vulnerabilities.importers import github_osv
from vulnerabilities.importers import istio
from vulnerabilities.importers import mozilla
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import oss_fuzz
from vulnerabilities.importers import postgresql
Expand All @@ -41,10 +40,10 @@
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import nginx_importer
from vulnerabilities.pipelines import npm_importer
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.pipelines import pypa_importer

IMPORTERS_REGISTRY = [
nvd.NVDImporter,
pysec.PyPIImporter,
alpine_linux.AlpineImporter,
openssl.OpensslImporter,
Expand Down Expand Up @@ -78,6 +77,7 @@
nginx_importer.NginxImporterPipeline,
gitlab_importer.GitLabImporterPipeline,
github_importer.GitHubAPIImporterPipeline,
nvd_importer.NVDImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
4 changes: 2 additions & 2 deletions vulnerabilities/improvers/vulnerability_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
from django.db.models.query import QuerySet

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importers.nvd import NVDImporter
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.models import Alias
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityChangeLog
from vulnerabilities.models import VulnerabilityStatusType
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline
from vulnerabilities.utils import fetch_response
from vulnerabilities.utils import get_item

Expand All @@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver):
@property
def interesting_advisories(self) -> QuerySet:
return (
Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name))
Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id))
.distinct("aliases")
.paginated()
)
Expand Down
38 changes: 38 additions & 0 deletions vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.15 on 2024-09-27 19:38

from django.db import migrations

"""
Update the created_by field on Advisory from the old qualified_name
to the new pipeline_id.
"""


def update_created_by(apps, schema_editor):
    """
    Forward data migration: relabel advisories created by the legacy NVD importer.

    Every Advisory whose ``created_by`` holds the old importer qualified name
    is updated to hold the new pipeline id instead.

    ``apps`` is the historical app registry passed in by ``RunPython``;
    ``schema_editor`` is accepted for the ``RunPython`` signature and is unused.
    """
    # Both identifiers are frozen as literals instead of being imported from
    # application code: a migration must keep working even if the pipeline
    # module is later moved, renamed or given a different pipeline_id.
    old_created_by = "vulnerabilities.importers.nvd.NVDImporter"
    new_created_by = "nvd_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=old_created_by).update(created_by=new_created_by)



def reverse_update_created_by(apps, schema_editor):
    """
    Reverse data migration: restore the legacy NVD importer label on advisories.

    Every Advisory whose ``created_by`` holds the new pipeline id is updated
    to hold the old importer qualified name again.

    ``apps`` is the historical app registry passed in by ``RunPython``;
    ``schema_editor`` is accepted for the ``RunPython`` signature and is unused.
    """
    # Both identifiers are frozen as literals instead of being imported from
    # application code: a migration must keep working even if the pipeline
    # module is later moved, renamed or given a different pipeline_id.
    old_created_by = "vulnerabilities.importers.nvd.NVDImporter"
    new_created_by = "nvd_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=new_created_by).update(created_by=old_created_by)


class Migration(migrations.Migration):
    """Data-only migration that runs the ``created_by`` relabelling for NVD advisories."""

    dependencies = [("vulnerabilities", "0067_update_github_advisory_created_by")]

    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,29 @@

import gzip
import json
import logging
from datetime import date
from traceback import format_exc as traceback_format_exc
from typing import Iterable

import attr
import requests
from dateutil import parser as dateparser

from vulnerabilities import severity_systems
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import get_cwe_id
from vulnerabilities.utils import get_item


class NVDImporter(Importer):
class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from NVD."""

pipeline_id = "nvd_importer"

# See https://github.com/nexB/vulnerablecode/issues/665 for follow up
spdx_license_expression = (
"LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou"
Expand Down Expand Up @@ -61,19 +68,46 @@ class NVDImporter(Importer):
"""
importer_name = "NVD Importer"

def advisory_data(self):
for _year, cve_data in fetch_cve_data_1_1():
@classmethod
def steps(cls):
    """Return the ordered tuple of step methods for this pipeline."""
    pipeline_steps = (
        cls.collect_and_store_advisories,
        cls.import_new_advisories,
    )
    return pipeline_steps

def advisories_count(self):
    """
    Return the ``totalResults`` value reported by the NVD CVE API 2.0.

    A single-result page is requested and only its ``totalResults`` field is
    read. Return 0 when the request fails, when the response body is not
    valid JSON, or when the field is absent.
    """
    url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1"

    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # RequestException covers HTTP error statuses as well as connection
        # failures and timeouts; ValueError covers a body that is not JSON.
        self.log(
            f"Failed to fetch NVD advisories count: {err} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        return 0

    return data.get("totalResults", 0)

def collect_advisories(self) -> Iterable[AdvisoryData]:
    """Yield the advisories built from each yearly CVE data set that is fetched."""
    yearly_feeds = fetch_cve_data_1_1(logger=self.log)
    for _, cve_mappings in yearly_feeds:
        yield from to_advisories(cve_data=cve_mappings)


# Isolating network calls for simplicity of testing
def fetch(url):
def fetch(url, logger=None):
    """
    Return the data parsed from the gzip-compressed JSON document at ``url``.

    ``logger`` is an optional callable taking a single message string; when
    given, it is called once with the URL about to be fetched.

    Raise ``requests.HTTPError`` when the server answers with an error status.
    """
    if logger:
        logger(f"Fetching `{url}`")
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=60)
    # Fail on an HTTP error here rather than letting an error page reach
    # gzip.decompress and surface as an unrelated decompression failure.
    response.raise_for_status()
    data = gzip.decompress(response.content)
    return json.loads(data)


def fetch_cve_data_1_1(starting_year=2002):
def fetch_cve_data_1_1(starting_year=2002, logger=None):
"""
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
year since ``starting_year`` defaulting to 2002.
Expand All @@ -82,7 +116,7 @@ def fetch_cve_data_1_1(starting_year=2002):
# NVD json feeds start from 2002.
for year in range(starting_year, current_year + 1):
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
yield year, fetch(url=download_url)
yield year, fetch(url=download_url, logger=logger)


def to_advisories(cve_data):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
#

import json
import os
from pathlib import Path

from vulnerabilities.importers import nvd
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA = os.path.join(BASE_DIR, "test_data/nvd/nvd_test.json")
REJECTED_CVE = os.path.join(BASE_DIR, "test_data/nvd/rejected_nvd.json")
TEST_DATA = Path(__file__).parent.parent / "test_data" / "nvd"


def load_test_data(file):
Expand All @@ -37,10 +35,11 @@ def sorted_advisory_data(advisory_data):


def test_to_advisories_skips_hardware(regen=REGEN):
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-expected.json")
expected_file = TEST_DATA / "nvd-expected.json"

test_data = load_test_data(file=TEST_DATA)
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
test_file = TEST_DATA / "nvd_test.json"
test_data = load_test_data(file=test_file)
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
result = sorted_advisory_data(result)

if regen:
Expand All @@ -56,10 +55,11 @@ def test_to_advisories_skips_hardware(regen=REGEN):


def test_to_advisories_marks_rejected_cve(regen=REGEN):
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-rejected-expected.json")
expected_file = TEST_DATA / "nvd-rejected-expected.json"

test_data = load_test_data(file=REJECTED_CVE)
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
test_file = TEST_DATA / "rejected_nvd.json"
test_data = load_test_data(file=test_file)
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
result = sorted_advisory_data(result)

if regen:
Expand Down Expand Up @@ -168,14 +168,16 @@ def test_CveItem_cpes():
"cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*",
]

found_cpes = nvd.CveItem(cve_item=get_test_cve_item()).cpes
found_cpes = nvd_importer.CveItem(cve_item=get_test_cve_item()).cpes
assert found_cpes == expected_cpes


def test_is_related_to_hardware():
assert nvd.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*")
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")
assert nvd_importer.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
assert not nvd_importer.is_related_to_hardware(
"cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*"
)
assert not nvd_importer.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")


def test_CveItem_summary_with_single_summary():
Expand All @@ -186,7 +188,7 @@ def test_CveItem_summary_with_single_summary():
"be allocated than expected."
)

assert nvd.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).summary == expected_summary


def test_CveItem_reference_urls():
Expand All @@ -195,4 +197,4 @@ def test_CveItem_reference_urls():
"http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/",
]

assert nvd.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
39 changes: 39 additions & 0 deletions vulnerabilities/tests/test_data_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,3 +802,42 @@ def test_removal_of_duped_purls(self):
adv.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").count() == 0
)
assert adv.filter(created_by="github_importer").count() == 1


class TestUpdateNVDAdvisoryCreatedByField(TestMigrations):
    """
    Check that migration 0068 relabels an advisory created by the legacy NVD
    importer with the ``nvd_importer`` pipeline id.
    """

    app_name = "vulnerabilities"
    migrate_from = "0067_update_github_advisory_created_by"
    migrate_to = "0068_update_nvd_advisory_created_by"

    # Fixture data used to seed one advisory before the migration runs.
    advisory_data1 = AdvisoryData(
        aliases=["CVE-2020-13371337"],
        summary="vulnerability description here",
        affected_packages=[
            AffectedPackage(
                package=PackageURL(type="pypi", name="foobar"),
                affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"),
            )
        ],
        references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
        date_published=timezone.now(),
        url="https://test.com",
    )

    def setUpBeforeMigration(self, apps):
        """Create one advisory labelled with the legacy importer qualified name."""
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        Advisory.objects.create(
            aliases=self.advisory_data1.aliases,
            summary=self.advisory_data1.summary,
            affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages],
            references=[ref.to_dict() for ref in self.advisory_data1.references],
            url=self.advisory_data1.url,
            created_by="vulnerabilities.importers.nvd.NVDImporter",
            date_collected=timezone.now(),
        )

    def test_update_nvd_advisory_created_by(self):
        """After migrating, no advisory keeps the old label and one has the new id."""
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        adv = Advisory.objects.all()

        assert adv.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").count() == 0
        assert adv.filter(created_by="nvd_importer").count() == 1
9 changes: 4 additions & 5 deletions vulnerabilities/tests/test_vulnerability_status_improver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@

import pytest

from vulnerabilities.importers.nvd import NVDImporter
from vulnerabilities.improvers.vulnerability_status import VulnerabilityStatusImprover
from vulnerabilities.improvers.vulnerability_status import get_status_from_api
from vulnerabilities.models import Advisory
from vulnerabilities.models import Alias
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityStatusType
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

Expand All @@ -34,13 +33,13 @@
def test_interesting_advisories():
Advisory.objects.create(
aliases=["CVE-1"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.pipeline_id,
summary="1",
date_collected=datetime.now(),
)
Advisory.objects.create(
aliases=["CVE-1"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.pipeline_id,
summary="2",
date_collected=datetime.now(),
)
Expand All @@ -55,7 +54,7 @@ def test_improver_end_to_end(mock_response):
mock_response.return_value = response
adv = Advisory.objects.create(
aliases=["CVE-2023-35866"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.pipeline_id,
summary="1",
date_collected=datetime.now(),
)
Expand Down

0 comments on commit 021b568

Please sign in to comment.