From a02b53f4bc3484e77dd3898cc99f35a5d31873f7 Mon Sep 17 00:00:00 2001 From: kunalsz Date: Sun, 2 Mar 2025 16:46:06 -0500 Subject: [PATCH 1/4] Added Apache Camel Pipeline Signed-off-by: kunalsz --- vulnerabilities/importers/__init__.py | 2 + .../pipelines/apache_camel_importer.py | 222 ++++++++++++++++++ .../test_apache_camel_importer_pipeline.py | 66 ++++++ .../apache_camel/apache_camel_expected.json | 72 ++++++ .../apache_camel/apache_camel_test.html | 16 ++ 5 files changed, 378 insertions(+) create mode 100644 vulnerabilities/pipelines/apache_camel_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py create mode 100644 vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json create mode 100644 vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 3f429f669..f501aaa0e 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -35,6 +35,7 @@ from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer +from vulnerabilities.pipelines import apache_camel_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer @@ -44,6 +45,7 @@ from vulnerabilities.pipelines import pysec_importer IMPORTERS_REGISTRY = [ + apache_camel_importer.ApacheCamelImporterPipeline, openssl.OpensslImporter, redhat.RedhatImporter, debian.DebianImporter, diff --git a/vulnerabilities/pipelines/apache_camel_importer.py b/vulnerabilities/pipelines/apache_camel_importer.py new file mode 100644 index 000000000..4aae34a97 --- /dev/null +++ b/vulnerabilities/pipelines/apache_camel_importer.py @@ -0,0 +1,222 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import logging +import re +from datetime import datetime +from datetime import timezone +from typing import Iterable +from typing import Tuple + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import MavenVersionRange +from univers.version_range import VersionRange +from univers.versions import GenericVersion +from univers.versions import MavenVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.severity_systems import GENERIC +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import fetch_response +from vulnerabilities.utils import get_item + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class ApacheCamelImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect Advisories from Apache Camel""" + + pipeline_id = "apache_camel_importer" + spdx_license_expression = "Apache-2.0" + license_url = "https://www.apache.org/licenses/LICENSE-2.0" + root_url = "https://camel.apache.org/security/" + importer_name = "Apache Camel Importter" + + def __init__(self): + super().__init__() + self.raw_data = None + + @classmethod + def steps(cls): + return ( + cls.fetch_html_response, + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + # fetch the html content and saves in raw_data + def fetch_html_response(self): + try: + response = fetch_response(self.root_url).content + self.raw_data = BeautifulSoup(response, "html.parser") + except: + logger.error(f"Failed to fetch URL {self.root_url}") + + # num of advisories + def advisories_count(self) -> int: + return fetch_count_advisories(self.raw_data) + + # parse the response data + def collect_advisories(self) -> Iterable[AdvisoryData]: + adv_data = fetch_advisory_data(self.raw_data) + for data in adv_data: + yield to_advisory_data(data) + + +# fetch the html content +def fetch_html_response(url): + try: + response = fetch_response(url).content + soup = BeautifulSoup(response, "html.parser") + return soup + except: + logger.error(f"Failed to fetch URL {url}") + + +def fetch_count_advisories(soup): + # soup = fetch_html_response(url) + table = soup.find("tbody") + advisory_len = len(table.find_all("tr")) + return advisory_len + + +# fetch the content from the html data +def fetch_advisory_data(soup): + advisories = [] + # soup = fetch_html_response(url) + + # Find the table containing the security advisories,ignoring the thead + table = soup.find("tbody") + + # Initialize a list to store the extracted data + advisories = [] + + # Iterate through each row in the table + for row in table.find_all("tr"): + columns = row.find_all("td") + if len(columns) == 5: # Ensure it's a row with data (not headers or empty rows) + reference = columns[0].text.strip() + affected = columns[1].text.strip() + fixed = columns[2].text.strip() + score = columns[3].text.strip() + description = columns[4].text.strip() + + # Append the extracted data to the list + advisories.append( + { + "Reference": reference, + "Affected": affected, + "Fixed": fixed, + "Score": score, + "Description": description, + } + ) + + return advisories + + +def to_advisory_data(raw_data) -> AdvisoryData: + """Parses extracted data to Advisory Data""" + # alias + alias = get_item(raw_data, "Reference") + + # affected packages + affected_packages = [] + affected_package_string = get_item(raw_data, "Affected") + affected_package = parse_apache_camel_versions(affected_package_string) + affected_packages.append( + AffectedPackage( + package=PackageURL( + type="maven", + namespace="org.apache.camel", + name="camel", + ), + affected_version_range=affected_package, + ) + ) + + # fixed versions + version_pattern = re.compile(r"\b\d+\.\d+\.\d+\b") + fixed_version_out = get_item(raw_data, "Fixed") + fixed_versions = version_pattern.findall(fixed_version_out) + + # score + score = get_item(raw_data, "Score") # words not numbers + severity = VulnerabilitySeverity(system=SCORING_SYSTEMS["generic_textual"], value=score) + # Reference + references = [] + references.append( + Reference( + severities=[severity], + reference_id=alias, + url=f"https://camel.apache.org/security/{alias}.html", + ) + ) + + # description + description = get_item(raw_data, "Description") + + return AdvisoryData( + aliases=alias, + summary=description, + affected_packages=affected_packages, + references=references, + url=f"https://camel.apache.org/security/{alias}.html", + ) + + +def parse_apache_camel_versions(version_string): + version_ranges = [] + + # Handle "from X before Y" + for match in re.finditer(r"from ([\d\w.-]+) before ([\d\w.-]+)", version_string): + start_version, end_version = match.groups() + version_ranges.extend( + [ + VersionConstraint(comparator=">=", version=MavenVersion(start_version)), + VersionConstraint(comparator="<", version=MavenVersion(end_version)), + ] + ) + + # Handle "from X up to Y" + for match in re.finditer(r"from ([\d\w.-]+) up to ([\d\w.-]+)", version_string): + start_version, end_version = match.groups() + version_ranges.extend( + [ + VersionConstraint(comparator=">=", version=MavenVersion(start_version)), + VersionConstraint(comparator="<=", version=MavenVersion(end_version)), + ] + ) + + # Handle isolated versions like `3.19.0` + for match in re.finditer(r"(\d+\.\d+\.\d+)", version_string): + version = match.group(1) + version_ranges.append(VersionConstraint(comparator="=", version=MavenVersion(version))) + + # Handle X.x style like 2.22.x + for match in re.finditer(r"(\d+\.\d+)\.x", version_string): + version_prefix = match.group(1) + start_version = f"{version_prefix}.0" + end_version = f"{version_prefix}.99999" # To cover all patch versions + version_ranges.extend( + [ + VersionConstraint(comparator=">=", version=MavenVersion(start_version)), + VersionConstraint(comparator="<=", version=MavenVersion(end_version)), + ] + ) + + return MavenVersionRange(constraints=version_ranges) + diff --git a/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py new file mode 100644 index 000000000..b44121c72 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py @@ -0,0 +1,66 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import datetime +import json +import os +from pathlib import Path +from unittest.mock import patch + +import pytest +from bs4 import BeautifulSoup +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.severity_systems import GENERIC +from vulnerabilities.severity_systems import ScoringSystem +from vulnerabilities.tests import util_tests + +TEST_DATA = ( + Path(__file__).parent.parent / "test_data" / "apache_camel" / "apache_camel_expected.json" +) +TEST_HTML = Path(__file__).parent.parent / "test_data" / "apache_camel" / "apache_camel_test.html" + + +def load_test_data(file): + with open(file) as f: + return json.load(f) + + +def test_to_advisory_data(): + """test for parsing the html data""" + with open(TEST_HTML) as f: + mock_response = BeautifulSoup(f.read(), features="html.parser") + + expected = load_test_data(TEST_DATA) + + with patch("requests.get") as mock_response_get: + mock_response_get.return_value.text = mock_response + from vulnerabilities.pipelines.apache_camel_importer import ApacheCamelImporterPipeline + + pipeline = ApacheCamelImporterPipeline() + pipeline.raw_data = mock_response + results = [data.to_dict() for data in pipeline.collect_advisories()] # advisories + + for result, exp in zip( + sorted(results, key=lambda x: x["aliases"][0]), + sorted(expected, key=lambda x: x["aliases"][0]), + ): + assert result["aliases"] == exp["aliases"] + assert result["summary"] == exp["summary"] + assert len(result["affected_packages"]) == len(exp["affected_packages"]) + for r_pkg, e_pkg in zip( + sorted(result["affected_packages"], key=lambda x: x["affected_version_range"]), + sorted(exp["affected_packages"], key=lambda x: x["affected_version_range"]), + ): + assert r_pkg["package"]["name"] == e_pkg["package"]["name"] + assert r_pkg["package"]["type"] == e_pkg["package"]["type"] diff --git a/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json b/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json new file mode 100644 index 000000000..afa02e44c --- /dev/null +++ b/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json @@ -0,0 +1,72 @@ +[ + { + "aliases": "CVE-2024-22371", + "summary": "Exposure of sensitive data by crafting a malicious EventFactory and providing a custom ExchangeCreatedEvent that exposes sensitive data", + "affected_packages": [ + { + "package": { + "type": "maven", + "namespace": "org.apache.camel", + "name": "camel", + "version": "", + "qualifiers": "", + "subpath": "" + }, + "affected_version_range": "vers:maven/3.0.0|3.21.4|3.22.0|>=3.22.0|<3.22.1|3.22.1|4.0.0|>=4.0.0|<4.0.4|4.0.4|4.1.0|>=4.1.0|<4.4.0|4.4.0", + "fixed_version": ["3.21.4", "3.22.1", "4.0.4", "4.4.0"] + } + ], + "references": [ + { + "reference_id": "CVE-2024-22371", + "reference_type": "", + "url": "https://camel.apache.org/security/CVE-2024-22371.html", + "severities": [ + { + "system": "generic_textual", + "value": "LOW", + "scoring_elements": "" + } + ] + } + ], + "date_published": null, + "weaknesses": [], + "url": "https://camel.apache.org/security/CVE-2024-22371.html" + }, + { + "aliases": "CVE-2024-23114", + "summary": "Apache Camel: Camel-CassandraQL: Unsafe Deserialization from CassandraAggregationRepository", + "affected_packages": [ + { + "package": { + "type": "maven", + "namespace": "org.apache.camel", + "name": "camel", + "version": "", + "qualifiers": "", + "subpath": "" + }, + "affected_version_range": "vers:maven/3.0.0|3.21.4|3.22.0|>=3.22.0|<3.22.1|3.22.1|4.0.0|>=4.0.0|<4.0.4|4.0.4|4.1.0|>=4.1.0|<4.4.0|4.4.0", + "fixed_version": ["3.21.4", "3.22.1", "4.0.4", "4.4.0"] + } + ], + "references": [ + { + "reference_id": "CVE-2024-23114", + "reference_type": "", + "url": "https://camel.apache.org/security/CVE-2024-23114.html", + "severities": [ + { + "system": "generic_textual", + "value": "HIGH", + "scoring_elements": "" + } + ] + } + ], + "date_published": null, + "weaknesses": [], + "url": "https://camel.apache.org/security/CVE-2024-23114.html" + } +] \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html b/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html new file mode 100644 index 000000000..818edb042 --- /dev/null +++ b/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html @@ -0,0 +1,16 @@ + + + CVE-2024-22371 + From 3.0.0 before 3.21.4, from 3.22.0 before 3.22.1, from 4.0.0 before 4.0.4, from 4.1.0 before 4.4.0 + 3.21.4, 3.22.1, 4.0.4 and 4.4.0 + LOW + Exposure of sensitive data by crafting a malicious EventFactory and providing a custom ExchangeCreatedEvent that exposes sensitive data + + + CVE-2024-23114 + From 3.0.0 before 3.21.4, from 3.22.0 before 3.22.1, from 4.0.0 before 4.0.4, from 4.1.0 before 4.4.0. + 3.21.4, 3.22.1, 4.0.4 and 4.4.0 + HIGH + Apache Camel: Camel-CassandraQL: Unsafe Deserialization from CassandraAggregationRepository + + \ No newline at end of file From a98b69a27f1fcf3c4247df0d36324d34350b406d Mon Sep 17 00:00:00 2001 From: kunalsz Date: Sun, 2 Mar 2025 16:56:35 -0500 Subject: [PATCH 2/4] fix linting Signed-off-by: kunalsz --- vulnerabilities/pipelines/apache_camel_importer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vulnerabilities/pipelines/apache_camel_importer.py b/vulnerabilities/pipelines/apache_camel_importer.py index 4aae34a97..eeda96875 100644 --- a/vulnerabilities/pipelines/apache_camel_importer.py +++ b/vulnerabilities/pipelines/apache_camel_importer.py @@ -219,4 +219,3 @@ def parse_apache_camel_versions(version_string): ) return MavenVersionRange(constraints=version_ranges) - From a3845daf084b5f6df3dcd07fc535c3b8a511ac57 Mon Sep 17 00:00:00 2001 From: kunalsz Date: Wed, 2 Apr 2025 05:34:22 -0400 Subject: [PATCH 3/4] Refactor the code Signed-off-by: kunalsz --- .../pipelines/apache_camel_importer.py | 72 ++++++++----------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/vulnerabilities/pipelines/apache_camel_importer.py b/vulnerabilities/pipelines/apache_camel_importer.py index eeda96875..3e0119482 100644 --- a/vulnerabilities/pipelines/apache_camel_importer.py +++ b/vulnerabilities/pipelines/apache_camel_importer.py @@ -47,74 +47,54 @@ class ApacheCamelImporterPipeline(VulnerableCodeBaseImporterPipeline): def __init__(self): super().__init__() - self.raw_data = None @classmethod def steps(cls): return ( - cls.fetch_html_response, cls.collect_and_store_advisories, cls.import_new_advisories, ) - # fetch the html content and saves in raw_data - def fetch_html_response(self): - try: - response = fetch_response(self.root_url).content - self.raw_data = BeautifulSoup(response, "html.parser") - except: - logger.error(f"Failed to fetch URL {self.root_url}") - # num of advisories def advisories_count(self) -> int: - return fetch_count_advisories(self.raw_data) + return fetch_count_advisories(self.root_url) - # parse the response data def collect_advisories(self) -> Iterable[AdvisoryData]: - adv_data = fetch_advisory_data(self.raw_data) + adv_data = fetch_advisory_data(self.root_url) for data in adv_data: yield to_advisory_data(data) -# fetch the html content -def fetch_html_response(url): - try: - response = fetch_response(url).content - soup = BeautifulSoup(response, "html.parser") - return soup - except: - logger.error(f"Failed to fetch URL {url}") +def fetch_count_advisories(url): + """Return the count of advisories""" -def fetch_count_advisories(soup): - # soup = fetch_html_response(url) + response = fetch_response(url).content + soup = BeautifulSoup(response, "html.parser") table = soup.find("tbody") advisory_len = len(table.find_all("tr")) + return advisory_len -# fetch the content from the html data -def fetch_advisory_data(soup): - advisories = [] - # soup = fetch_html_response(url) +def fetch_advisory_data(url): + """Fetch advisory data from the table and return a list containing all the advisories""" + response = fetch_response(url).content + soup = BeautifulSoup(response, "html.parser") - # Find the table containing the security advisories,ignoring the thead table = soup.find("tbody") - # Initialize a list to store the extracted data advisories = [] - # Iterate through each row in the table for row in table.find_all("tr"): columns = row.find_all("td") - if len(columns) == 5: # Ensure it's a row with data (not headers or empty rows) + if len(columns) == 5: #Ensure it's a row with data (not headers or empty rows) reference = columns[0].text.strip() affected = columns[1].text.strip() fixed = columns[2].text.strip() score = columns[3].text.strip() description = columns[4].text.strip() - # Append the extracted data to the list advisories.append( { "Reference": reference, @@ -130,10 +110,16 @@ def fetch_advisory_data(soup): def to_advisory_data(raw_data) -> AdvisoryData: """Parses extracted data to Advisory Data""" - # alias + alias = get_item(raw_data, "Reference") - # affected packages + version_pattern = re.compile(r"\b\d+\.\d+\.\d+\b") + fixed_version_out = get_item(raw_data, "Fixed") + fixed_versions = [] + for fixed_version in version_pattern.findall(fixed_version_out): + fixed_versions.append(MavenVersion(fixed_version)) + print(fixed_versions) + affected_packages = [] affected_package_string = get_item(raw_data, "Affected") affected_package = parse_apache_camel_versions(affected_package_string) @@ -145,18 +131,15 @@ def to_advisory_data(raw_data) -> AdvisoryData: name="camel", ), affected_version_range=affected_package, + fixed_version=fixed_versions ) ) - # fixed versions - version_pattern = re.compile(r"\b\d+\.\d+\.\d+\b") - fixed_version_out = get_item(raw_data, "Fixed") - fixed_versions = version_pattern.findall(fixed_version_out) - # score - score = get_item(raw_data, "Score") # words not numbers + score = get_item(raw_data, "Score") severity = VulnerabilitySeverity(system=SCORING_SYSTEMS["generic_textual"], value=score) - # Reference + + references = [] references.append( Reference( @@ -166,7 +149,7 @@ def to_advisory_data(raw_data) -> AdvisoryData: ) ) - # description + description = get_item(raw_data, "Description") return AdvisoryData( @@ -219,3 +202,8 @@ def parse_apache_camel_versions(version_string): ) return MavenVersionRange(constraints=version_ranges) + + +imp = ApacheCamelImporterPipeline() +adv = imp.collect_advisories() +print(next(adv)) \ No newline at end of file From dbb97d8da321e1b0a3af78bed35dc27aa5e2c08c Mon Sep 17 00:00:00 2001 From: kunalsz Date: Sun, 13 Apr 2025 22:52:06 -0400 Subject: [PATCH 4/4] Improve tests and add Docstrings and Doctests Signed-off-by: kunalsz --- .../pipelines/apache_camel_importer.py | 192 +++++++++++++++--- .../test_apache_camel_importer_pipeline.py | 154 ++++++++++---- .../apache_camel/apache_camel_expected.json | 53 +---- .../apache_camel/apache_camel_test.html | 31 ++- 4 files changed, 305 insertions(+), 125 deletions(-) diff --git a/vulnerabilities/pipelines/apache_camel_importer.py b/vulnerabilities/pipelines/apache_camel_importer.py index 3e0119482..a9c9386dc 100644 --- a/vulnerabilities/pipelines/apache_camel_importer.py +++ b/vulnerabilities/pipelines/apache_camel_importer.py @@ -6,20 +6,17 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # +import json import logging import re -from datetime import datetime from datetime import timezone from typing import Iterable -from typing import Tuple -import requests +import dateparser from bs4 import BeautifulSoup from packageurl import PackageURL from univers.version_constraint import VersionConstraint from univers.version_range import MavenVersionRange -from univers.version_range import VersionRange -from univers.versions import GenericVersion from univers.versions import MavenVersion from vulnerabilities.importer import AdvisoryData @@ -27,7 +24,6 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.severity_systems import GENERIC from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import fetch_response from vulnerabilities.utils import get_item @@ -55,7 +51,6 @@ def steps(cls): cls.import_new_advisories, ) - def advisories_count(self) -> int: return fetch_count_advisories(self.root_url) @@ -65,12 +60,60 @@ def collect_advisories(self) -> Iterable[AdvisoryData]: yield to_advisory_data(data) +def fetch_html_response(url): + """ + Fetch and parse the HTML content of a given URL. + This function sends a request to the URL, retrieves the HTML content, + and parses it using BeautifulSoup. + Args: + url (str): The URL to fetch the HTML content from. + Returns: + A BeautifulSoup object representing the parsed HTML content. + """ + try: + response = fetch_response(url).content + soup = BeautifulSoup(response, "html.parser") + return soup + except: + logger.error(f"Failed to fetch URL {url}") -def fetch_count_advisories(url): - """Return the count of advisories""" - response = fetch_response(url).content - soup = BeautifulSoup(response, "html.parser") +def fetch_count_advisories(url): + """ + Gives the number of advisories from the given URL. + Advisories are identified by tags. + + Args: + url (str): The URL to fetch the advisories from. + + Returns: + int: The number of advisories found on the page. + + Doctests: + >>> from unittest.mock import patch + >>> from bs4 import BeautifulSoup + >>> from vulnerabilities.pipelines.apache_camel_importer import fetch_count_advisories + >>> mock_html = ''' + ... + ... + ... + ... + ... + ... + ... + ... + ...
Advisory 1
Advisory 2
Advisory 3
+ ... + ... + ... ''' + >>> with patch('vulnerabilities.pipelines.apache_camel_importer.fetch_html_response') as mock_fetch: + ... mock_fetch.return_value = BeautifulSoup(mock_html, "html.parser") + ... count = fetch_count_advisories("http://example.com") + >>> count + 3 + """ + + soup = fetch_html_response(url) table = soup.find("tbody") advisory_len = len(table.find_all("tr")) @@ -78,17 +121,60 @@ def fetch_count_advisories(url): def fetch_advisory_data(url): - """Fetch advisory data from the table and return a list containing all the advisories""" - response = fetch_response(url).content - soup = BeautifulSoup(response, "html.parser") - + """ + Fetch advisory data from the given URL. + + Args: + url (str): The URL to fetch the advisory data from. + + Returns: + list: A list of dictionaries, where each dictionary contains advisory details. + + Doctests: + >>> from unittest.mock import patch + >>> mock_html = ''' + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + ...
CVE-2025-30177Apache Camel 4.10.0 before 4.10.34.10.3MEDIUMCamel-Undertow Message Header Injection
CVE-2025-30178Apache Camel 4.8.0 before 4.8.64.8.6HIGHAnother vulnerability description
+ ... + ... + ... ''' + >>> with patch('vulnerabilities.pipelines.apache_camel_importer.fetch_html_response') as mock_fetch: + ... mock_fetch.return_value = BeautifulSoup(mock_html, "html.parser") + ... advisories = fetch_advisory_data("http://example.com") + >>> len(advisories) + 2 + >>> advisories[0]['Reference'] + 'CVE-2025-30177' + >>> advisories[0]['Affected'] + 'Apache Camel 4.10.0 before 4.10.3' + """ + + soup = fetch_html_response(url) table = soup.find("tbody") advisories = [] for row in table.find_all("tr"): columns = row.find_all("td") - if len(columns) == 5: #Ensure it's a row with data (not headers or empty rows) + if len(columns) == 5: reference = columns[0].text.strip() affected = columns[1].text.strip() fixed = columns[2].text.strip() @@ -108,9 +194,52 @@ def fetch_advisory_data(url): return advisories +""" +{ + 'Reference': 'CVE-2025-30177', + 'Affected': 'Apache Camel 4.10.0 before 4.10.3. Apache Camel 4.8.0 before 4.8.6.', + 'Fixed': '4.8.6 and 4.10.3', + 'Score': 'MEDIUM', + 'Description': 'Camel-Undertow Message Header Injection via Improper Filtering' +} +""" + + def to_advisory_data(raw_data) -> AdvisoryData: - """Parses extracted data to Advisory Data""" - + """ + Convert raw advisory data into an AdvisoryData object. + + Args: + raw_data (dict): A dictionary containing raw advisory data. + + Returns: + AdvisoryData: An object containing structured advisory information. + + Doctests: + >>> from unittest.mock import patch + >>> from vulnerabilities.pipelines.apache_camel_importer import fetch_date_published + >>> from vulnerabilities.pipelines.apache_camel_importer import to_advisory_data + >>> from vulnerabilities.importer import AdvisoryData + >>> raw_data = { + ... 'Reference': 'CVE-2025-30177', + ... 'Affected': 'Apache Camel 4.10.0 before 4.10.3. Apache Camel 4.8.0 before 4.8.6.', + ... 'Fixed': '4.8.6 and 4.10.3', + ... 'Score': 'MEDIUM', + ... 'Description': 'Camel-Undertow Message Header Injection via Improper Filtering' + ... } + >>> with patch('vulnerabilities.pipelines.apache_camel_importer.fetch_date_published') as mock_fetch_date_published: + ... mock_fetch_date_published.return_value = "2025-04-01T11:56:30.484000+00:00" + ... advisory = to_advisory_data(raw_data) + >>> advisory.aliases + ['CVE-2025-30177'] + >>> advisory.summary + 'Camel-Undertow Message Header Injection via Improper Filtering' + >>> len(advisory.affected_packages) + 1 + >>> advisory.affected_packages[0].package.name + 'camel' + """ + alias = get_item(raw_data, "Reference") version_pattern = re.compile(r"\b\d+\.\d+\.\d+\b") @@ -118,7 +247,6 @@ def to_advisory_data(raw_data) -> AdvisoryData: fixed_versions = [] for fixed_version in version_pattern.findall(fixed_version_out): fixed_versions.append(MavenVersion(fixed_version)) - print(fixed_versions) affected_packages = [] affected_package_string = get_item(raw_data, "Affected") @@ -131,14 +259,11 @@ def to_advisory_data(raw_data) -> AdvisoryData: name="camel", ), affected_version_range=affected_package, - fixed_version=fixed_versions ) ) - score = get_item(raw_data, "Score") severity = VulnerabilitySeverity(system=SCORING_SYSTEMS["generic_textual"], value=score) - references = [] references.append( @@ -149,19 +274,33 @@ def to_advisory_data(raw_data) -> AdvisoryData: ) ) - description = get_item(raw_data, "Description") + date_published = fetch_date_published(alias) + parsed_date_published = dateparser.parse(date_published).replace(tzinfo=timezone.utc) + return AdvisoryData( - aliases=alias, + aliases=[alias], summary=description, affected_packages=affected_packages, references=references, url=f"https://camel.apache.org/security/{alias}.html", + date_published=parsed_date_published, ) +def fetch_date_published(cve): + """Fetches Date of a CVE""" + + url = f"https://cveawg.mitre.org/api/cve/{cve}" + response = fetch_response(url).content + response = json.loads(response) + return response["cveMetadata"]["datePublished"] + + def parse_apache_camel_versions(version_string): + """Parse version strings from Apache Camel advisories into version constraints""" + version_ranges = [] # Handle "from X before Y" @@ -202,8 +341,3 @@ def parse_apache_camel_versions(version_string): ) return MavenVersionRange(constraints=version_ranges) - - -imp = ApacheCamelImporterPipeline() -adv = imp.collect_advisories() -print(next(adv)) \ No newline at end of file diff --git a/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py index b44121c72..38eaffb69 100644 --- a/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_apache_camel_importer_pipeline.py @@ -9,26 +9,27 @@ import datetime import json -import os from pathlib import Path -from unittest.mock import patch +from unittest import mock -import pytest -from bs4 import BeautifulSoup from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import MavenVersionRange +from univers.versions import MavenVersion from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.severity_systems import GENERIC +from vulnerabilities.pipelines import apache_camel_importer from vulnerabilities.severity_systems import ScoringSystem -from vulnerabilities.tests import util_tests -TEST_DATA = ( +EXPECTED_DATA = ( Path(__file__).parent.parent / "test_data" / "apache_camel" / "apache_camel_expected.json" ) -TEST_HTML = Path(__file__).parent.parent / "test_data" / "apache_camel" / "apache_camel_test.html" +ADVISORY_HTML_DATA = ( + Path(__file__).parent.parent / "test_data" / "apache_camel" / "apache_camel_test.html" +) def load_test_data(file): @@ -36,31 +37,112 @@ def load_test_data(file): return json.load(f) -def test_to_advisory_data(): - """test for parsing the html data""" - with open(TEST_HTML) as f: - mock_response = BeautifulSoup(f.read(), features="html.parser") - - expected = load_test_data(TEST_DATA) - - with patch("requests.get") as mock_response_get: - mock_response_get.return_value.text = mock_response - from vulnerabilities.pipelines.apache_camel_importer import ApacheCamelImporterPipeline - - pipeline = ApacheCamelImporterPipeline() - pipeline.raw_data = mock_response - results = [data.to_dict() for data in pipeline.collect_advisories()] # advisories - - for result, exp in zip( - sorted(results, key=lambda x: x["aliases"][0]), - sorted(expected, key=lambda x: x["aliases"][0]), - ): - assert result["aliases"] == exp["aliases"] - assert result["summary"] == exp["summary"] - assert len(result["affected_packages"]) == len(exp["affected_packages"]) - for r_pkg, e_pkg in zip( - sorted(result["affected_packages"], key=lambda x: x["affected_version_range"]), - sorted(exp["affected_packages"], key=lambda x: x["affected_version_range"]), - ): - assert r_pkg["package"]["name"] == e_pkg["package"]["name"] - assert r_pkg["package"]["type"] == e_pkg["package"]["type"] +@mock.patch("requests.get") +def test_fetch_advisory_data(mock_get): + """Test fetching and parsing of advisory data""" + + expected_data = load_test_data(EXPECTED_DATA) + + with open(ADVISORY_HTML_DATA, "r", encoding="utf-8") as file: + mock_html_content = file.read() + + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.text = mock_html_content + mock_response.content = mock_html_content.encode("utf-8") + + mock_get.return_value = mock_response + + advisory_data = apache_camel_importer.fetch_advisory_data("https://camel.apache.org/security/") + + assert advisory_data[0]["Reference"] == expected_data[0]["aliases"][0] + assert advisory_data[0]["Description"] == expected_data[0]["summary"] + + +@mock.patch("vulnerabilities.pipelines.apache_camel_importer.fetch_date_published") +@mock.patch("vulnerabilities.pipelines.apache_camel_importer.fetch_advisory_data") +def test_apache_camel_importer_pipeline_collect_advisories( + mock_fetch_advisory_data, mock_fetch_date_published +): + """Test the collect_advisories method in ApacheCamelImporterPipeline""" + + with open(ADVISORY_HTML_DATA, "r", encoding="utf-8") as file: + mock_html_content = file.read() + + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.text = mock_html_content + mock_response.content = mock_html_content.encode("utf-8") + + mock_fetch_advisory_data.return_value = [ + { + "Reference": "CVE-2025-30177", + "Affected": "Apache Camel 4.10.0 before 4.10.3. Apache Camel 4.8.0 before 4.8.6.", + "Fixed": "4.8.6 and 4.10.3", + "Score": "MEDIUM", + "Description": "Camel-Undertow Message Header Injection via Improper Filtering", + } + ] + + mock_fetch_date_published.return_value = "2025-04-01T11:56:30.484000+00:00" + + pipeline = apache_camel_importer.ApacheCamelImporterPipeline() + + generator = pipeline.collect_advisories() + advisories = list(generator) + + assert len(advisories) == 1 + assert advisories[0].aliases == advisory_data.aliases + assert advisories[0].date_published == advisory_data.date_published + assert advisories[0].summary == advisory_data.summary + assert advisories[0].affected_packages == advisory_data.affected_packages + + +advisory_data = AdvisoryData( + aliases=["CVE-2025-30177"], + summary="Camel-Undertow Message Header Injection via Improper Filtering", + affected_packages=[ + AffectedPackage( + package=PackageURL( + type="maven", + namespace="org.apache.camel", + name="camel", + version=None, + qualifiers={}, + subpath=None, + ), + affected_version_range=MavenVersionRange( + constraints=( + VersionConstraint(comparator="=", version=MavenVersion(string="4.8.0")), + VersionConstraint(comparator="=", version=MavenVersion(string="4.8.6")), + VersionConstraint(comparator="=", version=MavenVersion(string="4.10.0")), + VersionConstraint(comparator="=", version=MavenVersion(string="4.10.3")), + ) + ), + fixed_version=None, + ) + ], + references=[ + Reference( + reference_id="CVE-2025-30177", + reference_type="", + url="https://camel.apache.org/security/CVE-2025-30177.html", + severities=[ + VulnerabilitySeverity( + system=ScoringSystem( + identifier="generic_textual", + name="Generic textual severity rating", + url="", + notes="Severity for generic scoring systems. Contains generic textual values like High, Low etc", + ), + value="MEDIUM", + scoring_elements="", + published_at=None, + ) + ], + ) + ], + date_published=datetime.datetime(2025, 4, 1, 11, 56, 30, 484000, tzinfo=datetime.timezone.utc), + weaknesses=[], + url="https://camel.apache.org/security/CVE-2025-30177.html", +) diff --git a/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json b/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json index afa02e44c..646d6c817 100644 --- a/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json +++ b/vulnerabilities/tests/test_data/apache_camel/apache_camel_expected.json @@ -1,7 +1,7 @@ [ { - "aliases": "CVE-2024-22371", - "summary": "Exposure of sensitive data by crafting a malicious EventFactory and providing a custom ExchangeCreatedEvent that exposes sensitive data", + "aliases": ["CVE-2025-30177"], + "summary": "Camel-Undertow Message Header Injection via Improper Filtering", "affected_packages": [ { "package": { @@ -12,61 +12,26 @@ "qualifiers": "", "subpath": "" }, - "affected_version_range": "vers:maven/3.0.0|3.21.4|3.22.0|>=3.22.0|<3.22.1|3.22.1|4.0.0|>=4.0.0|<4.0.4|4.0.4|4.1.0|>=4.1.0|<4.4.0|4.4.0", - "fixed_version": ["3.21.4", "3.22.1", "4.0.4", "4.4.0"] + "affected_version_range": "vers:maven/4.8.0|4.8.6|4.10.0|4.10.3", + "fixed_version": null } ], "references": [ { - "reference_id": "CVE-2024-22371", + "reference_id": "CVE-2025-30177", "reference_type": "", - "url": "https://camel.apache.org/security/CVE-2024-22371.html", + "url": "https://camel.apache.org/security/CVE-2025-30177.html", "severities": [ { "system": "generic_textual", - "value": "LOW", + "value": "MEDIUM", "scoring_elements": "" } ] } ], - "date_published": null, + "date_published": "2025-04-01T11:56:30.484000+00:00", "weaknesses": [], - "url": "https://camel.apache.org/security/CVE-2024-22371.html" - }, - { - "aliases": "CVE-2024-23114", - "summary": "Apache Camel: Camel-CassandraQL: Unsafe Deserialization from CassandraAggregationRepository", - "affected_packages": [ - { - "package": { - "type": "maven", - "namespace": "org.apache.camel", - "name": "camel", - "version": "", - "qualifiers": "", - "subpath": "" - }, - "affected_version_range": "vers:maven/3.0.0|3.21.4|3.22.0|>=3.22.0|<3.22.1|3.22.1|4.0.0|>=4.0.0|<4.0.4|4.0.4|4.1.0|>=4.1.0|<4.4.0|4.4.0", - "fixed_version": ["3.21.4", "3.22.1", "4.0.4", "4.4.0"] - } - ], - "references": [ - { - "reference_id": "CVE-2024-23114", - "reference_type": "", - "url": "https://camel.apache.org/security/CVE-2024-23114.html", - "severities": [ - { - "system": "generic_textual", - "value": "HIGH", - "scoring_elements": "" - } - ] - } - ], - "date_published": null, - "weaknesses": [], - "url": "https://camel.apache.org/security/CVE-2024-23114.html" + "url": "https://camel.apache.org/security/CVE-2025-30177.html" } ] \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html b/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html index 818edb042..e39f7a8f7 100644 --- a/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html +++ b/vulnerabilities/tests/test_data/apache_camel/apache_camel_test.html @@ -1,16 +1,15 @@ - - - CVE-2024-22371 - From 3.0.0 before 3.21.4, from 3.22.0 before 3.22.1, from 4.0.0 before 4.0.4, from 4.1.0 before 4.4.0 - 3.21.4, 3.22.1, 4.0.4 and 4.4.0 - LOW - Exposure of sensitive data by crafting a malicious EventFactory and providing a custom ExchangeCreatedEvent that exposes sensitive data - - - CVE-2024-23114 - From 3.0.0 before 3.21.4, from 3.22.0 before 3.22.1, from 4.0.0 before 4.0.4, from 4.1.0 before 4.4.0. - 3.21.4, 3.22.1, 4.0.4 and 4.4.0 - HIGH - Apache Camel: Camel-CassandraQL: Unsafe Deserialization from CassandraAggregationRepository - - \ No newline at end of file + + + + CVE-2025-30177 + Apache Camel 4.10.0 before 4.10.3. Apache Camel 4.8.0 before 4.8.6. + 4.8.6 and 4.10.3 + MEDIUM + Camel-Undertow Message Header Injection via Improper Filtering + + + CVE-2025-29891 + Apache Camel 4.10.0 before 4.10.2. Apache Camel 4.8.0 before 4.8.5. Apache Camel 3.10.0 before 3.22.4. + 3.22.4, 4.8.5 and 4.10.2 HIGH Camel Message Header Injection through request parameters + + \ No newline at end of file