diff --git a/alembic/versions/587c186d91ee_better_match_information.py b/alembic/versions/587c186d91ee_better_match_information.py deleted file mode 100644 index cacffaf8..00000000 --- a/alembic/versions/587c186d91ee_better_match_information.py +++ /dev/null @@ -1,29 +0,0 @@ -"""better-match-information - -Revision ID: 587c186d91ee -Revises: 6991bcb18f89 -Create Date: 2024-07-27 19:51:33.408128 - -""" - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "587c186d91ee" -down_revision = "6991bcb18f89" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column("scans", sa.Column("files", postgresql.JSONB(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("scans", "files") - # ### end Alembic commands ### diff --git a/alembic/versions/a62a93704798_add_distributions.py b/alembic/versions/a62a93704798_add_distributions.py deleted file mode 100644 index ea7407b3..00000000 --- a/alembic/versions/a62a93704798_add_distributions.py +++ /dev/null @@ -1,23 +0,0 @@ -"""add distributions - -Revision ID: a62a93704798 -Revises: 587c186d91ee -Create Date: 2024-08-11 08:12:42.354151 - -""" - -from alembic import op - -# revision identifiers, used by Alembic. -revision = "a62a93704798" -down_revision = "587c186d91ee" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - op.alter_column("scans", "files", new_column_name="distributions") - - -def downgrade() -> None: - op.alter_column("scans", "distributions", new_column_name="files") diff --git a/docs/source/database_schema.rst b/docs/source/database_schema.rst index faf3828b..9d49e73c 100644 --- a/docs/source/database_schema.rst +++ b/docs/source/database_schema.rst @@ -44,8 +44,7 @@ Database Schema pending_by text, finished_by text, commit_hash text, - fail_reason text, - files jsonb + fail_reason text ); ALTER TABLE ONLY public.download_urls diff --git a/pyproject.toml b/pyproject.toml index 3471ca92..2f71f965 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,4 +132,3 @@ omit = [ [tool.coverage.report] fail_under = 100 -exclude_also = ["if TYPE_CHECKING:"] diff --git a/src/mainframe/endpoints/package.py b/src/mainframe/endpoints/package.py index a95c30e0..7a19a38c 100644 --- a/src/mainframe/endpoints/package.py +++ b/src/mainframe/endpoints/package.py @@ -78,7 +78,6 @@ def submit_results( scan.score = result.score scan.finished_by = auth.subject scan.commit_hash = result.commit - scan.distributions = result.distributions # These are the rules that already have an entry in the database rules = session.scalars(select(Rule).where(Rule.name.in_(result.rules_matched))).all() diff --git a/src/mainframe/models/__init__.py b/src/mainframe/models/__init__.py index a0f2e447..7bb09cc1 100644 --- a/src/mainframe/models/__init__.py +++ b/src/mainframe/models/__init__.py @@ -1,27 +1 @@ """Database models.""" - -from typing import Optional, Any, Type -from pydantic import BaseModel -from sqlalchemy import Dialect, TypeDecorator -from sqlalchemy.dialects.postgresql import JSONB - - -class Pydantic[T: BaseModel](TypeDecorator[T]): - """TypeDecorator to convert between Pydantic models and JSONB.""" - - impl = JSONB - cache_ok = True - - def __init__(self, pydantic_type: Type[T]): - super().__init__() - self.pydantic_type = pydantic_type - - def process_bind_param(self, value: Optional[T], dialect: Dialect) -> dict[str, Any]: - if value: - return value.model_dump() - else: - return {} - - def process_result_value(self, value: Any, dialect: Dialect) -> Optional[T]: - if value: - return self.pydantic_type.model_validate(value) diff --git a/src/mainframe/models/orm.py b/src/mainframe/models/orm.py index a8211bbc..b2c6265f 100644 --- a/src/mainframe/models/orm.py +++ b/src/mainframe/models/orm.py @@ -27,9 +27,6 @@ relationship, ) -from mainframe.models import Pydantic -from mainframe.models.schemas import Distributions - class Base(MappedAsDataclass, DeclarativeBase, kw_only=True): pass @@ -102,8 +99,6 @@ class Scan(Base): commit_hash: Mapped[Optional[str]] = mapped_column(default=None) - distributions: Mapped[Optional[Distributions]] = mapped_column(Pydantic(Distributions), default=None) - Index(None, Scan.status, postgresql_where=or_(Scan.status == Status.QUEUED, Scan.status == Status.PENDING)) diff --git a/src/mainframe/models/schemas.py b/src/mainframe/models/schemas.py index a27a11ff..cad4d420 100644 --- a/src/mainframe/models/schemas.py +++ b/src/mainframe/models/schemas.py @@ -1,62 +1,10 @@ -from __future__ import annotations - import datetime from enum import Enum -from typing import TYPE_CHECKING, Annotated, Any, Optional - -from pydantic import BaseModel, Field, field_serializer, ConfigDict, RootModel - -if TYPE_CHECKING: - from mainframe.models.orm import Scan - -type MetaValue = int | float | bool | str | bytes - - -class Range(BaseModel): - """Represents the inclusive range in the source file that was matched.""" - - start: int - end: int - - -class Match(BaseModel): - """Represents a specific match by a pattern in a rule.""" - - range: Range - data: list[Annotated[int, Field(ge=0, lt=256)]] - - -class PatternMatch(BaseModel): - """Represents the data matched by a pattern inside a rule.""" +from typing import Any, Optional - identifier: str - matches: list[Match] +from pydantic import BaseModel, Field, field_serializer, ConfigDict - -class RuleMatch(BaseModel): - """Represents the matches of a rule on a file""" - - identifier: str - patterns: list[PatternMatch] - metadata: dict[str, MetaValue] - - -class File(BaseModel): - """Represents a file and the rule matches for it.""" - - path: str - matches: list[RuleMatch] - - -Files = list[File] - - -class Distribution(BaseModel): - download_url: str - files: Files - - -Distributions = RootModel[list[Distribution]] +from .orm import Scan class ServerMetadata(BaseModel): @@ -96,8 +44,6 @@ class Package(BaseModel): commit_hash: Optional[str] - distributions: Optional[Distributions] - @classmethod def from_db(cls, scan: Scan): return cls( @@ -118,7 +64,6 @@ def from_db(cls, scan: Scan): finished_at=scan.finished_at, finished_by=scan.finished_by, commit_hash=scan.commit_hash, - distributions=scan.distributions, ) @field_serializer( @@ -187,7 +132,6 @@ class PackageScanResult(PackageSpecifier): score: int = 0 inspector_url: Optional[str] = None rules_matched: list[str] = [] - distributions: Optional[Distributions] = None class PackageScanResultFail(PackageSpecifier): diff --git a/tests/test_package.py b/tests/test_package.py index 7a705d44..f3a5865e 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -20,18 +20,10 @@ from mainframe.json_web_token import AuthenticationData from mainframe.models.orm import Scan, Status from mainframe.models.schemas import ( - Distribution, - Distributions, - File, - Files, - Match, Package, PackageScanResult, PackageScanResultFail, PackageSpecifier, - PatternMatch, - Range, - RuleMatch, ) from mainframe.rules import Rules @@ -88,32 +80,6 @@ def test_package_lookup_rejects_invalid_combinations( assert e.value.status_code == 400 -def test_package_lookup_files(db_session: Session): - """Test that `lookup_package_info` returns detailed file information.""" - - range_ = Range(start=0, end=4) - match = Match(range=range_, data=[0xDE, 0xAD, 0xBE, 0xEF]) - pattern = PatternMatch(identifier="$pat", matches=[match]) - rule = RuleMatch(identifier="rule1", patterns=[pattern], metadata={"author": "remmy", "score": 5}) - file = File(path="dist1/a/b.py", matches=[rule]) - files = Files([file]) - distros = Distributions([Distribution(download_url="http://example.com", files=files)]) - scan = Scan( - name="abc", - version="1.0.0", - status=Status.FINISHED, - queued_by="remmy", - distributions=distros, - ) - - with db_session.begin(): - db_session.add(scan) - - package = lookup_package_info(db_session, name="abc", version="1.0.0")[0] - - assert package.distributions == distros - - def test_handle_success(db_session: Session, test_data: list[Scan], auth: AuthenticationData, rules_state: Rules): job = get_jobs(db_session, auth, rules_state, batch=1) @@ -122,14 +88,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen name = job.name version = job.version - range_ = Range(start=0, end=4) - match = Match(range=range_, data=[0xDE, 0xAD, 0xBE, 0xEF]) - pattern = PatternMatch(identifier="$pat", matches=[match]) - rule = RuleMatch(identifier="rule1", patterns=[pattern], metadata={"author": "remmy", "score": 5}) - file = File(path="dist1/a/b.py", matches=[rule]) - files = Files([file]) - distros = Distributions([Distribution(download_url="http://example.com", files=files)]) - body = PackageScanResult( name=job.name, version=job.version, @@ -137,7 +95,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen score=2, inspector_url="test inspector url", rules_matched=["a", "b", "c"], - distributions=distros, ) submit_results(body, db_session, auth) @@ -150,7 +107,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen assert record.score == 2 assert record.inspector_url == "test inspector url" assert {rule.name for rule in record.rules} == {"a", "b", "c"} - assert record.distributions == distros else: assert all(scan.status != Status.QUEUED for scan in test_data)