diff --git a/api/openapi.generated.yml b/api/openapi.generated.yml index b17a53122..06968471a 100644 --- a/api/openapi.generated.yml +++ b/api/openapi.generated.yml @@ -108,6 +108,7 @@ paths: application/json: schema: $ref: '#/components/schemas/OpportunitySearchResponseV1' + application/octet-stream: {} description: Successful response '422': content: @@ -200,6 +201,20 @@ paths: page_offset: 1 page_size: 25 sort_direction: descending + example4: + summary: CSV file response + value: + format: csv + filters: + opportunity_status: + one_of: + - forecasted + - posted + pagination: + order_by: opportunity_id + page_offset: 1 + page_size: 100 + sort_direction: ascending security: - ApiKeyAuth: [] /v0.1/opportunities/search: @@ -894,6 +909,15 @@ components: - object allOf: - $ref: '#/components/schemas/OpportunityPaginationV1' + format: + default: !!python/object/apply:src.api.opportunities_v1.opportunity_schemas.SearchResponseFormat + - json + description: The format of the response + enum: + - json + - csv + type: + - string required: - pagination OpportunityAssistanceListingV1: @@ -1153,6 +1177,14 @@ components: - unrestricted type: - string + created_at: + type: string + format: date-time + description: When the opportunity summary was created + updated_at: + type: string + format: date-time + description: When the opportunity summary was last updated OpportunityV1: type: object properties: diff --git a/api/src/api/opportunities_v1/opportunity_routes.py b/api/src/api/opportunities_v1/opportunity_routes.py index 4bb4484d6..c1861a31f 100644 --- a/api/src/api/opportunities_v1/opportunity_routes.py +++ b/api/src/api/opportunities_v1/opportunity_routes.py @@ -1,15 +1,19 @@ import logging +from flask import Response + import src.adapters.db as db import src.adapters.db.flask_db as flask_db import src.adapters.search as search import src.adapters.search.flask_opensearch as flask_opensearch import src.api.opportunities_v1.opportunity_schemas as opportunity_schemas import 
src.api.response as response +import src.util.datetime_util as datetime_util from src.api.opportunities_v1.opportunity_blueprint import opportunity_blueprint from src.auth.api_key_auth import api_key_auth from src.logging.flask_logger import add_extra_data_to_current_request_logs from src.services.opportunities_v1.get_opportunity import get_opportunity, get_opportunity_versions +from src.services.opportunities_v1.opportunity_to_csv import opportunity_to_csv from src.services.opportunities_v1.search_opportunities import search_opportunities from src.util.dict_util import flatten_dict @@ -74,6 +78,21 @@ }, }, }, + "example4": { + "summary": "CSV file response", + "value": { + "format": "csv", + "filters": { + "opportunity_status": {"one_of": ["forecasted", "posted"]}, + }, + "pagination": { + "order_by": "opportunity_id", + "page_offset": 1, + "page_size": 100, + "sort_direction": "ascending", + }, + }, + }, } @@ -85,11 +104,16 @@ ) @opportunity_blueprint.output(opportunity_schemas.OpportunitySearchResponseV1Schema()) @opportunity_blueprint.auth_required(api_key_auth) -@opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION) +@opportunity_blueprint.doc( + description=SHARED_ALPHA_DESCRIPTION, + # This adds a file response schema + # in addition to the one added by the output decorator + responses={200: {"content": {"application/octet-stream": {}}}}, # type: ignore +) @flask_opensearch.with_search_client() def opportunity_search( search_client: search.SearchClient, search_params: dict -) -> response.ApiResponse: +) -> response.ApiResponse | Response: add_extra_data_to_current_request_logs(flatten_dict(search_params, prefix="request.body")) logger.info("POST /v1/opportunities/search") @@ -105,6 +129,18 @@ def opportunity_search( ) logger.info("Successfully fetched opportunities") + if search_params.get("format") == opportunity_schemas.SearchResponseFormat.CSV: + # Convert the response into a CSV and return the contents + output = 
opportunity_to_csv(opportunities) + timestamp = datetime_util.utcnow().strftime("%Y%m%d-%H%M%S") + return Response( + output.getvalue().encode("utf-8"), + content_type="text/csv", + headers={ + "Content-Disposition": f"attachment; filename=opportunity_search_results_{timestamp}.csv" + }, + ) + return response.ApiResponse( message="Success", data=opportunities, diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 58a6dac36..815f7b417 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -1,3 +1,5 @@ +from enum import StrEnum + from src.api.schemas.extension import Schema, fields, validators from src.api.schemas.response_schema import AbstractResponseSchema, PaginationMixinSchema from src.api.schemas.search_schema import StrSearchSchemaBuilder @@ -11,6 +13,11 @@ from src.pagination.pagination_schema import generate_pagination_schema +class SearchResponseFormat(StrEnum): + JSON = "json" + CSV = "csv" + + class OpportunitySummaryV1Schema(Schema): summary_description = fields.String( allow_none=True, @@ -208,6 +215,13 @@ class OpportunitySummaryV1Schema(Schema): funding_categories = fields.List(fields.Enum(FundingCategory)) applicant_types = fields.List(fields.Enum(ApplicantType)) + created_at = fields.DateTime( + metadata={"description": "When the opportunity summary was created"} + ) + updated_at = fields.DateTime( + metadata={"description": "When the opportunity summary was last updated"} + ) + class OpportunityAssistanceListingV1Schema(Schema): program_title = fields.String( @@ -382,6 +396,15 @@ class OpportunitySearchRequestV1Schema(Schema): required=True, ) + format = fields.Enum( + SearchResponseFormat, + load_default=SearchResponseFormat.JSON, + metadata={ + "description": "The format of the response", + "default": SearchResponseFormat.JSON, + }, + ) + class OpportunityGetResponseV1Schema(AbstractResponseSchema): data = 
# Columns of the CSV export, in the order they are written.
CSV_FIELDS = [
    "opportunity_id",
    "opportunity_number",
    "opportunity_title",
    "opportunity_status",
    "agency",
    "category",
    "category_explanation",
    "post_date",
    "close_date",
    "close_date_description",
    "archive_date",
    "is_cost_sharing",
    "expected_number_of_awards",
    "estimated_total_program_funding",
    "award_floor",
    "award_ceiling",
    "additional_info_url",
    "additional_info_url_description",
    "opportunity_assistance_listings",
    "funding_instruments",
    "funding_categories",
    "funding_category_description",
    "applicant_types",
    "applicant_eligibility_description",
    "agency_code",
    "agency_name",
    "agency_phone_number",
    "agency_contact_description",
    "agency_email_address",
    "agency_email_address_description",
    "is_forecast",
    "forecasted_post_date",
    "forecasted_close_date",
    "forecasted_close_date_description",
    "forecasted_award_date",
    "forecasted_project_start_date",
    "fiscal_year",
    "created_at",
    "updated_at",
    # We put the description at the end as it's the longest value
    # which can help improve readability of other fields
    "summary_description",
]
# Same as above, but faster lookup
CSV_FIELDS_SET = set(CSV_FIELDS)

# List-valued columns whose items are collapsed into a single ";"-separated cell.
# Kept at module level so the container is not rebuilt for every key of every row.
_JOINED_LIST_FIELDS = frozenset({"funding_instruments", "funding_categories", "applicant_types"})


def _process_assistance_listing(assistance_listings: list[dict]) -> str:
    """Collapse a list of assistance listings into one CSV cell.

    Each listing is rendered as ``<assistance_listing_number>|<program_title>``
    and the rendered listings are joined with ``;``. An empty or null list
    yields an empty string instead of raising.
    """
    return ";".join(
        f"{listing['assistance_listing_number']}|{listing['program_title']}"
        for listing in assistance_listings or ()
    )


def opportunity_to_csv(opportunities: Sequence[dict]) -> io.StringIO:
    """Render opportunity records as CSV text.

    Every opportunity is flattened with ``flatten_dict``, the ``summary.`` and
    ``assistance_listings.`` key prefixes are stripped, and only the columns
    listed in ``CSV_FIELDS`` are kept. List-valued columns are collapsed into a
    single ``;``-separated cell.

    Args:
        opportunities: The opportunity records to export, one dict per row.

    Returns:
        An ``io.StringIO`` holding a header row followed by one fully-quoted
        row per opportunity. The buffer is not rewound; read it with
        ``getvalue()``.
    """
    rows: list[dict] = []

    for opportunity in opportunities:
        row: dict = {}
        # presumably flatten_dict yields dotted keys such as "summary.post_date" - verify
        for raw_key, value in flatten_dict(opportunity).items():
            # Remove prefixes from nested data structures
            # NOTE(review): if a top-level key and a "summary."-prefixed key share a
            # name after stripping, the one iterated last silently wins - confirm
            # that is the intended precedence.
            column = raw_key.removeprefix("summary.").removeprefix("assistance_listings.")

            # Remove fields we haven't configured
            if column not in CSV_FIELDS_SET:
                continue

            if column == "opportunity_assistance_listings":
                value = _process_assistance_listing(value)
            elif column in _JOINED_LIST_FIELDS and value is not None:
                # A null list is left as None, which the csv writer emits as an
                # empty cell, rather than failing the whole export in str.join.
                value = ";".join(value)

            row[column] = value

        rows.append(row)

    output = io.StringIO()

    # Columns absent from a row are written as empty cells (DictWriter's default restval).
    writer = csv.DictWriter(output, fieldnames=CSV_FIELDS, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    writer.writerows(rows)

    return output
def _add_aggregations(builder: search.SearchQueryBuilder) -> None:
    """Register the terms aggregations requested alongside a search.

    One aggregation is added per facet: opportunity status, applicant type,
    funding instrument, funding category and agency. The first argument to
    ``aggregation_terms`` appears to be the name the aggregation is reported
    under, the second the field it is computed over, after translation by
    ``_adjust_field_name``.
    """
    # TODO - we'll likely want to adjust the total number of values returned, especially
    # for agency as there could be hundreds of different agencies, and currently it's limited to 25.

    # The status facet must be computed over the status field. It previously reused
    # "applicant_type", so its counts were a duplicate of the applicant_type facet.
    # NOTE(review): confirm _adjust_field_name has a mapping for "opportunity_status".
    builder.aggregation_terms("opportunity_status", _adjust_field_name("opportunity_status"))
    builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_type"))
    builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instrument"))
    builder.aggregation_terms("funding_category", _adjust_field_name("funding_category"))
    builder.aggregation_terms("agency", _adjust_field_name("agency_code"))
def call_search_and_validate(client, api_auth_token, search_request, expected_results):
    """Run one search as JSON and again as CSV, validating both responses.

    Posts ``search_request`` to ``/v1/opportunities/search`` and checks the JSON
    response against ``expected_results``, then repeats the request with
    ``format`` set to ``"csv"`` and checks the CSV response against the same
    expectations.

    ``search_request`` is not modified.
    """
    resp = client.post(
        "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token}
    )
    validate_search_response(resp, expected_results)

    # Send a copy instead of setting search_request["format"] in place: the request
    # dicts come from pytest parametrization, and mutating one would turn any later
    # use of the same object into a CSV request.
    csv_request = {**search_request, "format": "csv"}
    resp = client.post(
        "/v1/opportunities/search", json=csv_request, headers={"X-Auth": api_auth_token}
    )
    validate_search_response(resp, expected_results, is_csv_response=True)
test_sorting_and_pagination_200( def test_search_filters_200( self, client, api_auth_token, setup_search_data, search_request, expected_results ): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - validate_search_response(resp, expected_results) + call_search_and_validate(client, api_auth_token, search_request, expected_results) @pytest.mark.parametrize( "search_request, expected_results", @@ -758,7 +773,4 @@ def test_search_query_200( ): # This test isn't looking to validate opensearch behavior, just that we've connected fields properly and # results being returned are as expected. - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - validate_search_response(resp, expected_results) + call_search_and_validate(client, api_auth_token, search_request, expected_results) diff --git a/api/tests/src/db/models/factories.py b/api/tests/src/db/models/factories.py index fd6d5962d..9aa87663f 100644 --- a/api/tests/src/db/models/factories.py +++ b/api/tests/src/db/models/factories.py @@ -94,7 +94,7 @@ class CustomProvider(BaseProvider): AGENCY_CONTACT_DESC_FORMATS = [ "{{name}}\n{{job}}\n555-###-####\n{{email}}", "{{relevant_url}} Contact Center\nHours of operation are 24 hours a day, 7 days a week.\n{{email}}", - "{{agency}} Webmaster\n{{email}}", + "Webmaster\n{{email}}", ] # Rather than generate any random URL in our data, use those @@ -367,7 +367,7 @@ class Meta: no_declaration=None, ) - agency_code = factory.Faker("agency") + agency_code = factory.LazyAttribute(lambda s: s.opportunity.agency) agency_name = factory.Faker("agency_name") agency_phone_number = Generators.PhoneNumber agency_contact_description = factory.Faker("agency_contact_description")