Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
[Issue #86] Download the search response as a CSV file
Browse files Browse the repository at this point in the history
  • Loading branch information
chouinar committed Jun 17, 2024
1 parent 354654c commit 103eab5
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 25 deletions.
40 changes: 38 additions & 2 deletions api/src/api/opportunities_v1/opportunity_routes.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import logging

from flask import Response

import src.adapters.db as db
import src.adapters.db.flask_db as flask_db
import src.adapters.search as search
import src.adapters.search.flask_opensearch as flask_opensearch
import src.api.opportunities_v1.opportunity_schemas as opportunity_schemas
import src.api.response as response
import src.util.datetime_util as datetime_util
from src.api.opportunities_v1.opportunity_blueprint import opportunity_blueprint
from src.auth.api_key_auth import api_key_auth
from src.logging.flask_logger import add_extra_data_to_current_request_logs
from src.services.opportunities_v1.get_opportunity import get_opportunity
from src.services.opportunities_v1.opportunity_to_csv import opportunity_to_csv
from src.services.opportunities_v1.search_opportunities import search_opportunities
from src.util.dict_util import flatten_dict

Expand Down Expand Up @@ -74,6 +78,21 @@
},
},
},
"example4": {
"summary": "CSV file response",
"value": {
"format": "csv",
"filters": {
"opportunity_status": {"one_of": ["forecasted", "posted"]},
},
"pagination": {
"order_by": "opportunity_id",
"page_offset": 1,
"page_size": 100,
"sort_direction": "ascending",
},
},
},
}


Expand All @@ -85,11 +104,16 @@
)
@opportunity_blueprint.output(opportunity_schemas.OpportunitySearchResponseV1Schema())
@opportunity_blueprint.auth_required(api_key_auth)
@opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION)
@opportunity_blueprint.doc(
description=SHARED_ALPHA_DESCRIPTION,
# This adds a file response schema
# in addition to the one added by the output decorator
responses={200: {"content": {"application/octet-stream": {}}}}, # type: ignore
)
@flask_opensearch.with_search_client()
def opportunity_search(
search_client: search.SearchClient, search_params: dict
) -> response.ApiResponse:
) -> response.ApiResponse | Response:
add_extra_data_to_current_request_logs(flatten_dict(search_params, prefix="request.body"))
logger.info("POST /v1/opportunities/search")

Expand All @@ -105,6 +129,18 @@ def opportunity_search(
)
logger.info("Successfully fetched opportunities")

if search_params.get("format") == opportunity_schemas.SearchResponseFormat.CSV:
# Convert the response into a CSV and return the contents
output = opportunity_to_csv(opportunities)
timestamp = datetime_util.utcnow().strftime("%Y%m%d-%H%M%S")
return Response(
output.getvalue().encode("utf-8"),
content_type="text/csv",
headers={
"Content-Disposition": f"attachment; filename=opportunity_search_results_{timestamp}.csv"
},
)

return response.ApiResponse(
message="Success",
data=opportunities,
Expand Down
19 changes: 19 additions & 0 deletions api/src/api/opportunities_v1/opportunity_schemas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import StrEnum

from src.api.schemas.extension import Schema, fields, validators
from src.api.schemas.response_schema import AbstractResponseSchema, PaginationMixinSchema
from src.api.schemas.search_schema import StrSearchSchemaBuilder
Expand All @@ -11,6 +13,11 @@
from src.pagination.pagination_schema import generate_pagination_schema


class SearchResponseFormat(StrEnum):
JSON = "json"
CSV = "csv"


class OpportunitySummaryV1Schema(Schema):
summary_description = fields.String(
allow_none=True,
Expand Down Expand Up @@ -204,6 +211,9 @@ class OpportunitySummaryV1Schema(Schema):
funding_categories = fields.List(fields.Enum(FundingCategory))
applicant_types = fields.List(fields.Enum(ApplicantType))

created_at = fields.DateTime(metadata={"description": "TODO"})
updated_at = fields.DateTime(metadata={"description": "TODO"})


class OpportunityAssistanceListingV1Schema(Schema):
program_title = fields.String(
Expand Down Expand Up @@ -378,6 +388,15 @@ class OpportunitySearchRequestV1Schema(Schema):
required=True,
)

format = fields.Enum(
SearchResponseFormat,
load_default=SearchResponseFormat.JSON,
metadata={
"description": "The format of the response",
"default": SearchResponseFormat.JSON,
},
)


class OpportunityGetResponseV1Schema(AbstractResponseSchema):
data = fields.Nested(OpportunityV1Schema())
Expand Down
93 changes: 93 additions & 0 deletions api/src/services/opportunities_v1/opportunity_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import csv
import io
from typing import Sequence

from src.util.dict_util import flatten_dict

# Columns included in the CSV export, in output order. csv.DictWriter uses
# this list both as the header row and as the column ordering, so reordering
# entries here reorders the generated file.
CSV_FIELDS = [
    "opportunity_id",
    "opportunity_number",
    "opportunity_title",
    "opportunity_status",
    "agency",
    "category",
    "category_explanation",
    "post_date",
    "close_date",
    "close_date_description",
    "archive_date",
    "is_cost_sharing",
    "expected_number_of_awards",
    "estimated_total_program_funding",
    "award_floor",
    "award_ceiling",
    "additional_info_url",
    "additional_info_url_description",
    "opportunity_assistance_listings",
    "funding_instruments",
    "funding_categories",
    "funding_category_description",
    "applicant_types",
    "applicant_eligibility_description",
    "agency_code",
    "agency_name",
    "agency_phone_number",
    "agency_contact_description",
    "agency_email_address",
    "agency_email_address_description",
    "is_forecast",
    "forecasted_post_date",
    "forecasted_close_date",
    "forecasted_close_date_description",
    "forecasted_award_date",
    "forecasted_project_start_date",
    "fiscal_year",
    "created_at",
    "updated_at",
    # We put the description at the end as it's the longest value
    # which can help improve readability of other fields
    "summary_description",
]
# Same as above, but faster lookup
CSV_FIELDS_SET = set(CSV_FIELDS)


def _process_assistance_listing(assistance_listings: list[dict]) -> str:
return ";".join(
[f"{a['assistance_listing_number']}|{a['program_title']}" for a in assistance_listings]
)


def opportunity_to_csv(opportunities: Sequence[dict]) -> io.StringIO:
    """Serialize opportunity search results into an in-memory CSV.

    Each opportunity dict is flattened, restricted to the columns configured
    in CSV_FIELDS, and list-valued fields are collapsed to ';'-delimited
    strings. Returns a StringIO containing the header row plus one row per
    opportunity, quoted with QUOTE_ALL.
    """
    rows: list[dict] = []

    for record in opportunities:
        flattened = flatten_dict(record)

        row = {}
        for key, value in flattened.items():
            # Drop the nesting prefixes so keys line up with CSV_FIELDS
            key = key.removeprefix("summary.").removeprefix("assistance_listings.")

            # Skip anything not configured as a CSV column
            if key not in CSV_FIELDS_SET:
                continue

            if key == "opportunity_assistance_listings":
                value = _process_assistance_listing(value)
            elif key in ("funding_instruments", "funding_categories", "applicant_types"):
                value = ";".join(value)

            row[key] = value

        rows.append(row)

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=CSV_FIELDS, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    writer.writerows(rows)

    return output
8 changes: 4 additions & 4 deletions api/src/services/opportunities_v1/search_opportunities.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def _add_search_filters(builder: search.SearchQueryBuilder, filters: dict | None
def _add_aggregations(builder: search.SearchQueryBuilder) -> None:
    """Attach facet-count (terms) aggregations to the search query.

    Each aggregation is named after the filter it backs and counts documents
    per distinct value of the corresponding index field.
    """
    # TODO - we'll likely want to adjust the total number of values returned, especially
    # for agency as there could be hundreds of different agencies, and currently it's limited to 25.
    # NOTE(review): this line previously aggregated "opportunity_status" over the
    # applicant_type field — every sibling pairs an aggregation with its own field,
    # so pair it with opportunity_status here. Confirm against the index mapping.
    builder.aggregation_terms("opportunity_status", _adjust_field_name("opportunity_status"))
    builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_type"))
    builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instrument"))
    builder.aggregation_terms("funding_category", _adjust_field_name("funding_category"))
    builder.aggregation_terms("agency", _adjust_field_name("agency_code"))


Expand Down
4 changes: 4 additions & 0 deletions api/tests/src/api/opportunities_v1/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def get_search_request(
applicant_type_one_of: list[ApplicantType] | None = None,
opportunity_status_one_of: list[OpportunityStatus] | None = None,
agency_one_of: list[str] | None = None,
format: str | None = None,
):
req = {
"pagination": {
Expand Down Expand Up @@ -55,6 +56,9 @@ def get_search_request(
if query is not None:
req["query"] = query

if format is not None:
req["format"] = format

return req


Expand Down
46 changes: 29 additions & 17 deletions api/tests/src/api/opportunities_v1/test_opportunity_route_search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
from datetime import date

import pytest
Expand All @@ -23,22 +24,42 @@


def validate_search_response(
    search_response,
    expected_results: list[Opportunity],
    expected_status_code: int = 200,
    is_csv_response: bool = False,
):
    """Assert that a search API response contains exactly the expected opportunities.

    Handles both JSON and CSV response bodies. CSV parsing yields string
    values, so opportunity IDs are normalized to int before comparison.
    Result order is significant.
    """
    assert search_response.status_code == expected_status_code

    if is_csv_response:
        # CSV bodies come back as raw text; parse rows into dicts keyed by the header row
        reader = csv.DictReader(search_response.text.split("\n"))
        opportunities = list(reader)
    else:
        response_json = search_response.get_json()
        opportunities = response_json["data"]

    expected_ids = [exp.opportunity_id for exp in expected_results]
    # int() so CSV string IDs compare equal to the integer IDs from the DB models
    response_ids = [int(opp["opportunity_id"]) for opp in opportunities]

    # Join hoisted out of the f-string: backslashes inside f-string expressions
    # are a SyntaxError before Python 3.12
    actual_titles = "\n".join(opp["opportunity_title"] for opp in opportunities)
    assert response_ids == expected_ids, f"Actual opportunities:\n {actual_titles}"


def call_search_and_validate(client, api_auth_token, search_request, expected_results):
    """POST the search request as JSON and again as CSV, validating both responses."""
    url = "/v1/opportunities/search"
    headers = {"X-Auth": api_auth_token}

    json_resp = client.post(url, json=search_request, headers=headers)
    validate_search_response(json_resp, expected_results)

    # Re-run the identical search, but ask for the CSV download this time
    search_request["format"] = "csv"
    csv_resp = client.post(url, json=search_request, headers=headers)
    validate_search_response(csv_resp, expected_results, is_csv_response=True)


def build_opp(
opportunity_title: str,
opportunity_number: str,
Expand Down Expand Up @@ -470,10 +491,7 @@ def setup_search_data(self, opportunity_index, opportunity_index_alias, search_c
def test_sorting_and_pagination_200(
self, client, api_auth_token, setup_search_data, search_request, expected_results
):
resp = client.post(
"/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token}
)
validate_search_response(resp, expected_results)
call_search_and_validate(client, api_auth_token, search_request, expected_results)

@pytest.mark.parametrize(
"search_request, expected_results",
Expand Down Expand Up @@ -690,10 +708,7 @@ def test_sorting_and_pagination_200(
def test_search_filters_200(
self, client, api_auth_token, setup_search_data, search_request, expected_results
):
resp = client.post(
"/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token}
)
validate_search_response(resp, expected_results)
call_search_and_validate(client, api_auth_token, search_request, expected_results)

@pytest.mark.parametrize(
"search_request, expected_results",
Expand Down Expand Up @@ -758,7 +773,4 @@ def test_search_query_200(
):
# This test isn't looking to validate opensearch behavior, just that we've connected fields properly and
# results being returned are as expected.
resp = client.post(
"/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token}
)
validate_search_response(resp, expected_results)
call_search_and_validate(client, api_auth_token, search_request, expected_results)
9 changes: 7 additions & 2 deletions api/tests/src/db/models/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class CustomProvider(BaseProvider):
AGENCY_CONTACT_DESC_FORMATS = [
"{{name}}\n{{job}}\n555-###-####\n{{email}}",
"{{relevant_url}} Contact Center\nHours of operation are 24 hours a day, 7 days a week.\n{{email}}",
"{{agency}} Webmaster\n{{email}}",
"Webmaster\n{{email}}",
]

# Rather than generate any random URL in our data, use those
Expand Down Expand Up @@ -367,7 +367,7 @@ class Meta:
no_declaration=None,
)

agency_code = factory.Faker("agency")
agency_code = factory.LazyAttribute(lambda s: s.opportunity.agency)
agency_name = factory.Faker("agency_name")
agency_phone_number = Generators.PhoneNumber
agency_contact_description = factory.Faker("agency_contact_description")
Expand Down Expand Up @@ -438,6 +438,11 @@ class Meta:
unique=True,
)

created_at = factory.Faker("date_time")
updated_at = factory.LazyAttribute(
lambda o: fake.date_time_between(start_date=o.created_at, end_date="now")
)

class Params:
# These are common overrides we might want for an opportunity summary.
# Simply pass the in `trait_name=True` to the factory when making an object
Expand Down

0 comments on commit 103eab5

Please sign in to comment.