Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

[Issue #86] Download the search response as a CSV file #87

Merged
merged 23 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
62ba7f1
[Issue #9] Setup opensearch locally
chouinar May 16, 2024
1922340
Some rearranging of files
chouinar May 17, 2024
649339c
Dependency fixes
chouinar May 17, 2024
2126171
Trying something else for the network setup?
chouinar May 17, 2024
8f80852
Simplify the networking/docker setup
chouinar May 17, 2024
f02f3d3
[Issue #10] Populate the search index from the opportunity tables
chouinar May 21, 2024
49c2a2b
Slightly tidying up
chouinar May 21, 2024
25edfab
[Issue #14] Setup utils for creating requests and parsing responses f…
chouinar May 22, 2024
1058287
Merge branch 'main' into chouinar/14-req-resp-tools
chouinar May 22, 2024
327f242
A lot of tests / comments / cleanup
chouinar May 28, 2024
eaba30d
Add an example
chouinar May 28, 2024
641ebd1
[Issue #16] Connect the API to use the search index
chouinar May 30, 2024
bba9a52
Docs and logging
chouinar May 30, 2024
3b9fec9
Update OpenAPI spec
nava-platform-bot May 30, 2024
01a5bc0
Adjust the allow_none logic
chouinar May 30, 2024
3d933e8
Update OpenAPI spec
nava-platform-bot May 30, 2024
28e106b
Merge branch 'main' into chouinar/14-req-resp-tools
chouinar Jun 7, 2024
354654c
Merge branch 'chouinar/14-req-resp-tools' into chouinar/16-actual-impl
chouinar Jun 7, 2024
103eab5
[Issue #86] Download the search response as a CSV file
chouinar Jun 17, 2024
12be289
Update OpenAPI spec
nava-platform-bot Jun 17, 2024
a3add37
Fix a TODO
chouinar Jun 17, 2024
2f2dc17
Update OpenAPI spec
nava-platform-bot Jun 17, 2024
54c6519
Merge branch 'main' into chouinar/86-csv-download
chouinar Jun 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions api/openapi.generated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/OpportunitySearchResponseV1'
application/octet-stream: {}
description: Successful response
'422':
content:
Expand Down Expand Up @@ -200,6 +201,20 @@ paths:
page_offset: 1
page_size: 25
sort_direction: descending
example4:
summary: CSV file response
value:
format: csv
filters:
opportunity_status:
one_of:
- forecasted
- posted
pagination:
order_by: opportunity_id
page_offset: 1
page_size: 100
sort_direction: ascending
security:
- ApiKeyAuth: []
/v0.1/opportunities/search:
Expand Down Expand Up @@ -894,6 +909,15 @@ components:
- object
allOf:
- $ref: '#/components/schemas/OpportunityPaginationV1'
format:
default: !!python/object/apply:src.api.opportunities_v1.opportunity_schemas.SearchResponseFormat
- json
description: The format of the response
enum:
- json
- csv
type:
- string
required:
- pagination
OpportunityAssistanceListingV1:
Expand Down Expand Up @@ -1153,6 +1177,14 @@ components:
- unrestricted
type:
- string
created_at:
type: string
format: date-time
description: When the opportunity summary was created
updated_at:
type: string
format: date-time
description: When the opportunity summary was last updated
OpportunityV1:
type: object
properties:
Expand Down
40 changes: 38 additions & 2 deletions api/src/api/opportunities_v1/opportunity_routes.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import logging

from flask import Response

import src.adapters.db as db
import src.adapters.db.flask_db as flask_db
import src.adapters.search as search
import src.adapters.search.flask_opensearch as flask_opensearch
import src.api.opportunities_v1.opportunity_schemas as opportunity_schemas
import src.api.response as response
import src.util.datetime_util as datetime_util
from src.api.opportunities_v1.opportunity_blueprint import opportunity_blueprint
from src.auth.api_key_auth import api_key_auth
from src.logging.flask_logger import add_extra_data_to_current_request_logs
from src.services.opportunities_v1.get_opportunity import get_opportunity, get_opportunity_versions
from src.services.opportunities_v1.opportunity_to_csv import opportunity_to_csv
from src.services.opportunities_v1.search_opportunities import search_opportunities
from src.util.dict_util import flatten_dict

Expand Down Expand Up @@ -74,6 +78,21 @@
},
},
},
"example4": {
"summary": "CSV file response",
"value": {
"format": "csv",
"filters": {
"opportunity_status": {"one_of": ["forecasted", "posted"]},
},
"pagination": {
"order_by": "opportunity_id",
"page_offset": 1,
"page_size": 100,
"sort_direction": "ascending",
},
},
},
}


Expand All @@ -85,11 +104,16 @@
)
@opportunity_blueprint.output(opportunity_schemas.OpportunitySearchResponseV1Schema())
@opportunity_blueprint.auth_required(api_key_auth)
@opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION)
@opportunity_blueprint.doc(
description=SHARED_ALPHA_DESCRIPTION,
# This adds a file response schema
# in addition to the one added by the output decorator
responses={200: {"content": {"application/octet-stream": {}}}}, # type: ignore
)
@flask_opensearch.with_search_client()
def opportunity_search(
search_client: search.SearchClient, search_params: dict
) -> response.ApiResponse:
) -> response.ApiResponse | Response:
add_extra_data_to_current_request_logs(flatten_dict(search_params, prefix="request.body"))
logger.info("POST /v1/opportunities/search")

Expand All @@ -105,6 +129,18 @@ def opportunity_search(
)
logger.info("Successfully fetched opportunities")

if search_params.get("format") == opportunity_schemas.SearchResponseFormat.CSV:
# Convert the response into a CSV and return the contents
output = opportunity_to_csv(opportunities)
timestamp = datetime_util.utcnow().strftime("%Y%m%d-%H%M%S")
return Response(
output.getvalue().encode("utf-8"),
content_type="text/csv",
headers={
"Content-Disposition": f"attachment; filename=opportunity_search_results_{timestamp}.csv"
},
)

return response.ApiResponse(
message="Success",
data=opportunities,
Expand Down
23 changes: 23 additions & 0 deletions api/src/api/opportunities_v1/opportunity_schemas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import StrEnum

from src.api.schemas.extension import Schema, fields, validators
from src.api.schemas.response_schema import AbstractResponseSchema, PaginationMixinSchema
from src.api.schemas.search_schema import StrSearchSchemaBuilder
Expand All @@ -11,6 +13,11 @@
from src.pagination.pagination_schema import generate_pagination_schema


class SearchResponseFormat(StrEnum):
    """Supported serialization formats for the opportunity search response.

    JSON is the load-default on the request schema; choosing CSV makes the
    search endpoint return the results as a downloadable CSV attachment
    instead of the usual JSON envelope.
    """

    JSON = "json"
    CSV = "csv"


class OpportunitySummaryV1Schema(Schema):
summary_description = fields.String(
allow_none=True,
Expand Down Expand Up @@ -208,6 +215,13 @@ class OpportunitySummaryV1Schema(Schema):
funding_categories = fields.List(fields.Enum(FundingCategory))
applicant_types = fields.List(fields.Enum(ApplicantType))

created_at = fields.DateTime(
metadata={"description": "When the opportunity summary was created"}
)
updated_at = fields.DateTime(
metadata={"description": "When the opportunity summary was last updated"}
)


class OpportunityAssistanceListingV1Schema(Schema):
program_title = fields.String(
Expand Down Expand Up @@ -382,6 +396,15 @@ class OpportunitySearchRequestV1Schema(Schema):
required=True,
)

format = fields.Enum(
SearchResponseFormat,
load_default=SearchResponseFormat.JSON,
metadata={
"description": "The format of the response",
"default": SearchResponseFormat.JSON,
},
)


class OpportunityGetResponseV1Schema(AbstractResponseSchema):
data = fields.Nested(OpportunityV1Schema())
Expand Down
93 changes: 93 additions & 0 deletions api/src/services/opportunities_v1/opportunity_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import csv
import io
from typing import Sequence

from src.util.dict_util import flatten_dict

# Columns written to the CSV download, in output order.
# Keys here must match the (prefix-stripped) flattened response field names.
CSV_FIELDS = [
    "opportunity_id",
    "opportunity_number",
    "opportunity_title",
    "opportunity_status",
    "agency",
    "category",
    "category_explanation",
    "post_date",
    "close_date",
    "close_date_description",
    "archive_date",
    "is_cost_sharing",
    "expected_number_of_awards",
    "estimated_total_program_funding",
    "award_floor",
    "award_ceiling",
    "additional_info_url",
    "additional_info_url_description",
    "opportunity_assistance_listings",
    "funding_instruments",
    "funding_categories",
    "funding_category_description",
    "applicant_types",
    "applicant_eligibility_description",
    "agency_code",
    "agency_name",
    "agency_phone_number",
    "agency_contact_description",
    "agency_email_address",
    "agency_email_address_description",
    "is_forecast",
    "forecasted_post_date",
    "forecasted_close_date",
    "forecasted_close_date_description",
    "forecasted_award_date",
    "forecasted_project_start_date",
    "fiscal_year",
    "created_at",
    "updated_at",
    # We put the description at the end as it's the longest value
    # which can help improve readability of other fields
    "summary_description",
]
# Same as above, but faster lookup
CSV_FIELDS_SET = set(CSV_FIELDS)


def _process_assistance_listing(assistance_listings: list[dict]) -> str:
return ";".join(
[f"{a['assistance_listing_number']}|{a['program_title']}" for a in assistance_listings]
)


def opportunity_to_csv(opportunities: Sequence[dict]) -> io.StringIO:
    """Serialize opportunity records into an in-memory CSV buffer.

    Each opportunity dict is flattened, its nested-field prefixes
    ("summary." / "assistance_listings.") stripped, and only the columns
    configured in CSV_FIELDS are kept. List-valued fields are collapsed
    into ";"-delimited strings so each record fits on one CSV row.

    Returns the StringIO containing header plus one row per opportunity
    (all values quoted); the buffer's position is at the end of the data.
    """
    rows: list[dict] = []

    for opportunity in opportunities:
        flattened = flatten_dict(opportunity)

        row: dict = {}
        for key, value in flattened.items():
            # Remove prefixes from nested data structures
            key = key.removeprefix("summary.").removeprefix("assistance_listings.")

            # Skip fields we haven't configured as CSV columns
            if key not in CSV_FIELDS_SET:
                continue

            if key == "opportunity_assistance_listings":
                value = _process_assistance_listing(value)
            elif key in ("funding_instruments", "funding_categories", "applicant_types"):
                value = ";".join(value)

            row[key] = value

        rows.append(row)

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=CSV_FIELDS, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    writer.writerows(rows)

    return output
8 changes: 4 additions & 4 deletions api/src/services/opportunities_v1/search_opportunities.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def _add_search_filters(builder: search.SearchQueryBuilder, filters: dict | None
def _add_aggregations(builder: search.SearchQueryBuilder) -> None:
# TODO - we'll likely want to adjust the total number of values returned, especially
# for agency as there could be hundreds of different agencies, and currently it's limited to 25.
builder.aggregation_terms("opportunity_status", _adjust_field_name("applicant_types"))
builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_types"))
builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instruments"))
builder.aggregation_terms("funding_category", _adjust_field_name("funding_categories"))
builder.aggregation_terms("opportunity_status", _adjust_field_name("applicant_type"))
builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_type"))
builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instrument"))
builder.aggregation_terms("funding_category", _adjust_field_name("funding_category"))
builder.aggregation_terms("agency", _adjust_field_name("agency_code"))


Expand Down
4 changes: 4 additions & 0 deletions api/tests/src/api/opportunities_v1/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def get_search_request(
applicant_type_one_of: list[ApplicantType] | None = None,
opportunity_status_one_of: list[OpportunityStatus] | None = None,
agency_one_of: list[str] | None = None,
format: str | None = None,
):
req = {
"pagination": {
Expand Down Expand Up @@ -70,6 +71,9 @@ def get_search_request(
if query is not None:
req["query"] = query

if format is not None:
req["format"] = format

return req


Expand Down
Loading
Loading