Skip to content

Commit

Permalink
Merge pull request #5811 from uktrade/fix-ingestion-great-company-reg…
Browse files Browse the repository at this point in the history
…istration-number

Fix ingestion of great-company-registration-number
  • Loading branch information
bau123 authored Nov 21, 2024
2 parents 12ff05e + 2bb6899 commit 29c4ceb
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
13 changes: 11 additions & 2 deletions datahub/company_activity/tasks/ingest_great_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ def ingest_great_data(bucket, file):
logger.info(f'Ingesting file: {file} finished')


def validate_company_registration_number(company_registration_number):
    """
    Normalise a company registration number taken from an ingested Great record.

    The value may arrive as a string or as a number, so it is converted to a
    string before being checked.

    Returns the string form of the value, or None when the value is missing
    (None) or its string form is longer than 10 characters.
    """
    if company_registration_number is None:
        # str(None) is the 4-character text 'None', which would pass the length
        # check below and be treated as a real registration number.
        return None

    company_registration_number_str = str(company_registration_number)
    # NOTE(review): 10 is presumably the maximum length of the company number
    # column - confirm against the Company model.
    if len(company_registration_number_str) > 10:
        return None
    return company_registration_number_str


class GreatIngestionTask:
def __init__(self):
self._countries = None
Expand All @@ -45,7 +52,9 @@ def _already_ingested(self, id):
def _create_company(self, data, form_id):
company = Company.objects.create(
name=data.get('business_name', ''),
company_number=data.get('company_registration_number', ''),
company_number=validate_company_registration_number(
data.get('company_registration_number', ''),
),
turnover_range=self._get_turnover_range(data.get('annual_turnover')),
business_type=self._get_business_type(data.get('type')),
employee_range=self._get_business_size(data.get('number_of_employees')),
Expand All @@ -72,7 +81,7 @@ def _create_contact(self, data, company, form_id):

def _get_company(self, data, form_id):
company = self._get_company_by_companies_house_num(
data.get('company_registration_number'),
validate_company_registration_number(data.get('company_registration_number')),
)
if company:
return company
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from datahub.company_activity.models import GreatExportEnquiry, IngestedFile
from datahub.company_activity.tasks.constants import BUCKET, GREAT_PREFIX, REGION
from datahub.company_activity.tasks.ingest_great_data import (
GreatIngestionTask, ingest_great_data,
GreatIngestionTask,
ingest_great_data,
)
from datahub.company_activity.tests.factories import (
GreatExportEnquiryFactory,
Expand All @@ -25,7 +26,9 @@

@pytest.fixture
def test_file():
    """
    Provide the gzipped Great JSONL fixture as an open binary file object.

    The file is opened in a context manager and yielded rather than returned,
    so the handle is closed when the fixture is torn down instead of being
    left open.
    """
    filepath = (
        'datahub/company_activity/tests/test_tasks/fixtures/great/20241023T000346.jsonl.gz'
    )
    with open(filepath, 'rb') as fixture_file:
        yield fixture_file


Expand Down Expand Up @@ -164,7 +167,7 @@ def test_company_name_mapping(self):
"id": "5250",
"created_at": "2024-09-19T14:00:34.069",
"data": {{
"company_registration_number": 994349,
"company_registration_number": "994349",
"business_name": "{company.name}"
}}
}}
Expand Down Expand Up @@ -563,7 +566,7 @@ def test_boolean_field_mapping(self):

@pytest.mark.django_db
@mock_aws
def test_long_field_values(self, test_file_path):
def test_long_field_values(self):
"""
Test that we can ingest records with long field values
"""
Expand All @@ -575,6 +578,7 @@ def test_long_field_values(self, test_file_path):
'that either need to be stored as TextFields if we need'
'the full value or truncated if we do not. Long long long.'
)

data = f"""
{{
"id": "5249",
Expand All @@ -589,10 +593,18 @@ def test_long_field_values(self, test_file_path):
"product_or_service_2": "{long_text}",
"product_or_service_3": "{long_text}",
"product_or_service_4": "{long_text}",
"product_or_service_5": "{long_text}"
"product_or_service_5": "{long_text}",
"company_registration_number": "{long_text}"
}}
}}
"""
task = GreatIngestionTask()
task.json_to_model(json.loads(data))
assert GreatExportEnquiry.objects.count() == initial_count + 1

result = GreatExportEnquiry.objects.get(form_id='5249').company_id

company_result = Company.objects.get(id=result)

assert company_result.company_number is None

0 comments on commit 29c4ceb

Please sign in to comment.