From 6da04d882ce5b0e32783ff76442e0d58b2251b1a Mon Sep 17 00:00:00 2001 From: Sam Kleiner Date: Mon, 29 Jul 2024 20:54:05 -0400 Subject: [PATCH 1/4] add entity address parser --- whoisit/parser.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/whoisit/parser.py b/whoisit/parser.py index 5b40b61..5e4cc11 100644 --- a/whoisit/parser.py +++ b/whoisit/parser.py @@ -25,10 +25,31 @@ def clean(s): return s.strip() +def clean_address(a): + if a is None: + a = '' + if isinstance(a, list): + a = ' '.join(a) + if not isinstance(a, str): + a = str(a) + return a.strip() + + class VCardArrayDataDict(TypedDict, total=False): name: str email: str tel: str + address: list[str] + + +class VCardArrayAddressDataDict(TypedDict, total=False): + po_box: str + ext_address: str + street_address: str + locality: str + region: str + postal_code: str + country: str class Parser: @@ -95,6 +116,16 @@ def parse_vcard_array(self, vcard) -> Optional[VCardArrayDataDict]: v_card_array_data_dict["email"] = clean(entry_label) elif entry_field == 'tel': v_card_array_data_dict["tel"] = clean(entry_label) + elif entry_field == 'adr' and isinstance(entry_label, list) and len(entry_label) == 7: + v_card_array_data_dict['address'] = VCardArrayAddressDataDict( + po_box= clean_address(entry_label[0]), + ext_address= clean_address(entry_label[1]), + street_address= clean_address(entry_label[2]), + locality= clean_address(entry_label[3]), + region= clean_address(entry_label[4]), + postal_code= clean_address(entry_label[5]), + country= clean_address(entry_label[6]) + ) return v_card_array_data_dict or None From 33a45d4e40c56e631e375703a78f786bf0b627d9 Mon Sep 17 00:00:00 2001 From: Sam Kleiner Date: Mon, 29 Jul 2024 20:54:15 -0400 Subject: [PATCH 2/4] update tests --- tests/test_parser.py | 167 +++++++++++++++++++++++++++++---- tests/test_public_interface.py | 22 +++++ 2 files changed, 173 insertions(+), 16 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index ef3a94e..5fa3914 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -58,7 +58,16 @@ def test_autnum_response_parser(self): 'type': 'entity', 'whois_server': 'whois.arin.net', 'name': 'Cloudflare, Inc.', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -72,7 +81,16 @@ def test_autnum_response_parser(self): 'name': 'Abuse', 'email': 'abuse@cloudflare.com', 'tel': '+1-650-319-8930', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -86,7 +104,16 @@ def test_autnum_response_parser(self): 'name': 'NOC', 'email': 'noc@cloudflare.com', 'tel': '+1-650-319-8930', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -100,7 +127,16 @@ def test_autnum_response_parser(self): 'name': 'Admin', 'email': 'rir@cloudflare.com', 'tel': '+1-650-319-8930', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -176,7 +212,16 @@ def test_domain_response_parser(self): 'url': 'https://rdap.norid.no/entity/reg42-NORID', 'type': 'entity', 'name': 'Domeneshop AS', - 'email': 'kundeservice@domeneshop.no' + 'email': 'kundeservice@domeneshop.no', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': 'Christian Krohgs gate 16', + 'locality': 'Oslo', + 'region': '', + 'postal_code': 'NO-0186', + 'country': 'NORWAY', + } } ] ) @@ -187,7 +232,16 @@ def test_domain_response_parser(self): 'url': 'https://rdap.norid.no/entity/DH21326R-NORID', 'type': 'entity', 'name': 'Domeneshop Hostmaster', - 'email': 'hostmaster@domeneshop.no' + 'email': 'hostmaster@domeneshop.no', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': 'NORWAY', + } } ] ) @@ -210,7 +264,7 @@ def test_domain_response_parser(self): { 'handle': '1647', 'type': 'entity', - 'name': 'Hosting Concepts B.V. d/b/a Registrar.eu', + 'name': 'Hosting Concepts B.V. d/b/a Registrar.eu' } ] ) @@ -255,7 +309,16 @@ def test_ip_response_parser(self): 'name': 'APNIC RESEARCH', 'email': 'research@apnic.net', 'tel': '+61-7-3858-3199', - 'rir': 'apnic' + 'rir': 'apnic', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -315,7 +378,16 @@ def test_ip_response_parser(self): 'name': 'APNIC RESEARCH', 'email': 'research@apnic.net', 'tel': '+61-7-3858-3199', - 'rir': 'apnic' + 'rir': 'apnic', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -348,7 +420,16 @@ def test_ip_response_parser(self): 'rir': 'arin', 'type': 'entity', 'url': 'https://rdap.arin.net/registry/entity/GOGL', - 'whois_server': 'whois.arin.net' + 'whois_server': 'whois.arin.net', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -362,7 +443,16 @@ def test_ip_response_parser(self): 'type': 'entity', 'url': 'https://rdap.arin.net/registry/entity/ZG39-ARIN', 'tel': '+1-650-253-0000', - 'whois_server': 'whois.arin.net' + 'whois_server': 'whois.arin.net', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -376,7 +466,16 @@ def test_ip_response_parser(self): 'type': 'entity', 'url': 'https://rdap.arin.net/registry/entity/ZG39-ARIN', 'tel': '+1-650-253-0000', - 'whois_server': 'whois.arin.net' + 'whois_server': 'whois.arin.net', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -390,7 +489,16 @@ def test_ip_response_parser(self): 'name': 'Abuse', 'email': 'network-abuse@google.com', 'tel': '+1-650-253-0000', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } }, { 'handle': 'ZG39-ARIN', @@ -400,7 +508,16 @@ def test_ip_response_parser(self): 'name': 'Google LLC', 'email': 'arin-contact@google.com', 'tel': '+1-650-253-0000', - 'rir': 'arin' + 'rir': 'arin', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -432,7 +549,16 @@ def test_ip_response_parser(self): 'rir': 'arin', 'type': 'entity', 'url': 'https://rdap.arin.net/registry/entity/GOGL', - 'whois_server': 'whois.arin.net' + 'whois_server': 'whois.arin.net', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) @@ -464,7 +590,16 @@ def test_entity_response_parser(self): 'type': 'entity', 'url': 'https://rdap.arin.net/registry/entity/GTS7-ARIN', 'tel': '+1-519-254-5115', - 'whois_server': 'whois.arin.net' + 'whois_server': 'whois.arin.net', + 'address': { + 'po_box': '', + 'ext_address': '', + 'street_address': '', + 'locality': '', + 'region': '', + 'postal_code': '', + 'country': '', + } } ] ) diff --git a/tests/test_public_interface.py b/tests/test_public_interface.py index 4b2678d..21a7630 100644 --- a/tests/test_public_interface.py +++ b/tests/test_public_interface.py @@ -91,6 +91,17 @@ def test_domain_interface_1(self): ) self.assertTrue(isinstance(resp["entities"]["registrar"], list)) self.assertTrue(len(resp["entities"]["registrar"]) > 0) + self.assertTrue(isinstance(resp["entities"]["registrant"], list)) + self.assertTrue(len(resp["entities"]["registrant"]) > 0) + self.assertEqual(resp["entities"]["registrant"][0]['address'], { + 'country': "US", + 'ext_address': '', + 'locality': '', + 'po_box': '', + 'postal_code': '', + 'region': 'CA', + 'street_address': '', + }) @responses.activate # @_recorder.record(file_path=RESPONSES / 'ip-v4-1.yaml') @@ -314,6 +325,17 @@ async def test_domain_interface_1(self): ) self.assertTrue(isinstance(resp["entities"]["registrar"], list)) self.assertTrue(len(resp["entities"]["registrar"]) > 0) + self.assertTrue(isinstance(resp["entities"]["registrant"], list)) + self.assertTrue(len(resp["entities"]["registrant"]) > 0) + self.assertEqual(resp["entities"]["registrant"][0]['address'], { + 'country': "US", + 'ext_address': '', + 'locality': '', + 'po_box': '', + 'postal_code': '', + 'region': 'CA', + 'street_address': '', + }) @pytest.mark.asyncio @pytest.mark.usefixtures("mock_httpx") From f3505dfee3d77a9b65dabf666f207e3460b4d9a1 Mon Sep 17 00:00:00 2001 From: Sam Kleiner Date: Mon, 29 Jul 2024 21:02:37 -0400 Subject: [PATCH 3/4] cleanup --- whoisit/parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/whoisit/parser.py b/whoisit/parser.py index 5e4cc11..755d1cb 100644 --- a/whoisit/parser.py +++ b/whoisit/parser.py @@ -118,13 +118,13 @@ def parse_vcard_array(self, vcard) -> Optional[VCardArrayDataDict]: v_card_array_data_dict["tel"] = clean(entry_label) elif entry_field == 'adr' and isinstance(entry_label, list) and len(entry_label) == 7: v_card_array_data_dict['address'] = VCardArrayAddressDataDict( - po_box= clean_address(entry_label[0]), - ext_address= clean_address(entry_label[1]), - street_address= clean_address(entry_label[2]), - locality= clean_address(entry_label[3]), - region= clean_address(entry_label[4]), - postal_code= clean_address(entry_label[5]), - country= clean_address(entry_label[6]) + po_box=clean_address(entry_label[0]), + ext_address=clean_address(entry_label[1]), + street_address=clean_address(entry_label[2]), + locality=clean_address(entry_label[3]), + region=clean_address(entry_label[4]), + postal_code=clean_address(entry_label[5]), + country=clean_address(entry_label[6]) ) return v_card_array_data_dict or None From b9d661ffb792bb5a69f7d04fd1c51ecbfe169302 Mon Sep 17 00:00:00 2001 From: Sam Kleiner Date: Mon, 29 Jul 2024 21:04:40 -0400 Subject: [PATCH 4/4] fix python 3.8 list type --- whoisit/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/whoisit/parser.py b/whoisit/parser.py index 755d1cb..d3e7bf1 100644 --- a/whoisit/parser.py +++ b/whoisit/parser.py @@ -5,7 +5,7 @@ IPv4Network, IPv6Network ) -from typing import Optional +from typing import Optional, List from typing_extensions import TypedDict from dateutil.parser import parse as dateutil_parse @@ -39,7 +39,7 @@ class VCardArrayDataDict(TypedDict, total=False): name: str email: str tel: str - address: list[str] + address: List[str] class VCardArrayAddressDataDict(TypedDict, total=False):