Skip to content

Commit

Permalink
add python lib
Browse files Browse the repository at this point in the history
  • Loading branch information
gmweaver committed Jun 5, 2024
0 parents commit 1d7e09c
Show file tree
Hide file tree
Showing 14 changed files with 550 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds platform wheels with cibuildwheel on every push and pull request and
# uploads them as one artifact per matrix entry.
name: Build

on: [push, pull_request]

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13, macos-14]

    steps:
      # actions/checkout does not fetch submodules by default; the libpostal
      # sources live in the `libpostal` submodule and are needed by
      # install_libpostal.sh when the extension is built.
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Used to host cibuildwheel
      - uses: actions/setup-python@v5

      - name: Install cibuildwheel
        run: python -m pip install cibuildwheel==2.18.1

      - name: Build wheels
        run: python -m cibuildwheel --output-dir wheelhouse
        # to supply options, put them in 'env', like:
        # env:
        #   CIBW_SOME_OPTION: value

      # One artifact per matrix job; the job index keeps artifact names unique.
      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
__pycache__
*.egg-info
*.so
build
dist
.venv
.vscode
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "libpostal"]
path = libpostal
url = https://github.com/openvenues/libpostal.git
1 change: 1 addition & 0 deletions MANIFEST.IN
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include src/pyutils.h
38 changes: 38 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pylibpostal

Python wrapper for the open-source libpostal project. This is a custom library built internally because the existing Python wrapper libraries are no longer actively supported.

## Usage

### Install libpostal C library

By default, libpostal will be installed when the Python package is installed, but without the data.

The commands run to install are below.

```
git clone https://github.com/openvenues/libpostal \
&& cd libpostal \
&& ./bootstrap.sh \
&& ./configure --datadir=/tmp/libpostal_data_files --disable-data-download --disable-sse2 \
&& make -j4 \
&& make install \
&& ldconfig
```

- `--disable-data-download` disables downloading data when installing.
- `--disable-sse2` required for Mac M1.
- `ldconfig` is only needed on Linux.

See https://github.com/openvenues/libpostal?tab=readme-ov-file#installation-maclinux for more details.

### Downloading libpostal data

```
libpostal_data download all <data-dir>
```

## Contributing
To test the project, run `poetry test`. Test files may live together with the code or in a separate
directory, but in order for them to be discovered, they should end with `_test.py`
(e.g. `pylibpostal/something_test.py` or `pylibpostal_test/something_test.py`).
39 changes: 39 additions & 0 deletions install_libpostal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Builds and installs the libpostal C library from the `libpostal` git
# submodule located next to this script.
#
# Steps:
#   1. Detect the operating system and install the build toolchain
#      (apt-get on Linux, Homebrew on macOS).
#   2. Check out the v1.1 tag of libpostal, configure it without downloading
#      the data files, build it and install it with sudo.
#   3. Refresh the dynamic linker cache on Linux.
#
# Any failing step aborts the script with a non-zero exit status so that
# callers can detect a broken build instead of continuing with a
# half-installed library.

set -euo pipefail

OS=$(uname -s)

case "$OS" in
    Linux)
        echo "Detected Linux"
        ;;
    Darwin)
        echo "Detected macOS"
        ;;
    FreeBSD)
        echo "Detected FreeBSD"
        ;;
    *)
        # No toolchain is installed for unknown systems; the build below is
        # still attempted and fails on its own if prerequisites are missing.
        echo "OS not supported"
        ;;
esac

if [ "$OS" = "Linux" ]; then
    # -y: never wait for confirmation; the script may run non-interactively.
    sudo apt-get install -y clang curl autoconf automake libtool pkg-config
elif [ "$OS" = "Darwin" ]; then
    brew install curl autoconf automake libtool pkg-config
fi

# Resolve the submodule relative to this script rather than the caller's
# working directory, so a wrong cwd cannot make the git checkout below run in
# some other repository.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$SCRIPT_DIR/libpostal"

git checkout tags/v1.1
./bootstrap.sh
# --disable-data-download: do not fetch the model data during install.
# --disable-sse2: required for Mac M1 (see README).
./configure --datadir=/tmp/libpostal_data_files --disable-data-download --disable-sse2
make -j4
sudo make install

if [ "$OS" = "Linux" ]; then
    # Make the freshly installed shared library visible to the dynamic linker.
    sudo ldconfig
fi
1 change: 1 addition & 0 deletions libpostal
Submodule libpostal added at 8f2066
Empty file added libpypostal/__init__.py
Empty file.
69 changes: 69 additions & 0 deletions libpypostal/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Python bindings to libpostal parse_address."""
from enum import Enum
from typing import Dict, List, Optional, Tuple

from libpypostal import _parser # type: ignore # pylint: disable=no-name-in-module


class LibpostalAddressComponent(str, Enum):
    """Address component labels that ``parse_address`` always reports.

    Each member's value is the label string compared against the component
    names returned by the parser. Because the enum mixes in ``str``, members
    compare equal to their plain string values.
    """

    # NOTE: declaration order matters. parse_address iterates this enum to add
    # empty lists for missing components, so this order is the order in which
    # those keys are inserted into its result dictionary.
    CATEGORY = "category"
    CITY = "city"
    CITY_DISTRICT = "city_district"
    COUNTRY = "country"
    COUNTRY_REGION = "country_region"
    ENTRANCE = "entrance"
    HOUSE = "house"
    HOUSE_NUMBER = "house_number"
    ISLAND = "island"
    LEVEL = "level"
    NEAR = "near"
    PO_BOX = "po_box"
    POSTCODE = "postcode"
    ROAD = "road"
    STAIRCASE = "staircase"
    STATE = "state"
    STATE_DISTRICT = "state_district"
    SUBURB = "suburb"
    UNIT = "unit"
    WORLD_REGION = "world_region"


def parse_address(
    address: str, language: Optional[str] = None, country_code: Optional[str] = None
) -> Dict[str, List[str]]:
    """Splits an address string into labelled components.

    Arguments:
        address: the address to parse.
        language: optional language code to help localize parsing.
        country_code: optional country code to help localize parsing.

    Returns:
        Dictionary mapping each address component name to the list of values
        parsed for it. A list usually holds a single value, but the same
        component may be matched more than once. Every component defined in
        LibpostalAddressComponent is present; components that were not found
        in the input map to an empty list.
    """
    labelled_values: List[Tuple[str, str]] = _parser.parse_address(  # pylint: disable=c-extension-no-member
        address, language=language, country=country_code
    )

    components: Dict[str, List[str]] = {}

    # The extension yields (value, label) pairs; group the values by label in
    # the order they were parsed.
    for value, label in labelled_values:
        components.setdefault(label, []).append(value)

    # Guarantee a key for every known component, leaving parsed ones untouched.
    for known_component in LibpostalAddressComponent:
        components.setdefault(known_component.value, [])

    return components
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[project]
name = "libpypostal"
version = "1.1.0"

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["libpypostal*"]
38 changes: 38 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import ctypes.util
import subprocess
import tempfile

from setuptools import setup, Extension


def _libpostal_installed() -> bool:
    """Reports whether a shared library named ``postal`` can be located."""
    located = ctypes.util.find_library("postal")
    if located is None:
        return False
    return True


def _install_libpostal() -> None:
    """Builds and installs libpostal by running ``./install_libpostal.sh``.

    The script's output is captured. If the script exits with a non-zero
    status, the captured stdout and stderr are written to this process's
    stderr before the error is re-raised, so the reason for the failed build
    is visible instead of only the command line and exit status.

    Raises:
        subprocess.CalledProcessError: if the install script fails.
    """
    import sys  # local import: only needed to report a failed build

    with tempfile.TemporaryDirectory() as tempdir:
        try:
            subprocess.run(
                # The temporary directory is passed as the script's first argument.
                ["./install_libpostal.sh", tempdir],
                text=True,
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as error:
            # CalledProcessError's message omits the captured streams.
            sys.stderr.write(error.stdout or "")
            sys.stderr.write(error.stderr or "")
            raise


# Build and install the libpostal C library first when it cannot be found,
# because the extension below links against it.
if not _libpostal_installed():
    _install_libpostal()

# C extension exposed as ``libpypostal._parser`` and linked against ``postal``.
ext_modules = [
    Extension(
        "libpypostal._parser",
        sources=["src/pyparser.c", "src/pyutils.c"],
        libraries=["postal"],
        # NOTE(review): /usr/local is presumably where `make install` puts the
        # libpostal headers and library by default; confirm for other prefixes.
        include_dirs=["/usr/local/include", "src/"],
        library_dirs=["/usr/local/lib"],
        extra_compile_args=["-std=c99"],
    ),
]

# Only the extension modules are declared here; the remaining project metadata
# presumably comes from pyproject.toml.
setup(ext_modules=ext_modules)
Loading

0 comments on commit 1d7e09c

Please sign in to comment.