Skip to content

Commit

Permalink
Merge branch 'release/v0.0.2a0'
Browse files Browse the repository at this point in the history
  • Loading branch information
mdmmn378 committed Nov 25, 2023
2 parents b6353e3 + cc5b14a commit 29c15bc
Show file tree
Hide file tree
Showing 24 changed files with 478 additions and 281 deletions.
178 changes: 88 additions & 90 deletions .github/workflows/build-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ on:
- main
workflow_dispatch:


jobs:
build_and_test:
name: Test
Expand All @@ -24,8 +23,8 @@ jobs:
macos:
runs-on: macos-latest
strategy:
matrix:
python-version: [ '3.7', '3.8', '3.9']
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand All @@ -47,77 +46,76 @@ jobs:
name: wheels
path: dist


linux:
runs-on: ubuntu-latest
strategy:
matrix:
target: [x86_64]
python-version: [ '3.7', '3.8', '3.9']
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Build source wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: auto
args: --release --out dist --sdist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: auto
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Install built wheel
if: matrix.target == 'x86_64'
run: |
pip install texy --no-index --find-links dist --force-reinstall
python -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Build source wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: auto
args: --release --out dist --sdist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: auto
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Install built wheel
if: matrix.target == 'x86_64'
run: |
pip install texy --no-index --find-links dist --force-reinstall
python -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist

musllinux:
runs-on: ubuntu-latest
strategy:
matrix:
target:
- x86_64-unknown-linux-musl
python-version: [ '3.7', '3.8', '3.9']
python-version: ["3.7", "3.8", "3.9"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: musllinux_1_2
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Install built wheel
if: matrix.target == 'x86_64-unknown-linux-musl'
uses: addnab/docker-run-action@v3
with:
image: alpine:latest
options: -v ${{ github.workspace }}:/io -w /io
run: |
apk add py3-pip
pip3 install -U pip
pip3 install texy --no-index --find-links /io/dist/ --force-reinstall
python3 -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: musllinux_1_2
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- name: Install built wheel
if: matrix.target == 'x86_64-unknown-linux-musl'
uses: addnab/docker-run-action@v3
with:
image: alpine:latest
options: -v ${{ github.workspace }}:/io -w /io
run: |
apk add py3-pip
pip3 install -U pip
pip3 install texy --no-index --find-links /io/dist/ --force-reinstall
python3 -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist

musllinux-cross:
runs-on: ubuntu-latest
Expand All @@ -128,41 +126,41 @@ jobs:
arch: aarch64
- target: armv7-unknown-linux-musleabihf
arch: armv7
python-version: [ '3.7', '3.8', '3.9']
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
manylinux: musllinux_1_2
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- uses: uraimo/[email protected]
name: Install built wheel
with:
arch: ${{ matrix.platform.arch }}
distro: alpine_latest
githubToken: ${{ github.token }}
install: |
apk add py3-pip
pip3 install -U pip
run: |
pip3 install texy --no-index --find-links dist/ --force-reinstall
python3 -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
manylinux: musllinux_1_2
args: --release --out dist -m Cargo.toml -i ${{ matrix.python-version }}
- uses: uraimo/[email protected]
name: Install built wheel
with:
arch: ${{ matrix.platform.arch }}
distro: alpine_latest
githubToken: ${{ github.token }}
install: |
apk add py3-pip
pip3 install -U pip
run: |
pip3 install texy --no-index --find-links dist/ --force-reinstall
python3 -c "import texy"
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist

release:
name: Release
runs-on: ubuntu-latest
# if: "startsWith(github.ref, 'refs/tags/')"
needs: [ macos, linux, musllinux, musllinux-cross]
needs: [macos, linux, musllinux, musllinux-cross]
steps:
- uses: actions/download-artifact@v3
with:
Expand All @@ -173,4 +171,4 @@ jobs:
uses: PyO3/maturin-action@v1
with:
command: upload
args: --skip-existing *
args: --skip-existing *
6 changes: 0 additions & 6 deletions .vscode/settings.json

This file was deleted.

9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "texy-process"
version = "0.0.1-alpha2"
version = "0.0.2-alpha"
edition = "2021"

[lib]
Expand All @@ -9,15 +9,16 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
regex = "1.7.0"
pyo3 = { version = "0.17.3", features = ["abi3-py37", "extension-module"] }
lazy_static = "1.4.0"
serde_json = "1.0.89"
rayon = "1.6.1"
procspawn = "0.10.1"

[dependencies.pyo3]
version = "0.19.0"
features = ["abi3-py37"]

[features]
extension-module = ["pyo3/extension-module"]
default = ["extension-module"]

[package.metadata.maturin]
name = "texy._internal"
30 changes: 30 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Developer task runner: formatting, linting, Python and Rust tests, and
# maturin builds.

# GNU make takes the recipe shell from the SHELL variable. A variable named
# `.SHELL` has no special meaning there and would leave the default /bin/sh
# in effect.
SHELL := /bin/bash
.PHONY: all format lint test test-rs build-dev build-release

# Run lint, format and both test suites, in that order.
all: lint format test test-rs
	@echo "All done!"

# Sort imports with isort, then format with black.
format:
	@echo "Running formatter..."
	isort .
	black .

# Lint and apply automatic fixes. The leading `-` makes make ignore a non-zero
# exit status, so remaining lint findings do not stop `make all`.
# `ruff check` is the explicit subcommand; the bare `ruff <path>` form is not
# accepted by current Ruff releases.
lint:
	@echo "Running linter..."
	-ruff check . --fix

# Python test suite. NOTE(review): the `-` prefix means failing tests do not
# fail the target -- confirm this is intended before relying on it in CI.
test:
	@echo "Running tests..."
	-pytest -vs tests/

# Rust test suite; `--nocapture` lets test output through to the terminal.
test-rs:
	@echo "Running tests..."
	-cargo test -- --nocapture

# Build via `maturin develop`.
build-dev:
	@echo "Building dev..."
	-maturin develop

# Build via `maturin build --release`.
build-release:
	@echo "Building release..."
	-maturin build --release
42 changes: 36 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,45 @@

[![Python](https://github.com/mdmmn378/texy/actions/workflows/build-publish.yaml/badge.svg)](https://github.com/mdmmn378/texy/actions/workflows/build-publish.yaml)

> Do text processing without getting your hands dirty
> A utility library for quickly cleaning texts
## Installation

> `pip install texy`
Python version in the dev environment: `3.11.5`

## Example:
> `pip install -U texy`
```py
from texy.pipelines import run_101
run_101(["💩🔙🙈🚘🚺🦷🦨🧒🏻🫸🕑🐠🧷🍈🐟🐡🦈🐬 <hello>what's up</hello>,"])
## Usage

Pipelines with parallelization in Rust:

```python
>>> from texy.pipelines import extreme_clean, strict_clean, relaxed_clean
>>> data = ["hello ;/ from the other side 😊 \t "]
>>> print(extreme_clean(data))
['hello from the other side']
>>> print(strict_clean(data))
['hello ;/ from the other side']
>>> print(relaxed_clean(data))
['hello ;/ from the other side 😊']
```

Parallelize custom functions with Python Multiprocessing:

```python
from texy.pipelines import parallelize

def dummy(x):
    return [i[0] for i in x]

data = ["a ", "b ", "c ", "d ", "e ", "f ", "g ", "h ?."] * 100
print(parallelize(dummy, data, 2))
```

## Actions

| Pipeline | Actions |
| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `relaxed_clean` | `remove_newlines`, `remove_html`, `remove_xml`, `merge_spaces` |
| `strict_clean` | `remove_newlines`, `remove_urls`, `remove_emails`, `remove_html`, `remove_xml`, `remove_emoticons`, `remove_emojis`, `remove_infrequent_punctuations`, `merge_spaces` |
| `extreme_clean` | `remove_newlines`, `remove_urls`, `remove_emails`, `remove_html`, `remove_xml`, `remove_emoticons`, `remove_emojis`, `remove_all_punctuations`, `merge_spaces` |
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "texy"
description = "Supercharge text processing"
readme = "README.md"
requires-python = ">=3.6"
version = "0.0.1-alpha2"
version = "0.0.2-alpha"
authors = [
{ name="Mamunur Rahaman Mamun", email="[email protected]" },
]
Expand All @@ -13,8 +13,11 @@ homepage = "https://github.com/mdmmn378/texy"
repository = "https://github.com/mdmmn378/texy"

[build-system]
requires = ["maturin>=0.14,<0.15"]
requires = ["maturin>=1.0.0,<=2.0.0"]
build-backend = "maturin"

[tool.maturin]
features = ["pyo3/extension-module"]

[tool.isort]
profile = "black"
Loading

0 comments on commit 29c15bc

Please sign in to comment.