-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from mdmmn378/develop
Develop
- Loading branch information
Showing
14 changed files
with
189 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
pre-commit: | ||
commands: | ||
lint: | ||
run: make lint | ||
format: | ||
run: make format | ||
type-check: | ||
run: make type | ||
update: | ||
run: git update-index --again |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,8 @@ name = "texy" | |
description = "Supercharge text processing" | ||
readme = "README.md" | ||
requires-python = ">=3.6" | ||
version = "0.0.2-alpha" | ||
authors = [ | ||
{ name="Mamunur Rahaman Mamun", email="[email protected]" }, | ||
] | ||
version = "0.0.2" | ||
authors = [{ name = "Mamunur Rahaman Mamun", email = "[email protected]" }] | ||
|
||
[project.urls] | ||
homepage = "https://github.com/mdmmn378/texy" | ||
|
@@ -20,4 +18,4 @@ build-backend = "maturin" | |
features = ["pyo3/extension-module"] | ||
|
||
[tool.isort] | ||
profile = "black" | ||
profile = "black" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"include": [ | ||
"src" | ||
], | ||
|
||
"exclude": [ | ||
"**/node_modules", | ||
"**/__pycache__", | ||
], | ||
|
||
"ignore": [ | ||
"tests/**/*", | ||
], | ||
|
||
"defineConstant": { | ||
"DEBUG": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
pub mod components; | ||
pub mod pipelines; | ||
pub mod utils; | ||
use pipelines::blocks::{extreme, relaxed, strict}; | ||
|
||
fn leak() { | ||
let data = [ | ||
"Hello, this is a sample text with\nnewlines.", | ||
"Visit https://example.com for more info!", | ||
"Send your feedback to [email protected]", | ||
"<p>This is an HTML paragraph.</p>", | ||
"<xml>This is some XML content.</xml>", | ||
"😃 Removing emoticons and emojis 😊 🚀", | ||
"This text has infrequent punctuations: !?#", | ||
"Multiple spaces between words.", | ||
]; | ||
let mut v: Vec<String> = Vec::new(); | ||
for _ in 0..100 { | ||
for i in &data { | ||
v.push(i.to_string()); | ||
} | ||
} | ||
println!("Data size: {}", v.len()); | ||
extreme(v.clone()); | ||
relaxed(v.clone()); | ||
strict(v.clone()); | ||
println!("Done!"); | ||
} | ||
|
||
fn main() { | ||
leak(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import copy | ||
import gc | ||
|
||
from memory_profiler import profile | ||
|
||
|
||
def dummy_clean(data):
    """Return a deep copy of ``data`` without modifying the input.

    Performs no cleaning at all; it only duplicates the input via
    ``copy.deepcopy``.
    """
    duplicate = copy.deepcopy(data)
    return duplicate
|
||
|
||
@profile
def profile_extreme_clean():
    """Run ``extreme_clean`` on a large repeated sample under ``@profile``.

    Prints a banner, imports ``extreme_clean`` inside the function, builds
    a list of the eight sample texts repeated 100000 times, cleans it, then
    drops both lists and triggers a garbage collection.
    """
    print("Profiling extreme_clean")
    from texy.pipelines import extreme_clean

    corpus = [
        "Hello, this is a sample text with\nnewlines.",
        "Visit https://example.com for more info!",
        "Send your feedback to [email protected]",
        "<p>This is an HTML paragraph.</p>",
        "<xml>This is some XML content.</xml>",
        "😃 Removing emoticons and emojis 😊 🚀",
        "This text has infrequent punctuations: !?#",
        "Multiple spaces between words.",
    ] * 100000
    result = extreme_clean(corpus)

    # Release the output first, then the input, then force a collection pass.
    del result
    del corpus
    gc.collect()
|
||
|
||
@profile
def profile_dummy_clean():
    """Run ``dummy_clean`` on a large repeated sample under ``@profile``.

    Builds the same input as ``profile_extreme_clean`` (eight sample texts
    repeated 100000 times), deep-copies it through ``dummy_clean``, then
    drops both lists and triggers a garbage collection.
    """
    data = [
        "Hello, this is a sample text with\nnewlines.",
        "Visit https://example.com for more info!",
        "Send your feedback to [email protected]",
        "<p>This is an HTML paragraph.</p>",
        "<xml>This is some XML content.</xml>",
        "😃 Removing emoticons and emojis 😊 🚀",
        "This text has infrequent punctuations: !?#",
        "Multiple spaces between words.",
    ] * 100000

    cleaned_data = dummy_clean(data)

    # Drop the references before collecting, matching profile_extreme_clean;
    # collecting while both lists are still referenced cannot release them.
    del cleaned_data
    del data
    gc.collect()
|
||
|
||
# Script entry point: only the extreme_clean profile runs by default.
# Call profile_dummy_clean() here instead to profile the deepcopy-only variant.
if __name__ == "__main__":
    profile_extreme_clean()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "0.0.2-alpha" | ||
__version__ = "0.0.2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters