Skip to content

Commit

Permalink
New Splitter trail; revamped io_utils (#51)
Browse files Browse the repository at this point in the history
* new splitter

* formatting

* revamped io utils

* formatting

* removed unused import
  • Loading branch information
soldni authored Jan 31, 2023
1 parent 2ab965f commit 96dbedf
Show file tree
Hide file tree
Showing 7 changed files with 375 additions and 123 deletions.
2 changes: 1 addition & 1 deletion examples/squad.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
>> sm.TextToWordsMapper(
fields=["question", "context", "answers"],
- splitter="whitespace",
+ splitter="ws",
)
>> sm.SingleSequenceStriderMapper(
field_to_stride=["context"],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "smashed"
- version = "0.16.0"
+ version = "0.17.0"
description = """\
SMASHED is a toolkit designed to apply transformations to samples in \
datasets, such as fields extraction, tokenization, prompting, batching, \
Expand Down
11 changes: 6 additions & 5 deletions src/smashed/mappers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
BlingFireSplitter,
WhitespacePlusSplitter,
WhitespaceSplitter,
+ WhitespaceTrailSplitter,
)


Expand Down Expand Up @@ -69,15 +70,15 @@ class TextToWordsMapper(SingleBaseMapper):
def __init__(
self,
fields: Union[str, Sequence[str]],
- splitter: Literal[
- "blingfire", "whitespace", "whitespace_plus"
- ] = "whitespace_plus",
+ splitter: Literal["blingfire", "ws", "plus", "trail"] = "plus",
):
if splitter == "blingfire":
self.splitter = BlingFireSplitter()
- elif splitter == "whitespace_plus":
+ elif splitter == "plus":
self.splitter = WhitespacePlusSplitter()
- elif splitter == "whitespace":
+ elif splitter == "trail":
+ self.splitter = WhitespaceTrailSplitter()
+ elif splitter == "ws":
self.splitter = WhitespaceSplitter()
else:
raise ValueError(f"Unknown splitter: {splitter}")
Expand Down
Loading

0 comments on commit 96dbedf

Please sign in to comment.