Skip to content

Commit

Permalink
Minor fixes (#47)
Browse files Browse the repository at this point in the history
* version bump

* access to pipeline

* removed strict dependency on promptsource

* adding a new script to install blingfire

* better message

* fixed metadata

* typos
  • Loading branch information
soldni authored Jan 17, 2023
1 parent 679401d commit 6a9a84a
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 144 deletions.
50 changes: 35 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
[project]
name = "smashed"
version = "0.15.4"
description = "Sequential MAppers for Sequences of HEterogeneous Dictionaries is a set of Python interfaces designed to apply transformations to samples in datasets, which are often implemented as sequences of dictionaries."
authors = [
{name = "Allen Institute for Artificial Intelligence", email = "[email protected]" },
{name = "Luca Soldaini", email = "[email protected]"}
]
version = "0.15.5"
description = """\
SMASHED is a toolkit designed to apply transformations to samples in \
datasets, such as fields extraction, tokenization, prompting, batching, \
and more. Supports datasets from Huggingface, torchdata iterables, or \
simple lists of dictionaries.\
"""
# authors = [
# {name = "Allen Institute for Artificial Intelligence", email = "[email protected]"},
# {name = "Luca Soldaini", email = "[email protected]"}
# ]
license = {text = "Apache-2.0"}
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.9"
dependencies = [
"torch>=1.9",
"transformers>=4.5",
Expand All @@ -17,6 +22,7 @@ dependencies = [
"ftfy>=6.1.1",
"platformdirs>=2.5.0",
"glom>=21.0.0",
"Jinja2>=3.0.3",
]
classifiers = [
"Development Status :: 4 - Beta",
Expand All @@ -32,20 +38,35 @@ keywords = [
"mappers",
"pytorch",
"torch",
"huggingfae",
"huggingface",
"transformers",
"datasets",
"dict",
"dataset",
"pipeline",
"preprocessing",
"nlp",
"natural language processing",
"text",
"prompting"
"prompting",
"prefix tuning",
"in context learning"
]

[[project.authors]]
name = "Allen Institute for Artificial Intelligence"
email = "[email protected]"

[[project.authors]]
name = "Luca Soldaini"
email = "[email protected]"

[[project.authors]]
name = "Kyle Lo"
email = "[email protected]"

[[project.maintainers]]
name = "Luca Soldaini"
email = "[email protected]"

[project.urls]
"Homepage" = "https://github.com/allenai/smashed"
Expand All @@ -70,7 +91,7 @@ requires = [

[project.optional-dependencies]
dev = [
"springs>=1.8.3",
"springs>=1.9.1",
"black[jupyter]>=21.12b0",
"isort>=5.8.0",
"mypy>=0.971",
Expand All @@ -87,17 +108,16 @@ remote = [
"boto3>=1.25.5",
]
datasets = [
"datasets>=2.4.0",
"datasets>=2.8.0",
"dill>=0.3.0",
]
prompting = [
"promptsource>=0.2.3",
"blingfire>=0.1.8",
"PyYAML>=6.0.0",
]
torchdata = [
"torch>=1.12.1",
"torchdata>=0.4.1"
"torch>=1.13.1",
"torchdata>=0.5.1"
]
all = [
"smashed[dev]",
Expand Down
File renamed without changes.
8 changes: 3 additions & 5 deletions src/smashed/base/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,15 @@ class ChainableMapperMixIn(AbstractBaseMapper):
fingerprint: str
pipeline: Union["ChainableMapperMixIn", None]

def __getitem__(self, n: int) -> "ChainableMapperMixIn":
    """Return the mapper at position ``n`` in this pipeline.

    Counting starts at this mapper: ``n == 0`` returns ``self``,
    ``n == 1`` returns the next mapper in the chain, and so on.

    Args:
        n: zero-based position of the mapper to fetch.

    Returns:
        The mapper ``n`` steps down the pipeline chain.

    Raises:
        IndexError: if ``n`` is negative or past the end of the
            pipeline.
    """
    if n < 0:
        # Negative indices are not supported; fail fast instead of
        # recursing to the end of the chain before raising.
        raise IndexError("Pipeline index out of range")
    elif n == 0:
        return self
    elif self.pipeline is None:
        raise IndexError("Pipeline index out of range")
    else:
        # Delegate to the next mapper with a decremented index.
        return self.pipeline[n - 1]

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion src/smashed/base/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ def make_pipeline(
) -> M:
"""Make a pipeline of mappers."""
for mapper in rest_mappers:
first_mapper = first_mapper.chain(mapper)
first_mapper.chain(mapper)
return first_mapper
Loading

0 comments on commit 6a9a84a

Please sign in to comment.