Skip to content

Commit

Permalink
UniProt datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
0x00b1 committed May 20, 2024
1 parent 5e9c661 commit 5867d0d
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 4 deletions.
6 changes: 5 additions & 1 deletion src/beignet/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .__uniref_dataset import _UniRefDataset
from .__uniprot_dataset import _UniRefDataset
from ._fasta_dataset import FASTADataset
from ._sequence_dataset import SequenceDataset
from ._sized_sequence_dataset import SizedSequenceDataset
from ._swissprot_dataset import SwissProtDataset
from ._trembl_dataset import TrEMBLDataset
from ._uniref50_dataset import UniRef50Dataset
from ._uniref90_dataset import UniRef90Dataset
from ._uniref100_dataset import UniRef100Dataset
Expand All @@ -10,6 +12,8 @@
"FASTADataset",
"SequenceDataset",
"SizedSequenceDataset",
"SwissProtDataset",
"TrEMBLDataset",
"UniRef100Dataset",
"UniRef50Dataset",
"UniRef90Dataset",
Expand Down
File renamed without changes.
39 changes: 39 additions & 0 deletions src/beignet/datasets/_swissprot_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from os import PathLike
from typing import Callable

from beignet.transforms import Transform

from .__uniprot_dataset import _UniRefDataset


class SwissProtDataset(_UniRefDataset):
    """Dataset over the UniProtKB Swiss-Prot FASTA archive.

    This subclass defines only `__init__`; every other behaviour is
    inherited from `_UniRefDataset`. Its sole job is to fix the source URL
    (`uniprot_sprot.fasta.gz`) and the expected checksum.
    """

    def __init__(
        self,
        root: str | PathLike | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ) -> None:
        """
        Parameters
        ----------
        root : str | PathLike, optional
            Root directory for the dataset, forwarded unchanged to the base
            class (default: `None`).
            NOTE(review): presumably the directory the archive is stored
            in (and downloaded to when missing) -- confirm against
            `_UniRefDataset`.
        transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a sequence to a
            transformed sequence (default: `None`).
        target_transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a target to a transformed
            target (default: `None`).
        """
        # NOTE(review): the URL points at `current_release`, while the
        # checksum below is a fixed literal. Confirm the checksum still
        # matches whenever the upstream file is republished.
        url = (
            "https://ftp.uniprot.org/pub/databases/uniprot/current_release/"
            "knowledgebase/complete/uniprot_sprot.fasta.gz"
        )
        checksum = "md5:0766df3e5785fc5f1cfc496aa89e86ad"

        super().__init__(
            url,
            root,
            checksum,
            transform=transform,
            target_transform=target_transform,
        )
39 changes: 39 additions & 0 deletions src/beignet/datasets/_trembl_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from os import PathLike
from typing import Callable

from beignet.transforms import Transform

from .__uniprot_dataset import _UniRefDataset


class TrEMBLDataset(_UniRefDataset):
    """Dataset over the UniProtKB TrEMBL FASTA archive.

    This subclass defines only `__init__`; every other behaviour is
    inherited from `_UniRefDataset`. Its sole job is to fix the source URL
    (`uniprot_trembl.fasta.gz`) and the expected checksum.
    """

    def __init__(
        self,
        root: str | PathLike | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ) -> None:
        """
        Parameters
        ----------
        root : str | PathLike, optional
            Root directory for the dataset, forwarded unchanged to the base
            class (default: `None`).
            NOTE(review): presumably the directory the archive is stored
            in (and downloaded to when missing) -- confirm against
            `_UniRefDataset`.
        transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a sequence to a
            transformed sequence (default: `None`).
        target_transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a target to a transformed
            target (default: `None`).
            NOTE(review): what the target actually is depends on
            `_UniRefDataset` -- confirm there.
        """
        # NOTE(review): the URL points at `current_release`, while the
        # checksum below is a fixed literal. Confirm the checksum still
        # matches whenever the upstream file is republished.
        url = (
            "https://ftp.uniprot.org/pub/databases/uniprot/current_release/"
            "knowledgebase/complete/uniprot_trembl.fasta.gz"
        )
        checksum = "md5:56f0f20479a88d28fb51db7ef4df90ed"

        super().__init__(
            url,
            root,
            checksum,
            transform=transform,
            target_transform=target_transform,
        )
2 changes: 1 addition & 1 deletion src/beignet/datasets/_uniref100_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Callable

from ..transforms import Transform
from .__uniref_dataset import _UniRefDataset
from .__uniprot_dataset import _UniRefDataset


class UniRef100Dataset(_UniRefDataset):
Expand Down
2 changes: 1 addition & 1 deletion src/beignet/datasets/_uniref50_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from beignet.transforms import Transform

from .__uniref_dataset import _UniRefDataset
from .__uniprot_dataset import _UniRefDataset


class UniRef50Dataset(_UniRefDataset):
Expand Down
2 changes: 1 addition & 1 deletion src/beignet/datasets/_uniref90_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Callable

from ..transforms import Transform
from .__uniref_dataset import _UniRefDataset
from .__uniprot_dataset import _UniRefDataset


class UniRef90Dataset(_UniRefDataset):
Expand Down

0 comments on commit 5867d0d

Please sign in to comment.