From be4bd7a3868c49c07542277f66a4ae4a745777a9 Mon Sep 17 00:00:00 2001
From: Hao-Wen Dong
Date: Fri, 15 Jan 2021 04:48:02 -0800
Subject: [PATCH] Make overwrite default to False (#34)

---
 docs/_modules/muspy/datasets/base.html | 65 +++++++++++---------------
 docs/datasets/base.html                | 15 +++---
 docs/datasets/local.html               |  6 +--
 docs/datasets/remote.html              | 37 ++++++++-------
 docs/doc/datasets.html                 | 52 ++++++++++-----------
 docs/doc/muspy.html                    | 52 ++++++++++-----------
 muspy/datasets/base.py                 | 65 +++++++++++---------------
 muspy/datasets/utils.py                | 22 +--------
 8 files changed, 137 insertions(+), 177 deletions(-)

diff --git a/docs/_modules/muspy/datasets/base.html b/docs/_modules/muspy/datasets/base.html
index d41f945a..41091bd6 100644
--- a/docs/_modules/muspy/datasets/base.html
+++ b/docs/_modules/muspy/datasets/base.html
@@ -268,7 +268,7 @@

Source code for muspy.datasets.base

 
[docs] def save( self, root: Union[str, Path], - kind: Optional[str] = "json", + kind: str = "json", n_jobs: int = 1, ignore_exceptions: bool = True, verbose: bool = True, @@ -325,7 +325,7 @@

Source code for muspy.datasets.base

         n_digits = len(str(len(self)))
 
         if verbose:
-            print("Start converting and saving the dataset.")
+            print("Converting and saving the dataset...")
         if n_jobs == 1:
             count = 0
             for idx in tqdm(range(len(self))):  # type: ignore
@@ -343,7 +343,7 @@ 

Source code for muspy.datasets.base

             )
             count = results.count(True)
         if verbose:
-            print(f"{count} out of {len(self)} files successfully saved.")
+            print(f"Successfully saved {count} out of {len(self)} files.")
         (root / ".muspy.success").touch(exist_ok=True)
[docs] def split( @@ -596,8 +596,7 @@

Source code for muspy.datasets.base

     download_and_extract : bool, optional
         Whether to download and extract the dataset. Defaults to False.
     overwrite : bool, optional
-        Whether to overwrite existing downloaded files. Defaults to
-        True.
+        Whether to overwrite existing file(s). Defaults to False.
     cleanup : bool, optional
         Whether to remove the source archive(s). Defaults to False.
     verbose : bool, optional
@@ -652,18 +651,11 @@ 

Source code for muspy.datasets.base

     def __init__(
         self,
         root: Union[str, Path],
-        download_and_extract: Optional[bool] = None,
-        overwrite: Optional[bool] = None,
-        cleanup: Optional[bool] = None,
+        download_and_extract: bool = False,
+        overwrite: bool = False,
+        cleanup: bool = False,
         verbose: bool = True,
     ):
-        if download_and_extract is None:
-            download_and_extract = False
-        if overwrite is None:
-            overwrite = True
-        if cleanup is None:
-            cleanup = False
-
         super().__init__()
         self.root = Path(root).expanduser().resolve()
         self.root.mkdir(exist_ok=True)
@@ -705,14 +697,14 @@ 

Source code for muspy.datasets.base

         return True
[docs] def download( - self: RemoteDatasetType, overwrite: bool = True, verbose: bool = True + self: RemoteDatasetType, overwrite: bool = False, verbose: bool = True ) -> RemoteDatasetType: """Download the source datasets. Parameters ---------- overwrite : bool, optional - Whether to overwrite existing files. Defaults to True. + Whether to overwrite existing file(s). Defaults to False. verbose : bool, optional Whether to be verbose. Defaults to True. @@ -762,7 +754,7 @@

Source code for muspy.datasets.base

 
 
[docs] def download_and_extract( self: RemoteDatasetType, - overwrite: bool = True, + overwrite: bool = False, cleanup: bool = False, verbose: bool = True, ) -> RemoteDatasetType: @@ -771,9 +763,9 @@

Source code for muspy.datasets.base

         Parameters
         ----------
         overwrite : bool, optional
-            Whether to overwrite existing files. Defaults to True.
+            Whether to overwrite existing file(s). Defaults to False.
         cleanup : bool, optional
-            Whether to remove the source archive. Defaults to False.
+            Whether to remove the source archive(s). Defaults to False.
         verbose : bool, optional
             Whether to be verbose. Defaults to True.
 
@@ -917,6 +909,8 @@ 

Source code for muspy.datasets.base

     ----------
     download_and_extract : bool, optional
         Whether to download and extract the dataset. Defaults to False.
+    overwrite : bool, optional
+        Whether to overwrite existing file(s). Defaults to False.
     cleanup : bool, optional
         Whether to remove the source archive(s). Defaults to False.
 
@@ -932,8 +926,8 @@ 

Source code for muspy.datasets.base

         self,
         root: Union[str, Path],
         download_and_extract: bool = False,
-        overwrite: Optional[bool] = None,
-        cleanup: Optional[bool] = None,
+        overwrite: bool = False,
+        cleanup: bool = False,
         kind: str = "json",
         verbose: bool = True,
     ):
@@ -981,7 +975,7 @@ 

Source code for muspy.datasets.base

         be helpful if some source files are known to be corrupted.
         Defaults to True.
     use_converted : bool, optional
-        Force to disable on-the-fly mode and use stored converted data
+        Force to disable on-the-fly mode and use converted data.
 
     Important
     ---------
@@ -1013,17 +1007,12 @@ 

Source code for muspy.datasets.base

     def __init__(
         self,
         root: Union[str, Path],
-        convert: Optional[bool] = None,
+        convert: bool = False,
         kind: str = "json",
         n_jobs: int = 1,
-        ignore_exceptions: Optional[bool] = None,
+        ignore_exceptions: bool = True,
         use_converted: Optional[bool] = None,
     ):
-        if convert is None:
-            convert = False
-        if ignore_exceptions is None:
-            ignore_exceptions = True
-
         self.root = Path(root).expanduser().resolve()
         self.kind = kind
 
@@ -1229,12 +1218,12 @@ 

Source code for muspy.datasets.base

         self,
         root: Union[str, Path],
         download_and_extract: bool = False,
-        overwrite: Optional[bool] = None,
-        cleanup: Optional[bool] = None,
-        convert: Optional[bool] = None,
+        overwrite: bool = False,
+        cleanup: bool = False,
+        convert: bool = False,
         kind: str = "json",
         n_jobs: int = 1,
-        ignore_exceptions: Optional[bool] = None,
+        ignore_exceptions: bool = True,
         use_converted: Optional[bool] = None,
         verbose: bool = True,
     ):
@@ -1341,12 +1330,12 @@ 

Source code for muspy.datasets.base

         self,
         root: Union[str, Path],
         download_and_extract: bool = False,
-        overwrite: Optional[bool] = None,
-        cleanup: Optional[bool] = None,
-        convert: Optional[bool] = None,
+        overwrite: bool = False,
+        cleanup: bool = False,
+        convert: bool = False,
         kind: str = "json",
         n_jobs: int = 1,
-        ignore_exceptions: Optional[bool] = None,
+        ignore_exceptions: bool = True,
         use_converted: Optional[bool] = None,
         verbose: bool = True,
     ):
diff --git a/docs/datasets/base.html b/docs/datasets/base.html
index 8072e54d..bcf628b0 100644
--- a/docs/datasets/base.html
+++ b/docs/datasets/base.html
@@ -321,7 +321,7 @@ 

Base Dataset Classes
-class muspy.RemoteDataset(root, download_and_extract=None, overwrite=None, cleanup=None, verbose=True)[source]
+class muspy.RemoteDataset(root, download_and_extract=False, overwrite=False, cleanup=False, verbose=True)[source]

Base class for remote MusPy datasets.

This class extends muspy.Dataset to support remote datasets. To build a custom remote dataset, please refer to the @@ -343,8 +343,7 @@

Base Dataset Classes

Parameters
  • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

  • -
  • overwrite (bool, optional) – Whether to overwrite existing downloaded files. Defaults to -True.

  • +
  • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

  • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

  • verbose (bool, optional) – Whether to be verbose. Defaults to True.

@@ -404,12 +403,12 @@

Base Dataset Classes
-download(overwrite=True, verbose=True)[source]
+download(overwrite=False, verbose=True)[source]

Download the source datasets.

Parameters
    -
  • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

  • +
  • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

  • verbose (bool, optional) – Whether to be verbose. Defaults to True.

@@ -445,13 +444,13 @@

Base Dataset Classes
-download_and_extract(overwrite=True, cleanup=False, verbose=True)[source]
+download_and_extract(overwrite=False, cleanup=False, verbose=True)[source]

Download source datasets and extract the downloaded archives.

Parameters
    -
  • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

  • -
  • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

  • +
  • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

  • +
  • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

  • verbose (bool, optional) – Whether to be verbose. Defaults to True.

diff --git a/docs/datasets/local.html b/docs/datasets/local.html
index a27d3a77..77128aa9 100644
--- a/docs/datasets/local.html
+++ b/docs/datasets/local.html
@@ -178,7 +178,7 @@

Local Dataset Classes

Here are the classes for local datasets.

-class muspy.FolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
+class muspy.FolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

Class for datasets storing files in a folder.

This class extends muspy.Dataset to support folder datasets. To build a custom folder dataset, please refer to the @@ -211,7 +211,7 @@

Local Dataset Classes

ignore_exceptions (bool, optional) – Whether to ignore errors and skip failed conversions. This can be helpful if some source files are known to be corrupted. Defaults to True.

-
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use stored converted data

  • +
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use converted data.

  • @@ -628,7 +628,7 @@

    Local Dataset Classes
    -class muspy.ABCFolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
    +class muspy.ABCFolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

    Class for datasets storing ABC files in a folder.

    See also

    diff --git a/docs/datasets/remote.html b/docs/datasets/remote.html
    index c939be35..d290b65a 100644
    --- a/docs/datasets/remote.html
    +++ b/docs/datasets/remote.html
    @@ -178,7 +178,7 @@

    Remote Dataset Classes

    Here are the classes for remote datasets.

    -class muspy.RemoteFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.RemoteFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing files in a folder.

    @@ -275,12 +275,12 @@

    Remote Dataset Classes
    -download(overwrite=True, verbose=True)
    +download(overwrite=False, verbose=True)

    Download the source datasets.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -295,13 +295,13 @@

    Remote Dataset Classes
    -download_and_extract(overwrite=True, cleanup=False, verbose=True)
    +download_and_extract(overwrite=False, cleanup=False, verbose=True)

    Download source datasets and extract the downloaded archives.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • -
    • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • +
    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -510,7 +510,7 @@

    Remote Dataset Classes
    -class muspy.RemoteMusicDataset(root, download_and_extract=False, overwrite=None, cleanup=None, kind='json', verbose=True)[source]
    +class muspy.RemoteMusicDataset(root, download_and_extract=False, overwrite=False, cleanup=False, kind='json', verbose=True)[source]

    Base class for remote datasets of MusPy JSON/YAML files.

    @@ -538,6 +538,7 @@

    Remote Dataset Classes

    Parameters

    • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    @@ -559,12 +560,12 @@

    Remote Dataset Classes
    -download(overwrite=True, verbose=True)
    +download(overwrite=False, verbose=True)

    Download the source datasets.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -579,13 +580,13 @@

    Remote Dataset Classes
    -download_and_extract(overwrite=True, cleanup=False, verbose=True)
    +download_and_extract(overwrite=False, cleanup=False, verbose=True)

    Download source datasets and extract the downloaded archives.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • -
    • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • +
    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -760,7 +761,7 @@

    Remote Dataset Classes
    -class muspy.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing ABC files in a folder.

    See also

    @@ -821,12 +822,12 @@

    Remote Dataset Classes
    -download(overwrite=True, verbose=True)
    +download(overwrite=False, verbose=True)

    Download the source datasets.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -841,13 +842,13 @@

    Remote Dataset Classes
    -download_and_extract(overwrite=True, cleanup=False, verbose=True)
    +download_and_extract(overwrite=False, cleanup=False, verbose=True)

    Download source datasets and extract the downloaded archives.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • -
    • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • +
    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    diff --git a/docs/doc/datasets.html b/docs/doc/datasets.html
    index 3e392e7e..f86140c2 100644
    --- a/docs/doc/datasets.html
    +++ b/docs/doc/datasets.html
    @@ -223,7 +223,7 @@

    Dataset Classes
    -class muspy.datasets.ABCFolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
    +class muspy.datasets.ABCFolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

    Class for datasets storing ABC files in a folder.

    See also

    @@ -405,13 +405,13 @@

    Dataset Classes
    -class muspy.datasets.EssenFolkSongDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.EssenFolkSongDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Essen Folk Song Database.

    -class muspy.datasets.FolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
    +class muspy.datasets.FolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

    Class for datasets storing files in a folder.

    This class extends muspy.Dataset to support folder datasets. To build a custom folder dataset, please refer to the @@ -444,7 +444,7 @@

    Dataset Classes

    ignore_exceptions (bool, optional) – Whether to ignore errors and skip failed conversions. This can be helpful if some source files are known to be corrupted. Defaults to True.

    -
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use stored converted data

  • +
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use converted data.

  • @@ -564,7 +564,7 @@

    Dataset Classes
    -class muspy.datasets.HaydnOp20Dataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.HaydnOp20Dataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Haydn Op.20 Dataset.

    @@ -628,7 +628,7 @@

    Dataset Classes
    -class muspy.datasets.JSBChoralesDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.JSBChoralesDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Johann Sebastian Bach Chorales Dataset.

    @@ -640,7 +640,7 @@

    Dataset Classes
    -class muspy.datasets.LakhMIDIAlignedDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.LakhMIDIAlignedDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset - aligned subset.

    @@ -652,7 +652,7 @@

    Dataset Classes
    -class muspy.datasets.LakhMIDIDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.LakhMIDIDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset.

    @@ -664,7 +664,7 @@

    Dataset Classes
    -class muspy.datasets.LakhMIDIMatchedDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.LakhMIDIMatchedDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset - matched subset.

    @@ -676,7 +676,7 @@

    Dataset Classes
    -class muspy.datasets.MAESTRODatasetV1(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.MAESTRODatasetV1(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MAESTRO Dataset (MIDI only).

    @@ -688,7 +688,7 @@

    Dataset Classes
    -class muspy.datasets.MAESTRODatasetV2(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.MAESTRODatasetV2(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MAESTRO Dataset (MIDI only).

    @@ -771,7 +771,7 @@

    Dataset Classes
    -class muspy.datasets.MusicNetDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.MusicNetDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MusicNet Dataset (MIDI only).

    @@ -783,7 +783,7 @@

    Dataset Classes
    -class muspy.datasets.NESMusicDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.NESMusicDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    NES Music Database.

    @@ -795,13 +795,13 @@

    Dataset Classes
    -class muspy.datasets.NottinghamDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.NottinghamDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Nottingham Database.

    -class muspy.datasets.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing ABC files in a folder.

    See also

    @@ -816,7 +816,7 @@

    Dataset Classes
    -class muspy.datasets.RemoteDataset(root, download_and_extract=None, overwrite=None, cleanup=None, verbose=True)[source]
    +class muspy.datasets.RemoteDataset(root, download_and_extract=False, overwrite=False, cleanup=False, verbose=True)[source]

    Base class for remote MusPy datasets.

    This class extends muspy.Dataset to support remote datasets. To build a custom remote dataset, please refer to the @@ -838,8 +838,7 @@

    Dataset Classes

    Parameters
    • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

    • -
    • overwrite (bool, optional) – Whether to overwrite existing downloaded files. Defaults to -True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -899,12 +898,12 @@

    Dataset Classes
    -download(overwrite=True, verbose=True)[source]
    +download(overwrite=False, verbose=True)[source]

    Download the source datasets.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -940,13 +939,13 @@

    Dataset Classes
    -download_and_extract(overwrite=True, cleanup=False, verbose=True)[source]
    +download_and_extract(overwrite=False, cleanup=False, verbose=True)[source]

    Download source datasets and extract the downloaded archives.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • -
    • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • +
    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -963,7 +962,7 @@

    Dataset Classes
    -class muspy.datasets.RemoteFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.RemoteFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing files in a folder.

    @@ -1014,7 +1013,7 @@

    Dataset Classes
    -class muspy.datasets.RemoteMusicDataset(root, download_and_extract=False, overwrite=None, cleanup=None, kind='json', verbose=True)[source]
    +class muspy.datasets.RemoteMusicDataset(root, download_and_extract=False, overwrite=False, cleanup=False, kind='json', verbose=True)[source]

    Base class for remote datasets of MusPy JSON/YAML files.

    @@ -1042,6 +1041,7 @@

    Dataset Classes

    Parameters

    • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    @@ -1059,7 +1059,7 @@

    Dataset Classes
    -class muspy.datasets.WikifoniaDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.datasets.WikifoniaDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Wikifonia dataset.

    diff --git a/docs/doc/muspy.html b/docs/doc/muspy.html
    index 29b9b747..89430cf1 100644
    --- a/docs/doc/muspy.html
    +++ b/docs/doc/muspy.html
    @@ -1507,7 +1507,7 @@

    Features
    -class muspy.ABCFolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
    +class muspy.ABCFolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

    Class for datasets storing ABC files in a folder.

    See also

    @@ -1689,13 +1689,13 @@

    Features
    -class muspy.EssenFolkSongDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.EssenFolkSongDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Essen Folk Song Database.

    -class muspy.FolderDataset(root, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None)[source]
    +class muspy.FolderDataset(root, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None)[source]

    Class for datasets storing files in a folder.

    This class extends muspy.Dataset to support folder datasets. To build a custom folder dataset, please refer to the @@ -1728,7 +1728,7 @@

    Features

    ignore_exceptions (bool, optional) – Whether to ignore errors and skip failed conversions. This can be helpful if some source files are known to be corrupted. Defaults to True.

    -
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use stored converted data

  • +
  • use_converted (bool, optional) – Force to disable on-the-fly mode and use converted data.

  • @@ -1848,7 +1848,7 @@

    Features
    -class muspy.HaydnOp20Dataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.HaydnOp20Dataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Haydn Op.20 Dataset.

    @@ -1912,7 +1912,7 @@

    Features
    -class muspy.JSBChoralesDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.JSBChoralesDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Johann Sebastian Bach Chorales Dataset.

    @@ -1924,7 +1924,7 @@

    Features
    -class muspy.LakhMIDIAlignedDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.LakhMIDIAlignedDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset - aligned subset.

    @@ -1936,7 +1936,7 @@

    Features
    -class muspy.LakhMIDIDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.LakhMIDIDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset.

    @@ -1948,7 +1948,7 @@

    Features
    -class muspy.LakhMIDIMatchedDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.LakhMIDIMatchedDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Lakh MIDI Dataset - matched subset.

    @@ -1960,7 +1960,7 @@

    Features
    -class muspy.MAESTRODatasetV1(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.MAESTRODatasetV1(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MAESTRO Dataset (MIDI only).

    @@ -1972,7 +1972,7 @@

    Features
    -class muspy.MAESTRODatasetV2(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.MAESTRODatasetV2(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MAESTRO Dataset (MIDI only).

    @@ -2055,7 +2055,7 @@

    Features
    -class muspy.MusicNetDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.MusicNetDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    MusicNet Dataset (MIDI only).

    @@ -2067,7 +2067,7 @@

    Features
    -class muspy.NESMusicDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.NESMusicDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    NES Music Database.

    @@ -2079,13 +2079,13 @@

    Features
    -class muspy.NottinghamDatabase(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.NottinghamDatabase(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Nottingham Database.

    -class muspy.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.RemoteABCFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing ABC files in a folder.

    See also

    @@ -2100,7 +2100,7 @@

    Features
    -class muspy.RemoteDataset(root, download_and_extract=None, overwrite=None, cleanup=None, verbose=True)[source]
    +class muspy.RemoteDataset(root, download_and_extract=False, overwrite=False, cleanup=False, verbose=True)[source]

    Base class for remote MusPy datasets.

    This class extends muspy.Dataset to support remote datasets. To build a custom remote dataset, please refer to the @@ -2122,8 +2122,7 @@

    FeaturesParameters
    • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

    • -
    • overwrite (bool, optional) – Whether to overwrite existing downloaded files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -2183,12 +2182,12 @@

    Features
    -download(overwrite=True, verbose=True)[source]
    +download(overwrite=False, verbose=True)[source]

    Download the source datasets.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -2224,13 +2223,13 @@

    Features
    -download_and_extract(overwrite=True, cleanup=False, verbose=True)[source]
    +download_and_extract(overwrite=False, cleanup=False, verbose=True)[source]

    Download source datasets and extract the downloaded archives.

    Parameters
      -
    • overwrite (bool, optional) – Whether to overwrite existing files. Defaults to True.

    • -
    • cleanup (bool, optional) – Whether to remove the source archive. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • +
    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    • verbose (bool, optional) – Whether to be verbose. Defaults to True.

    @@ -2247,7 +2246,7 @@

    Features
    -class muspy.RemoteFolderDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.RemoteFolderDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Base class for remote datasets storing files in a folder.

    @@ -2298,7 +2297,7 @@

    Features
    -class muspy.RemoteMusicDataset(root, download_and_extract=False, overwrite=None, cleanup=None, kind='json', verbose=True)[source]
    +class muspy.RemoteMusicDataset(root, download_and_extract=False, overwrite=False, cleanup=False, kind='json', verbose=True)[source]

    Base class for remote datasets of MusPy JSON/YAML files.

    @@ -2326,6 +2325,7 @@

    FeaturesParameters

    • download_and_extract (bool, optional) – Whether to download and extract the dataset. Defaults to False.

    • +
    • overwrite (bool, optional) – Whether to overwrite existing file(s). Defaults to False.

    • cleanup (bool, optional) – Whether to remove the source archive(s). Defaults to False.

    @@ -2343,7 +2343,7 @@

    Features
    -class muspy.WikifoniaDataset(root, download_and_extract=False, overwrite=None, cleanup=None, convert=None, kind='json', n_jobs=1, ignore_exceptions=None, use_converted=None, verbose=True)[source]
    +class muspy.WikifoniaDataset(root, download_and_extract=False, overwrite=False, cleanup=False, convert=False, kind='json', n_jobs=1, ignore_exceptions=True, use_converted=None, verbose=True)[source]

    Wikifonia dataset.

    diff --git a/muspy/datasets/base.py b/muspy/datasets/base.py index f1234ee3..f6146cb3 100644 --- a/muspy/datasets/base.py +++ b/muspy/datasets/base.py @@ -107,7 +107,7 @@ def citation(cls): def save( self, root: Union[str, Path], - kind: Optional[str] = "json", + kind: str = "json", n_jobs: int = 1, ignore_exceptions: bool = True, verbose: bool = True, @@ -164,7 +164,7 @@ def _saver(idx): n_digits = len(str(len(self))) if verbose: - print("Start converting and saving the dataset.") + print("Converting and saving the dataset...") if n_jobs == 1: count = 0 for idx in tqdm(range(len(self))): # type: ignore @@ -182,7 +182,7 @@ def _saver(idx): ) count = results.count(True) if verbose: - print(f"{count} out of {len(self)} files successfully saved.") + print(f"Successfully saved {count} out of {len(self)} files.") (root / ".muspy.success").touch(exist_ok=True) def split( @@ -435,8 +435,7 @@ class RemoteDataset(Dataset): download_and_extract : bool, optional Whether to download and extract the dataset. Defaults to False. overwrite : bool, optional - Whether to overwrite existing downloaded files. Defaults to - True. + Whether to overwrite existing file(s). Defaults to False. cleanup : bool, optional Whether to remove the source archive(s). Defaults to False. 
verbose : bool, optional @@ -491,18 +490,11 @@ class RemoteDataset(Dataset): def __init__( self, root: Union[str, Path], - download_and_extract: Optional[bool] = None, - overwrite: Optional[bool] = None, - cleanup: Optional[bool] = None, + download_and_extract: bool = False, + overwrite: bool = False, + cleanup: bool = False, verbose: bool = True, ): - if download_and_extract is None: - download_and_extract = False - if overwrite is None: - overwrite = True - if cleanup is None: - cleanup = False - super().__init__() self.root = Path(root).expanduser().resolve() self.root.mkdir(exist_ok=True) @@ -544,14 +536,14 @@ def source_exists(self) -> bool: return True def download( - self: RemoteDatasetType, overwrite: bool = True, verbose: bool = True + self: RemoteDatasetType, overwrite: bool = False, verbose: bool = True ) -> RemoteDatasetType: """Download the source datasets. Parameters ---------- overwrite : bool, optional - Whether to overwrite existing files. Defaults to True. + Whether to overwrite existing file(s). Defaults to False. verbose : bool, optional Whether to be verbose. Defaults to True. @@ -601,7 +593,7 @@ def extract( def download_and_extract( self: RemoteDatasetType, - overwrite: bool = True, + overwrite: bool = False, cleanup: bool = False, verbose: bool = True, ) -> RemoteDatasetType: @@ -610,9 +602,9 @@ def download_and_extract( Parameters ---------- overwrite : bool, optional - Whether to overwrite existing files. Defaults to True. + Whether to overwrite existing file(s). Defaults to False. cleanup : bool, optional - Whether to remove the source archive. Defaults to False. + Whether to remove the source archive(s). Defaults to False. verbose : bool, optional Whether to be verbose. Defaults to True. @@ -756,6 +748,8 @@ class RemoteMusicDataset(MusicDataset, RemoteDataset): ---------- download_and_extract : bool, optional Whether to download and extract the dataset. Defaults to False. 
+ overwrite : bool, optional + Whether to overwrite existing file(s). Defaults to False. cleanup : bool, optional Whether to remove the source archive(s). Defaults to False. @@ -771,8 +765,8 @@ def __init__( self, root: Union[str, Path], download_and_extract: bool = False, - overwrite: Optional[bool] = None, - cleanup: Optional[bool] = None, + overwrite: bool = False, + cleanup: bool = False, kind: str = "json", verbose: bool = True, ): @@ -820,7 +814,7 @@ class FolderDataset(Dataset): be helpful if some source files are known to be corrupted. Defaults to True. use_converted : bool, optional - Force to disable on-the-fly mode and use stored converted data + Force to disable on-the-fly mode and use converted data. Important --------- @@ -852,17 +846,12 @@ class FolderDataset(Dataset): def __init__( self, root: Union[str, Path], - convert: Optional[bool] = None, + convert: bool = False, kind: str = "json", n_jobs: int = 1, - ignore_exceptions: Optional[bool] = None, + ignore_exceptions: bool = True, use_converted: Optional[bool] = None, ): - if convert is None: - convert = False - if ignore_exceptions is None: - ignore_exceptions = True - self.root = Path(root).expanduser().resolve() self.kind = kind @@ -1068,12 +1057,12 @@ def __init__( self, root: Union[str, Path], download_and_extract: bool = False, - overwrite: Optional[bool] = None, - cleanup: Optional[bool] = None, - convert: Optional[bool] = None, + overwrite: bool = False, + cleanup: bool = False, + convert: bool = False, kind: str = "json", n_jobs: int = 1, - ignore_exceptions: Optional[bool] = None, + ignore_exceptions: bool = True, use_converted: Optional[bool] = None, verbose: bool = True, ): @@ -1180,12 +1169,12 @@ def __init__( self, root: Union[str, Path], download_and_extract: bool = False, - overwrite: Optional[bool] = None, - cleanup: Optional[bool] = None, - convert: Optional[bool] = None, + overwrite: bool = False, + cleanup: bool = False, + convert: bool = False, kind: str = "json", n_jobs: int = 
1, - ignore_exceptions: Optional[bool] = None, + ignore_exceptions: bool = True, use_converted: Optional[bool] = None, verbose: bool = True, ): diff --git a/muspy/datasets/utils.py b/muspy/datasets/utils.py index 4c37c90f..20fda196 100644 --- a/muspy/datasets/utils.py +++ b/muspy/datasets/utils.py @@ -119,7 +119,7 @@ def check_sha256( def download_url( url: str, path: Union[str, Path], - overwrite: bool = True, + overwrite: bool = False, size: Optional[int] = None, md5: Optional[str] = None, sha256: Optional[str] = None, @@ -134,7 +134,7 @@ def download_url( path : str or Path Path to save the downloaded file. overwrite : bool, optional - Whether to overwrite existing downloaded file. Defaults to True. + Whether to overwrite existing downloaded file. Defaults to False. size : int, optional Expected size of the downloaded file. Defaults to skip size check. @@ -191,24 +191,6 @@ def download_url( ) -def _get_confirm_token(response): - for key, value in response.cookies.items(): - if key.startswith("download_warning"): - return value - return None - - -def _save_response_content(response, destination, chunk_size=32768): - with open(destination, "wb") as f: - pbar = tqdm(total=None) - progress = 0 - for chunk in response.iter_content(chunk_size): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - progress += len(chunk) - pbar.update(progress - pbar.n) - - def extract_archive( path: Union[str, Path], root: Optional[Union[str, Path]] = None,