Merge branch 'main' into update-embedded-devices-example

adap · Nov 22, 2024 · 9cfa889 · 9cfa889
2 parents 6d8c80c + 67688c6
commit 9cfa889
Show file tree

Hide file tree

Showing 17 changed files with 498 additions and 347 deletions.
diff --git a/.github/workflows/docker-readme.yml b/.github/workflows/docker-readme.yml
@@ -24,7 +24,7 @@ jobs:
             list-files: "json"
             filters: |
               readme:
-                - 'src/docker/**/README.md'
+                - added|modified: 'src/docker/**/README.md'
 
     update:
       if: ${{ needs.collect.outputs.readme_files != '' && toJson(fromJson(needs.collect.outputs.readme_files)) != '[]' }}

diff --git a/.github/workflows/update_translations.yml b/.github/workflows/update_translations.yml
@@ -66,6 +66,7 @@ jobs:
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           branch: '${{ env.branch-name }}'
+          force_with_lease: true
 
       - name: Create Pull Request
         if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60

diff --git a/datasets/doc/source/recommended-fl-datasets.rst b/datasets/doc/source/recommended-fl-datasets.rst
@@ -0,0 +1,167 @@
+Recommended FL Datasets
+=======================
+
+This page lists the recommended datasets for federated learning research, which can be
+used with Flower Datasets (``flwr-datasets``). To learn about the library, see the
+`quickstart tutorial <https://flower.ai/docs/datasets/tutorial-quickstart.html>`_. To
+see a full FL example with Flower and Flower Datasets, open the `quickstart-pytorch
+<https://github.com/adap/flower/tree/main/examples/quickstart-pytorch>`_ example.
+
+.. note::
+
+    All datasets from `HuggingFace Hub <https://huggingface.co/datasets>`_ can be used with our library. This page presents just a set of datasets we collected that you might find useful.
+
+For more information about any dataset, visit its page by clicking the dataset name. For more information on how to use these datasets with ``flwr-datasets``, see the quickstart tutorial linked above.
+
+Image Datasets
+--------------
+
+.. list-table:: Image Datasets
+   :widths: 40 40 20
+   :header-rows: 1
+
+   * - Name
+     - Size
+     - Image Shape
+   * - `ylecun/mnist <https://huggingface.co/datasets/ylecun/mnist>`_
+     - train 60k;  
+       test 10k
+     - 28x28
+   * - `uoft-cs/cifar10 <https://huggingface.co/datasets/uoft-cs/cifar10>`_
+     - train 50k;  
+       test 10k
+     - 32x32x3
+   * - `uoft-cs/cifar100 <https://huggingface.co/datasets/uoft-cs/cifar100>`_
+     - train 50k;  
+       test 10k
+     - 32x32x3
+   * - `zalando-datasets/fashion_mnist <https://huggingface.co/datasets/zalando-datasets/fashion_mnist>`_
+     - train 60k;  
+       test 10k
+     - 28x28
+   * - `flwrlabs/femnist <https://huggingface.co/datasets/flwrlabs/femnist>`_
+     - train 814k
+     - 28x28
+   * - `zh-plus/tiny-imagenet <https://huggingface.co/datasets/zh-plus/tiny-imagenet>`_
+     - train 100k;  
+       valid 10k
+     - 64x64x3
+   * - `flwrlabs/usps <https://huggingface.co/datasets/flwrlabs/usps>`_
+     - train 7.3k;  
+       test 2k
+     - 16x16
+   * - `flwrlabs/pacs <https://huggingface.co/datasets/flwrlabs/pacs>`_
+     - train 10k
+     - 227x227
+   * - `flwrlabs/cinic10 <https://huggingface.co/datasets/flwrlabs/cinic10>`_
+     - train 90k;  
+       valid 90k;  
+       test 90k
+     - 32x32x3
+   * - `flwrlabs/caltech101 <https://huggingface.co/datasets/flwrlabs/caltech101>`_
+     - train 8.7k
+     - varies
+   * - `flwrlabs/office-home <https://huggingface.co/datasets/flwrlabs/office-home>`_
+     - train 15.6k
+     - varies
+   * - `flwrlabs/fed-isic2019 <https://huggingface.co/datasets/flwrlabs/fed-isic2019>`_
+     - train 18.6k;  
+       test 4.7k
+     - varies
+   * - `ufldl-stanford/svhn <https://huggingface.co/datasets/ufldl-stanford/svhn>`_
+     - train 73.3k;  
+       test 26k;  
+       extra 531k
+     - 32x32x3
+   * - `sasha/dog-food <https://huggingface.co/datasets/sasha/dog-food>`_
+     - train 2.1k;  
+       test 0.9k
+     - varies
+   * - `Mike0307/MNIST-M <https://huggingface.co/datasets/Mike0307/MNIST-M>`_
+     - train 59k;  
+       test 9k
+     - 32x32
+
+Audio Datasets
+--------------
+
+.. list-table:: Audio Datasets
+   :widths: 35 30 15
+   :header-rows: 1
+
+   * - Name
+     - Size
+     - Subset
+   * - `google/speech_commands <https://huggingface.co/datasets/google/speech_commands>`_
+     - train 64.7k
+     - v0.01
+   * - `google/speech_commands <https://huggingface.co/datasets/google/speech_commands>`_
+     - train 105.8k
+     - v0.02
+   * - `flwrlabs/ambient-acoustic-context <https://huggingface.co/datasets/flwrlabs/ambient-acoustic-context>`_
+     - train 70.3k
+     - 
+   * - `fixie-ai/common_voice_17_0 <https://huggingface.co/datasets/fixie-ai/common_voice_17_0>`_
+     - varies
+     - 14 versions
+   * - `fixie-ai/librispeech_asr <https://huggingface.co/datasets/fixie-ai/librispeech_asr>`_
+     - varies
+     - clean/other
+
+Tabular Datasets
+----------------
+
+.. list-table:: Tabular Datasets
+   :widths: 35 30
+   :header-rows: 1
+
+   * - Name
+     - Size
+   * - `scikit-learn/adult-census-income <https://huggingface.co/datasets/scikit-learn/adult-census-income>`_
+     - train 32.6k
+   * - `jlh/uci-mushrooms <https://huggingface.co/datasets/jlh/uci-mushrooms>`_
+     - train 8.1k
+   * - `scikit-learn/iris <https://huggingface.co/datasets/scikit-learn/iris>`_
+     - train 150
+
+Text Datasets
+-------------
+
+.. list-table:: Text Datasets
+   :widths: 40 30 30
+   :header-rows: 1
+
+   * - Name
+     - Size
+     - Category
+   * - `sentiment140 <https://huggingface.co/datasets/sentiment140>`_
+     - train 1.6M;  
+       test 0.5k
+     - Sentiment
+   * - `google-research-datasets/mbpp <https://huggingface.co/datasets/google-research-datasets/mbpp>`_
+     - full 974; sanitized 427
+     - General
+   * - `openai/openai_humaneval <https://huggingface.co/datasets/openai/openai_humaneval>`_
+     - test 164
+     - General
+   * - `lukaemon/mmlu <https://huggingface.co/datasets/lukaemon/mmlu>`_
+     - varies
+     - General
+   * - `takala/financial_phrasebank <https://huggingface.co/datasets/takala/financial_phrasebank>`_
+     - train 4.8k
+     - Financial
+   * - `pauri32/fiqa-2018 <https://huggingface.co/datasets/pauri32/fiqa-2018>`_
+     - train 0.9k; validation 0.1k; test 0.2k
+     - Financial
+   * - `zeroshot/twitter-financial-news-sentiment <https://huggingface.co/datasets/zeroshot/twitter-financial-news-sentiment>`_
+     - train 9.5k; validation 2.4k
+     - Financial
+   * - `bigbio/pubmed_qa <https://huggingface.co/datasets/bigbio/pubmed_qa>`_
+     - train 2M; validation 11k
+     - Medical
+   * - `openlifescienceai/medmcqa <https://huggingface.co/datasets/openlifescienceai/medmcqa>`_
+     - train 183k; validation 4.3k; test 6.2k
+     - Medical
+   * - `bigbio/med_qa <https://huggingface.co/datasets/bigbio/med_qa>`_
+     - train 10.1k; test 1.3k; validation 1.3k
+     - Medical
diff --git a/datasets/flwr_datasets/__init__.py b/datasets/flwr_datasets/__init__.py
@@ -15,7 +15,7 @@
 """Flower Datasets main package."""
 
 
-from flwr_datasets import partitioner, preprocessor
+from flwr_datasets import metrics, partitioner, preprocessor
 from flwr_datasets import utils as utils
 from flwr_datasets import visualization
 from flwr_datasets.common.version import package_version as _package_version

diff --git a/dev/prepare-release-changelog.sh b/dev/prepare-release-changelog.sh