drivendataorg · pjbull · May 22, 2024 · May 15, 2024 · May 15, 2024 · May 15, 2024
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -45,7 +45,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]
+        python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
     defaults:
       run:
         shell: bash
@@ -63,6 +63,12 @@ jobs:
             pyproject.toml
             dev-requirements.txt
 
+      - name: Set up Miniconda  
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          auto-activate-base: true
+          activate-environment: ""
+
       - name: Cache conda packages
         uses: actions/cache@v4
         env:

diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ _A logical, reasonably standardized but flexible project structure for doing and
 
 ## Installation
 
-Cookiecutter Data Science v2 requires Python 3.7+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
+Cookiecutter Data Science v2 requires Python 3.8+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
 
 ```bash
 # With pipx from PyPI (recommended)

diff --git a/ccds/hook_utils/dependencies.py b/ccds/hook_utils/dependencies.py
@@ -1,3 +1,28 @@
+packages = [
+    "black",
+    "flake8",
+    "isort",
+    "pip",
+    "python-dotenv",
+]
+
+basic = [
+    "ipython",
+    "jupyterlab",
+    "matplotlib",
+    "notebook",
+    "numpy",
+    "pandas",
+    "scikit-learn",
+]
+
+scaffold = [
+    "typer",
+    "loguru",
+    "tqdm",
+]
+
+
 def write_dependencies(
     dependencies, packages, pip_only_packages, repo_name, module_name, python_version
 ):

diff --git a/docs/docs/css/extra.css b/docs/docs/css/extra.css
@@ -79,7 +79,7 @@
     font-size: 0.9rem;
 }
 
-.md-typeset > p, .md-typeset > ul, .md-typeset > ol, .md-typeset > blockquote, .md-typeset > div.admonition {
+.md-typeset > h1, .md-typeset > h2, .md-typeset > h3, .md-typeset > p, .md-typeset > ul, .md-typeset > ol, .md-typeset > blockquote, .md-typeset > div.admonition {
     max-width: 35rem;
 }
 

diff --git a/docs/docs/index.md b/docs/docs/index.md
@@ -6,9 +6,14 @@ _A logical, flexible, and reasonably standardized project structure for doing an
     <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
 </a>
 
+!!! info "CCDS V2 Announcement"
+
+    Version 2 of Cookiecutter Data Science has recently launched. To learn more about what's different and what's in progress, see the [announcement blog post](https://drivendata.co/blog/ccds-v2).
+
+
 ## Quickstart
 
-Cookiecutter Data Science v2 requires Python 3.7+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
+Cookiecutter Data Science v2 requires Python 3.8+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
 
 === "With pipx (recommended)"
 
@@ -46,7 +51,7 @@ Cookiecutter Data Science v2 requires Python 3.7+. Since this is a cross-project
     cookiecutter https://github.com/drivendata/cookiecutter-data-science -c v1
     ```
 
-!!! info "Changes in v2"
+!!! info "Use the ccds command-line tool"
 
     Cookiecutter Data Science v2 now requires installing the new `cookiecutter-data-science` Python package, which extends the functionality of the [`cookiecutter`](https://cookiecutter.readthedocs.io/en/stable/README.html) templating utility. Use the provided `ccds` command-line program instead of `cookiecutter`.
 
@@ -116,16 +121,16 @@ The directory structure of your new project will look something like this (depen
     │
     ├── __init__.py             <- Makes {{ cookiecutter.module_name }} a Python module
     │
-    ├── data                    <- Scripts to download or generate data
-    │   └── make_dataset.py
+    ├── config.py               <- Store useful variables and configuration
+    │
+    ├── dataset.py              <- Code to download or generate data
     │
-    ├── features                <- Scripts to turn raw data into features for modeling
-    │   └── build_features.py
+    ├── features.py             <- Code to create features for modeling
     │
-    ├── models                  <- Scripts to train models and then use trained models 
-    │   ├── predict_model.py       to make predictions
-    │   └── train_model.py
+    ├── modeling                
+    │   ├── __init__.py 
+    │   ├── predict.py          <- Code to run model inference with trained models          
+    │   └── train.py            <- Code to train models
     │
-    └── visualization           <- Scripts to create exploratory and results-oriented 
-        └── visualize.py           visualizations
+    └── plots.py                <- Code to create visualizations   
 ```
diff --git a/docs/docs/opinions.md b/docs/docs/opinions.md
@@ -2,11 +2,11 @@
 
 The default project structure reflects certain opinions about how to do collaborative data science work. These opinions grew out of our own experiences with what works and what doesn't. Some of these opinions are about workflows, and others are about tools that can make the process easier. These opinions are discussed below. If you have any thoughts, please [contribute or share them](contributing.md).
 
-## Data analysis is a directed acyclic graph
+### Data analysis is a directed acyclic graph
 
-> Don't _ever_ edit your raw data, especially not manually, and _especially_ not in Excel.
+_Don't ever edit your raw data. Especially not manually. And especially not in Excel._
 
-The most important features of a quality data analysis are **correctness** and **reproducibility**—anyone should be able to re-run your analysis using only your code and raw data and produce the same final products. The best way to ensure correctness is to test your analysis code. **The best way to ensure reproducibility is to treat your data analysis pipeline as a directed acyclic graph ([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph))**. This means each step of your analysis is a node in a directed graph with no loops. You can run through the graph forwards to recreate any analysis output, or you can trace backwards from an output to examine the combination of code and data that created it.
+The most important features of a quality data analysis are correctness and reproducibility—anyone should be able to re-run your analysis using only your code and raw data and produce the same final products. The best way to ensure correctness is to test your analysis code. **The best way to ensure reproducibility is to treat your data analysis pipeline as a directed acyclic graph ([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph))**. This means each step of your analysis is a node in a directed graph with no loops. You can run through the graph forwards to recreate any analysis output, or you can trace backwards from an output to examine the combination of code and data that created it.
 
 ### Raw data is immutable
 
@@ -59,7 +59,8 @@ We make it easy to refactor notebook code because the ccds template makes your p
 # OPTIONAL: Load the "autoreload" extension so that code can change
 %load_ext autoreload
 
-# OPTIONAL: always reload modules so that as you change code in {{ cookiecutter.module_name }}, it gets loaded
+# OPTIONAL: always reload modules so that as you change code
+# in {{ cookiecutter.module_name }}, it gets loaded
 %autoreload 2
 
 from {{ cookiecutter.module_name }}.data import make_dataset
@@ -133,6 +134,7 @@ aws_secret_access_key=mysecretkey
 aws_access_key_id=myprojectaccesskey
 aws_secret_access_key=myprojectsecretkey
 ```
+
 You can add the profile name when initialising a project; assuming no applicable environment variables are set, the profile credentials will be used be default.
 
 ## Encourage adaptation from a consistent default

diff --git a/docs/docs/v1.md b/docs/docs/v1.md
@@ -4,7 +4,7 @@ While v1 has been deprecated and we recommend using [v2](index.md) moving forwar
 
 ## Requirements
 
- - Python 3.7+
+ - Python 3.8+
  - [cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: `pip install cookiecutter`
 
 ## Starting a new project

diff --git a/docs/overrides/partials/nav.html b/docs/overrides/partials/nav.html
@@ -0,0 +1,42 @@
+{#-
+  Vendored from:
+  https://github.com/squidfunk/mkdocs-material/blob/bd708f16bc477f98aad68ad203826605e0298b8a/material/templates/partials/nav.html
+-#}
+{% import "partials/nav-item.html" as item with context %}
+{% set class = "md-nav md-nav--primary" %}
+{% if "navigation.tabs" in features %}
+  {% set class = class ~ " md-nav--lifted" %}
+{% endif %}
+{% if "toc.integrate" in features %}
+  {% set class = class ~ " md-nav--integrated" %}
+{% endif %}
+<nav class="{{ class }}" aria-label="{{ lang.t('nav') }}" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="{{ config.extra.homepage | d(nav.homepage.url, true) | url }}" title="{{ config.site_name | e }}" class="md-nav__button md-logo" aria-label="{{ config.site_name }}" data-md-component="logo">
+      {% include "partials/logo.html" %}
+    </a>
+    {{ config.site_name }}
+  </label>
+  {% if config.repo_url %}
+    <div class="md-nav__source">
+      {% include "partials/source.html" %}
+    </div>
+  {% endif %}
+  <ul class="md-nav__list" data-md-scrollfix>
+    {% for nav_item in nav %}
+      {% set path = "__nav_" ~ loop.index %}
+      {{ item.render(nav_item, path, 1) }}
+    {% endfor %}
+  </ul>
+</nav>
+
+
+{#-
+  Added DD footer
+-#}
+<a href="https://drivendata.co/open-source.html" target="_blank" style="border-bottom: 0; padding-top: 3rem; display: block">
+  <img style="width: 60%" src="https://s3.amazonaws.com/drivendata/images/drivendata-logo.svg">
+</a>
+<p style="width: 60%">
+  Cookiecutter Data Science is a DrivenData project.
+</p>
diff --git a/docs/scripts/configuration-table.py b/docs/scripts/configuration-table.py
@@ -2,6 +2,8 @@
 import re
 from pathlib import Path
 
+from ccds.hook_utils.dependencies import basic
+
 PROJECT_ROOT = Path(__file__).parents[2]
 
 
@@ -94,13 +96,19 @@ def build_help_table_rows(data, help_lookup, lookup_prefix=""):
             for ix, choice in enumerate(top_value):
                 if isinstance(choice, str):
                     item_help = help_lookup[f"{lookup_prefix}{top_key}.{choice}"]
+                    more_info = (
+                        item_help["more_information"]
+                        if choice != "basic"
+                        else item_help["more_information"] + (", ".join(basic))
+                    )
+
                     section.append(
                         _table_row(
                             (
                                 choice,
                                 "",
                                 item_help["description"],
-                                item_help["more_information"],
+                                more_info,
                             )
                         )
                     )

diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
@@ -1,44 +1,27 @@
 import shutil
+from copy import copy
 from pathlib import Path
 
 # https://github.com/cookiecutter/cookiecutter/issues/824
 #   our workaround is to include these utility functions in the CCDS package
 from ccds.hook_utils.custom_config import write_custom_config
-from ccds.hook_utils.dependencies import write_dependencies
+from ccds.hook_utils.dependencies import basic, packages, scaffold, write_dependencies
 
 #
 #  TEMPLATIZED VARIABLES FILLED IN BY COOKIECUTTER
 #
-packages = [
-    "black",
-    "flake8",
-    "isort",
-    "pip",
-    "python-dotenv",
-]
+packages_to_install = copy(packages)
 
 # {% if cookiecutter.dataset_storage.s3 %}
-packages += ["awscli"]
+packages_to_install += ["awscli"]
 # {% endif %} #
 
 # {% if cookiecutter.include_code_scaffold == "Yes" %}
-packages += [
-    "typer",
-    "loguru",
-    "tqdm",
-]
+packages_to_install += scaffold
 # {% endif %}
 
 # {% if cookiecutter.pydata_packages == "basic" %}
-packages += [
-    "ipython",
-    "jupyterlab",
-    "matplotlib",
-    "notebook",
-    "numpy",
-    "pandas",
-    "scikit-learn",
-]
+packages_to_install += basic
 # {% endif %}
 
 # track packages that are not available through conda
@@ -51,7 +34,7 @@
 # or none if none selected
 docs_path = Path("docs")
 # {% if cookiecutter.docs != "none" %}
-packages += ["{{ cookiecutter.docs }}"]
+packages_to_install += ["{{ cookiecutter.docs }}"]
 pip_only_packages += ["{{ cookiecutter.docs }}"]
 docs_subpath = docs_path / "{{ cookiecutter.docs }}"
 for obj in docs_subpath.iterdir():
@@ -68,7 +51,7 @@
 #
 write_dependencies(
     "{{ cookiecutter.dependency_file }}",
-    packages,
+    packages_to_install,
     pip_only_packages,
     repo_name="{{ cookiecutter.repo_name }}",
     module_name="{{ cookiecutter.module_name }}",

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,6 @@ classifiers = [
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: MIT License",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.7",
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
@@ -29,7 +28,7 @@ classifiers = [
   "Topic :: Scientific/Engineering",
   "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 dependencies = [
   "click",
   "cookiecutter",