Merge branch 'master' into master

skypilot-org · Feb 10, 2025 · a7948e5 · a7948e5
2 parents a827f14 + 54fe787
commit a7948e5
Show file tree

Hide file tree

Showing 180 changed files with 10,209 additions and 2,103 deletions.
diff --git a/.buildkite/generate_pipeline.py b/.buildkite/generate_pipeline.py
diff --git a/.buildkite/test_buildkite_pipeline_generation.py b/.buildkite/test_buildkite_pipeline_generation.py
@@ -0,0 +1,190 @@
+"""This script tests the buildkite pipeline generation script.
+
+It modifies the smoke test files to print the test name and return without
+running the actual test code, then runs the pipeline generation script
+and compares the output to the generated pipeline.
+
+Some parameters in smoke tests require credentials to set up, so we need to
+run the tests with the credentials.
+
+PYTHONPATH=$(pwd)/tests:$PYTHONPATH \
+pytest -n 0 --dist no .buildkite/test_buildkite_pipeline_generation.py
+
+"""
+
+import os
+import pathlib
+import re
+import subprocess
+
+import pytest
+import yaml
+
+
+def _insert_test_tracers(content):
+    """Stub out every `def test_...(` definition in `content` with a tracer.
+
+    Matches any function definition starting with `def test_...(`, possibly
+    spanning multiple lines and optionally carrying a `-> ...` return
+    annotation, and inserts right after the definition line:
+
+    1) print(function_name)
+    2) If 'generic_cloud' is in the parameters, prints "generic_cloud: {generic_cloud}".
+    3) return
+
+    Each of these inserted lines is indented 4 spaces more than the
+    function definition line. The original body is left in place after the
+    inserted `return`, so it is never executed.
+
+    Args:
+        content: Source text of a Python test module.
+
+    Returns:
+        The modified source text, prefixed with an `import sys` line (the
+        inserted prints write to `sys.stderr`).
+
+    Caveats:
+      • Very naive parameter parsing.
+      • Splits by commas, then strips out type annotations and defaults.
+      • If you have advanced signatures, you may need a more robust approach.
+    """
+
+    # The indent group only accepts spaces/tabs: with MULTILINE, `\s*` would
+    # also swallow the blank lines preceding the definition and those blank
+    # lines would then be repeated in front of every inserted line.
+    # The optional `-> ...` part lets a definition with a return annotation
+    # end at its own colon instead of running on to some later `):`.
+    pattern = re.compile(
+        r'^(?P<indent>[ \t]*)'
+        r'(?P<signature>def\s+(?P<name>test_\w+)\((?P<params>.*?)\)'
+        r'(?:\s*->[^:\n]*)?):\s*\n',
+        flags=re.MULTILINE | re.DOTALL)
+
+    def replacer(match):
+        base_indent = match.group('indent')  # e.g. "    "
+        # e.g. "def test_job_queue(generic_cloud: str, x=42)"
+        signature = match.group('signature')
+        func_name = match.group('name')  # e.g. "test_job_queue"
+        # Flatten newlines/spaces of a (possibly multi-line) parameter list.
+        raw_params = re.sub(r'\s+', ' ', match.group('params')).strip()
+
+        # Indent our inserted lines 4 spaces beyond the function definition:
+        deeper_indent = base_indent + '    '
+
+        # --------------------------------------------------
+        # Parse out parameter names (naively)
+        # --------------------------------------------------
+        # 1) Split on commas.
+        # 2) For each piece, remove type annotations (":something")
+        #    and default values ("=something").
+        # 3) Strip off leading "*" or "**".
+        # e.g. "generic_cloud: str" => "generic_cloud"
+        #      "x=42" => "x"
+        #      "**kwargs" => "kwargs"
+        # --------------------------------------------------
+        arg_list = []
+        for piece in raw_params.split(','):
+            name = re.split(r'[:=]', piece.strip(), maxsplit=1)[0]
+            name = name.lstrip('*').strip()
+            if name:
+                arg_list.append(name)
+
+        # Build the lines to insert, starting with the original definition
+        # line + colon, followed by the print of the function name.
+        lines = [
+            f"{base_indent}{signature}:",
+            f"{deeper_indent}print('\\n{func_name}\\n', file=sys.stderr, flush=True)",
+        ]
+        # Print generic_cloud if present
+        if 'generic_cloud' in arg_list:
+            lines.append(
+                f"{deeper_indent}print(f'generic_cloud: {{generic_cloud}}', file=sys.stderr, flush=True)"
+            )
+        # Return before the original body runs. The trailing newline replaces
+        # the one consumed by the pattern.
+        lines.append(f"{deeper_indent}return\n")
+
+        return "\n".join(lines)
+
+    updated_content = pattern.sub(replacer, content)
+    return 'import sys\n' + updated_content
+
+
+def _extract_test_names_from_pipeline(pipeline_path):
+    """Collect the test names referenced by a generated pipeline file.
+
+    Loads the YAML file at `pipeline_path` and, for every entry under its
+    `steps` key, takes the text after the last `::` of the step's `command`
+    up to the first whitespace.
+
+    Args:
+        pipeline_path: Path of the pipeline YAML file to read.
+
+    Returns:
+        A set with one extracted test name per distinct test.
+    """
+    with open(pipeline_path, 'r') as pipeline_file:
+        steps = yaml.safe_load(pipeline_file)['steps']
+
+    def _test_name_of(cmd):
+        # e.g. "pytest tests/smoke_tests/test_basic.py::test_example_app --aws"
+        assert '::' in cmd
+        after_separator = cmd.split('::')[-1]
+        # Keep only the first token so trailing pytest args are dropped.
+        return after_separator.split()[0]
+
+    return {_test_name_of(step['command']) for step in steps}
+
+
+@pytest.mark.parametrize('args', [
+    '',
+    '--aws',
+    '--gcp',
+    '--azure',
+    '--kubernetes',
+    '--generic-cloud aws',
+    '--generic-cloud gcp',
+    '--managed-jobs',
+    '--managed-jobs --serve',
+    '--managed-jobs --aws',
+])
+def test_generate_same_as_pytest(args):
+    """Check that pytest and the pipeline generator select the same tests.
+
+    Every smoke test file is temporarily rewritten with
+    `_insert_test_tracers`, `pytest ./tests/test_smoke.py` is run with
+    `args`, and every `test_*` token found in its output is compared with the
+    test names extracted from the pipeline that
+    `.buildkite/generate_pipeline.py --args <args>` generates.
+
+    Args:
+        args: Space-separated pytest command-line flags for this case.
+    """
+    # Get all test files from smoke_tests directory
+    test_files = [
+        f'tests/smoke_tests/{f}' for f in os.listdir('tests/smoke_tests')
+        if f.endswith('.py') and f != 'test_quick_tests_core.py'
+    ]
+
+    # Original contents keyed by path, recorded before each file is
+    # overwritten, so that exactly the touched files can be restored.
+    original_contents = {}
+    try:
+        # Modify each test file to just print and return
+        for test_file in test_files:
+            with open(test_file, 'r') as f:
+                content = f.read()
+            original_contents[test_file] = content
+
+            with open(test_file, 'w') as f:
+                f.write(_insert_test_tracers(content))
+
+        # Get all test functions from pytest for all files. The flags are
+        # plain space-separated tokens, so an argv list avoids the shell.
+        pytest_output = subprocess.check_output(
+            ['pytest', './tests/test_smoke.py', *args.split()],
+            stderr=subprocess.STDOUT,
+            text=True)
+        pytest_tests = set(re.findall(r"test_\w+", pytest_output))
+
+        # Generate pipeline and extract test functions using YAML parsing
+        env = dict(os.environ)
+        env['PYTHONPATH'] = f"{pathlib.Path.cwd()}/tests:" \
+                            f"{env.get('PYTHONPATH', '')}"
+
+        subprocess.run(
+            ['python', '.buildkite/generate_pipeline.py', '--args', args],
+            env=env,
+            check=True)
+
+        # Extract test names using YAML parsing
+        pipeline_tests = _extract_test_names_from_pipeline(
+            '.buildkite/pipeline_smoke_tests_release.yaml')
+
+        # Compare the sets
+        assert pytest_tests == pipeline_tests, \
+            f'Mismatch between pytest tests {pytest_tests} and pipeline tests {pipeline_tests}'
+
+    finally:
+        # Restore only the files modified above. `git reset --hard HEAD`
+        # would also discard every unrelated uncommitted change in the
+        # working tree.
+        # NOTE(review): the generated pipeline YAML is left in place; confirm
+        # whether it also needs to be cleaned up.
+        for test_file, content in original_contents.items():
+            with open(test_file, 'w') as f:
+                f.write(content)
diff --git a/.github/workflows/test-poetry-build.yml b/.github/workflows/test-poetry-build.yml
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -50,9 +50,10 @@ pytest tests/test_smoke.py --generic-cloud azure
 
 For profiling code, use:
 ```
-pip install tuna # Tuna is used for visualization of profiling data.
-python3 -m cProfile -o sky.prof -m sky.cli status # Or some other command
-tuna sky.prof
+pip install py-spy # py-spy is a sampling profiler for Python programs
+py-spy record -t -o sky.svg -- python -m sky.cli status # Or some other command
+py-spy top -- python -m sky.cli status # Get a live top view
+py-spy -h # For more options
 ```
 
 #### Testing in a container

diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@
 
 ----
 :fire: *News* :fire:
+- [Jan 2025] Launch and Serve **[DeepSeek-R1](https://github.com/deepseek-ai/DeepSeek-R1)** and **[Janus](https://github.com/deepseek-ai/DeepSeek-Janus)** on Kubernetes or Any Cloud: [**R1 example**](./llm/deepseek-r1/) and [**Janus example**](./llm/deepseek-janus/)
 - [Oct 2024] :tada: **SkyPilot crossed 1M+ downloads** :tada:: Thank you to our community! [**Twitter/X**](https://x.com/skypilot_org/status/1844770841718067638)
 - [Sep 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
 - [Sep 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI.
@@ -85,7 +86,7 @@ To get the latest features and fixes, use the nightly build or [install from sou
 pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp]"
 ```
 
-[Current supported infra](https://docs.skypilot.co/en/latest/getting-started/installation.html) (Kubernetes; AWS, GCP, Azure, OCI, Lambda Cloud, Fluidstack, RunPod, Cudo, Paperspace, Cloudflare, Samsung, IBM, VMware vSphere):
+[Current supported infra](https://docs.skypilot.co/en/latest/getting-started/installation.html) (Kubernetes; AWS, GCP, Azure, OCI, Lambda Cloud, Fluidstack, RunPod, Cudo, Digital Ocean, Paperspace, Cloudflare, Samsung, IBM, Vast.ai, VMware vSphere):
 <p align="center">
   <picture>
     <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/cloud-logos-dark.png">
@@ -161,6 +162,8 @@ To learn more, see [Concept: Sky Computing](https://docs.skypilot.co/en/latest/s
 Runnable examples:
 - [**AI Gallery**](https://docs.skypilot.co/en/latest/gallery/index.html)
 - LLMs on SkyPilot
+  - [DeepSeek-R1](./llm/deepseek-r1/)
+  - [DeepSeek-Janus](./llm/deepseek-janus/)
   - [Llama 3.2: lightweight and vision models](./llm/llama-3_2/)
   - [Pixtral](./llm/pixtral/)
   - [Llama 3.1 finetuning](./llm/llama-3_1-finetuning/) and [serving](./llm/llama-3_1/)
@@ -199,6 +202,8 @@ Read the research:
 - [Sky Computing vision paper](https://sigops.org/s/conferences/hotos/2021/papers/hotos21-s02-stoica.pdf) (HotOS 2021)
 - [Policy for Managed Spot Jobs](https://www.usenix.org/conference/nsdi24/presentation/wu-zhanghao)  (NSDI 2024)
 
+SkyPilot was initially started at the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley and has since gained many industry contributors. Read more about the project's origin [here](https://docs.skypilot.co/en/latest/sky-computing.html).
+
 ## Support and Questions
 We are excited to hear your feedback!
 * For issues and feature requests, please [open a GitHub issue](https://github.com/skypilot-org/skypilot/issues/new).

diff --git a/docs/source/_static/custom.js b/docs/source/_static/custom.js
@@ -1,17 +1,17 @@
-// As of 2025-01-01, Kapa seems to be having issues loading on some ISPs, including comcast. Uncomment once resolved.
-// document.addEventListener('DOMContentLoaded', function () {
-//        var script = document.createElement('script');
-//        script.src = 'https://widget.kapa.ai/kapa-widget.bundle.js';
-//        script.setAttribute('data-website-id', '4223d017-a3d2-4b92-b191-ea4d425a23c3');
-//        script.setAttribute('data-project-name', 'SkyPilot');
-//        script.setAttribute('data-project-color', '#4C4C4D');
-//        script.setAttribute('data-project-logo', 'https://avatars.githubusercontent.com/u/109387420?s=100&v=4');
-//        script.setAttribute('data-modal-disclaimer', 'Results are automatically generated and may be inaccurate or contain inappropriate information. Do not include any sensitive information in your query.\n**To get further assistance, you can chat directly with the development team** by joining the [SkyPilot Slack](https://slack.skypilot.co/).');
-//        script.setAttribute('data-modal-title', 'SkyPilot Docs AI - Ask a Question.');
-//        script.setAttribute('data-button-position-bottom', '100px');
-//        script.async = true;
-//        document.head.appendChild(script);
-// });
+// Once the DOM is ready, inject the Kapa widget bundle as an async <script>
+// in <head>. The widget is configured through data-* attributes on that tag.
+document.addEventListener('DOMContentLoaded', function () {
+       var script = document.createElement('script');
+       script.src = 'https://widget.kapa.ai/kapa-widget.bundle.js';
+       // Project identity and branding passed to the widget.
+       script.setAttribute('data-website-id', '4223d017-a3d2-4b92-b191-ea4d425a23c3');
+       script.setAttribute('data-project-name', 'SkyPilot');
+       script.setAttribute('data-project-color', '#4C4C4D');
+       script.setAttribute('data-project-logo', 'https://avatars.githubusercontent.com/u/109387420?s=100&v=4');
+       // Disclaimer and title text for the widget's modal.
+       script.setAttribute('data-modal-disclaimer', 'Results are automatically generated and may be inaccurate or contain inappropriate information. Do not include any sensitive information in your query.\n**To get further assistance, you can chat directly with the development team** by joining the [SkyPilot Slack](https://slack.skypilot.co/).');
+       script.setAttribute('data-modal-title', 'SkyPilot Docs AI - Ask a Question.');
+       script.setAttribute('data-button-position-bottom', '100px');
+       // NOTE(review): presumably turns on Kapa's fingerprint-based user
+       // analytics; confirm against the Kapa widget documentation.
+       script.setAttribute('data-user-analytics-fingerprint-enabled', 'true');
+       script.async = true;
+       document.head.appendChild(script);
+});
 
 (function(h,o,t,j,a,r){
        h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)};
@@ -29,6 +29,8 @@ document.addEventListener('DOMContentLoaded', () => {
         { selector: '.toctree-l1 > a', text: 'Many Parallel Jobs' },
         { selector: '.toctree-l1 > a', text: 'Admin Policy Enforcement' },
         { selector: '.toctree-l1 > a', text: 'Using Existing Machines' },
+        { selector: '.toctree-l2 > a', text: 'Multiple Kubernetes Clusters' },
+        { selector: '.toctree-l1 > a', text: 'HTTPS Encryption' },
     ];
     newItems.forEach(({ selector, text }) => {
         document.querySelectorAll(selector).forEach((el) => {

diff --git a/docs/source/cloud-setup/cloud-permissions/aws.rst b/docs/source/cloud-setup/cloud-permissions/aws.rst
@@ -223,7 +223,7 @@ IAM Role Creation
 
 Using a specific VPC
 -----------------------
-By default, SkyPilot uses the "default" VPC in each region.
+By default, SkyPilot uses the "default" VPC in each region. If a region does not have a `default VPC <https://docs.aws.amazon.com/vpc/latest/userguide/work-with-default-vpc.html#create-default-vpc>`_, SkyPilot will not be able to use the region.
 
 To instruct SkyPilot to use a specific VPC, you can use SkyPilot's global config
 file ``~/.sky/config.yaml`` to specify the VPC name in the ``aws.vpc_name``