From 0447079a82c0160de1c8d77b306b765fb41db14b Mon Sep 17 00:00:00 2001 From: Simon Prickett Date: Thu, 9 Jan 2025 20:44:28 +0000 Subject: [PATCH] Further dataset updates for the new URLs. --- topic/machine-learning/automl/test.py | 2 +- .../cratedb-vectorstore-rag-openai-sql.ipynb | 2 +- .../mlops-mlflow/tracking_merlion.ipynb | 24 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/topic/machine-learning/automl/test.py b/topic/machine-learning/automl/test.py index da39e931..2c8331e7 100644 --- a/topic/machine-learning/automl/test.py +++ b/topic/machine-learning/automl/test.py @@ -53,7 +53,7 @@ def churn_dataset(cratedb): Provide test case with a provisioned dataset. """ cratedb.import_csv_pandas( - filepath="https://github.com/crate/cratedb-datasets/raw/main/machine-learning/automl/churn-dataset.csv", + filepath="https://cdn.crate.io/downloads/datasets/cratedb-datasets/machine-learning/automl/churn-dataset.csv", tablename="pycaret_churn", ) cratedb.run_sql("REFRESH TABLE pycaret_churn;") diff --git a/topic/machine-learning/llm-langchain/cratedb-vectorstore-rag-openai-sql.ipynb b/topic/machine-learning/llm-langchain/cratedb-vectorstore-rag-openai-sql.ipynb index 4ae933d6..7eefcead 100644 --- a/topic/machine-learning/llm-langchain/cratedb-vectorstore-rag-openai-sql.ipynb +++ b/topic/machine-learning/llm-langchain/cratedb-vectorstore-rag-openai-sql.ipynb @@ -196,7 +196,7 @@ "metadata": {}, "outputs": [], "source": [ - "loader = PyPDFLoader(\"https://github.com/crate/cratedb-datasets/raw/main/machine-learning/fulltext/White%20paper%20-%20Time-series%20data%20in%20manufacturing.pdf\")\n", + "loader = PyPDFLoader(\"https://cdn.crate.io/downloads/datasets/cratedb-datasets/machine-learning/fulltext/White%20paper%20-%20Time-series%20data%20in%20manufacturing.pdf\")\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", "pages = loader.load_and_split(text_splitter)" ] diff --git a/topic/machine-learning/mlops-mlflow/tracking_merlion.ipynb b/topic/machine-learning/mlops-mlflow/tracking_merlion.ipynb index ca22bd6a..b5617dd2 100644 --- a/topic/machine-learning/mlops-mlflow/tracking_merlion.ipynb +++ b/topic/machine-learning/mlops-mlflow/tracking_merlion.ipynb @@ -32,7 +32,7 @@ "\n", "\n", "# Download the data from the Numenta Anomaly Benchmark\n", - "data = pd.read_csv(\"https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv\")\n", + "data = pd.read_csv(\"https://cdn.crate.io/downloads/datasets/cratedb-datasets/timeseries/nab-machine-failure.csv\")\n", "\n", "# Connect to a self-managed CrateDB instance.\n", "CRATEDB_HTTP_URL_DEFAULT = \"http://crate@localhost:4200/\"\n", @@ -47,16 +47,22 @@ }, { "cell_type": "markdown", - "source": [ - "2. Import data into CrateDB" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "2. Import data into CrateDB" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "is_executing": true + } + }, "outputs": [], "source": [ "# Split the data into chunks of 1000 rows each for better insert performance\n", @@ -72,13 +78,7 @@ " for chunk in chunks:\n", " cursor.executemany(\"INSERT INTO machine_data (timestamp, temperature) VALUES (?, ?);\", list(chunk.itertuples(index=False, name=None)))\n", " cursor.execute(\"REFRESH TABLE machine_data;\")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "is_executing": true - } - } + ] }, { "cell_type": "markdown",