From e8e4d955ed9a1b2d61aff7756ab8dac0d414acae Mon Sep 17 00:00:00 2001 From: Argenis Leon Date: Sun, 10 Nov 2019 17:05:31 -0600 Subject: [PATCH] The kitty is playing --- examples/sandbox.ipynb | 43920 ++++++++++++++++++++++++++++++++- optimus/profiler/profiler.py | 4 +- 2 files changed, 43776 insertions(+), 148 deletions(-) diff --git a/examples/sandbox.ipynb b/examples/sandbox.ipynb index c6e47665..091a58cc 100644 --- a/examples/sandbox.ipynb +++ b/examples/sandbox.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -23,102 +14,91 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "var kernel = IPython.notebook.kernel;\n", - "kernel.execute(\"console.log('asrgenis')\");\n", - "console.log(kernel)\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "%%javascript\n", - "var kernel = IPython.notebook.kernel;\n", - "kernel.execute(\"console.log('asrgenis')\");\n", - "console.log(kernel)" + "import sys\n", + "sys.path.append(\"..\")" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 3, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "B is not a valid date format", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfound\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'{} is not a valid date format'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstart\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mstart\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 21\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mValueError\u001b[0m: B is not a valid date format" + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n", + " from collections import Callable\n", + "\n", + " You are using PySparkling of version 2.4.10, but your PySpark is of\n", + " version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n", + "`formatargspec` is deprecated since Python 3.5. Use `signature` and the `Signature` object directly\n" ] } ], "source": [ - "formats = [\"sss\",\"HH\",\"A\"]\n", - "formats.sort(key=len,reverse=True)\n", - "\n", - "# print(value[1:1+1])\n", - "value = \"sssHHA\"\n", - "result = []\n", - "start=0\n", - "end = len(value)\n", - "found = False\n", - "\n", - "while start\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mdf_indexed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstring_to_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"category\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstringOrderType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"frequencyAsc\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[0massert_spark_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_indexed\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m expected_collect = op.sc.parallelize([Row(id=0, category='a', category_index=2.0),\n", + "\u001b[1;31mNameError\u001b[0m: name 'assert_spark_df' is not defined" ] } ], "source": [ - "from optimus import Optimus" + "import optimus.ml.feature as fe\n", + "\n", + "from pyspark.sql.types import *\n", + "import datetime\n", + "\n", + "\n", + "df = op.spark.createDataFrame([(0, \"a\"), (1, \"b\"), (2, \"c\"), (3, \"a\"), (4, \"a\"), (5, \"c\")],\n", + " [\"id\", \"category\"])\n", + "\n", + "df_indexed = fe.string_to_index(df, \"category\", stringOrderType=\"frequencyAsc\")\n", + "\n", + "assert_spark_df(df_indexed)\n", + "\n", + "expected_collect = op.sc.parallelize([Row(id=0, category='a', category_index=2.0),\n", + " Row(id=1, category='b', category_index=0.0),\n", + " Row(id=2, category='c', category_index=1.0),\n", + " Row(id=3, category='a', category_index=2.0),\n", + " Row(id=4, category='a', category_index=2.0),\n", + " Row(id=5, category='c', category_index=1.0)]).toDF()\n", + "\n", + "assert_equal(df_indexed.select(\"category\", \"category***INDEX_TO_STRING\", \"id\").collect(), expected_collect.collect())" ] }, { @@ -129,7 +109,7 @@ { "data": { "text/html": [ - "Your connection keys are in bumblebee.ini. If you really care about privacy get your keys and put them here. If you are testing just call bumblebee" + "Open Bumblebee: https://app.hi-bumblebee.com
If you really care about privacy get your keys in bumblebee.ini and put them here
" ], "text/plain": [ "" @@ -137,91 +117,43739 @@ }, "metadata": {}, "output_type": "display_data" - }, + } + ], + "source": [ + "op= Optimus(comm=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header='true', infer_schema='true', charset=\"UTF-8\", null_value=\"None\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "INFO:optimus:Operative System:Windows\n", - "INFO:optimus:Just check that Spark and all necessary environments vars are present...\n", - "INFO:optimus:-----\n", - "INFO:optimus:SPARK_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n", - "INFO:optimus:HADOOP_HOME=C:\\opt\\hadoop-2.7.7\n", - "INFO:optimus:PYSPARK_PYTHON=C:\\Users\\argenisleon\\Anaconda3\\python.exe\n", - "INFO:optimus:PYSPARK_DRIVER_PYTHON=jupyter\n", - "INFO:optimus:PYSPARK_SUBMIT_ARGS=--jars \"file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/presto-jdbc-0.224.jar\" --driver-class-path \"C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/presto-jdbc-0.224.jar\" --conf \"spark.sql.catalogImplementation=hive\" pyspark-shell\n", - "INFO:optimus:JAVA_HOME=C:\\java\n", - "INFO:optimus:Pyarrow Installed\n", - "INFO:optimus:-----\n", - "INFO:optimus:Starting or getting SparkSession and SparkContext...\n", - "INFO:optimus:Spark Version:2.3.1\n", - "INFO:optimus:\n", - " ____ __ _ \n", - " / __ \\____ / /_(_)___ ___ __ _______\n", - " / / / / __ \\/ __/ / __ `__ \\/ / / / ___/\n", - " / /_/ / /_/ / /_/ / / / / / / /_/ (__ ) \n", - " \\____/ .___/\\__/_/_/ /_/ /_/\\__,_/____/ \n", - " /_/ \n", - " \n", - "INFO:optimus:Transform and Roll out...\n", - "INFO:optimus:Optimus successfully imported. Have fun :).\n", - "INFO:optimus:Config.ini not found\n" + "Send!\n" ] } ], "source": [ - "op= Optimus(master=\"local[*]\", app_name= \"optimus\", comm=True, verbose=True)" + "df.send()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'-', '+', '*', '/'}\n", + "(['df[\"asd\"]', '1'], ['*'])\n" + ] + } + ], "source": [ - "from datetime import date, datetime\n", - "from pyspark.sql.types import *\n", + "def parse(x):\n", + " operators = set('+-*/')\n", + " print(operators)\n", + " op_out = [] #This holds the operators that are found in the string (left to right)\n", + " num_out = [] #this holds the non-operators that are found in the string (left to right)\n", + " buff = []\n", + " for c in x: #examine 1 character at a time\n", + " if c in operators: \n", + " #found an operator. Everything we've accumulated in `buff` is \n", + " #a single \"number\". Join it together and put it in `num_out`.\n", + " num_out.append(''.join(buff))\n", + " buff = []\n", + " op_out.append(c)\n", + " else:\n", + " #not an operator. Just accumulate this character in buff.\n", + " buff.append(c)\n", + " num_out.append(''.join(buff))\n", + " return num_out,op_out\n", "\n", - "source_df = op.create.df(\n", - " [\n", - " (\"names\", \"str\", True),\n", - " (\"height(ft)\", \"int\", True),\n", - " (\"function\", \"str\", True),\n", - " (\"rank\", \"int\", True),\n", - " (\"age\", \"int\", True),\n", - " (\"weight(t)\", \"float\", True),\n", - " (\"japanese name\", ArrayType(StringType()), True),\n", - " (\"last position seen\", \"str\", True),\n", - " (\"date arrival\", \"str\", True),\n", - " (\"last date seen\", \"str\", True),\n", - " (\"attributes\", ArrayType(FloatType()), True),\n", - " (\"DateType\", DateType()),\n", - " (\"Timestamp\", TimestampType()),\n", - " (\"Cybertronian\", \"bool\", True),\n", - " (\"function(binary)\", \"binary\", False),\n", - " (\"NullType\", \"null\", True),\n", + "print (parse('df[\"asd\"]*1'))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'SHOOTING': {'null': 318054, 'missing': 0, 'string': 1019},\n", + " 'MONTH': {'null': 0, 'missing': 0, 'int': 319073},\n", + " 'HOUR': {'null': 0, 'missing': 0, 'int': 319073},\n", + " 'Lat': {'null': 19999, 'missing': 0, 'decimal': 299074},\n", + " 'STREET': {'null': 10871, 'missing': 0, 'string': 308202},\n", + " 'DISTRICT': {'null': 1765, 'missing': 0, 'string': 317308},\n", + " 'OFFENSE_CODE_GROUP': {'null': 0, 'missing': 0, 'string': 319073},\n", + " 'REPORTING_AREA': {'null': 0, 'missing': 0, 'string': 319073},\n", + " 'OCCURRED_ON_DATE': {'null': 0, 'missing': 0, 'date': 319073},\n", + " 'UCR_PART': {'null': 90, 'missing': 0, 'string': 318983},\n", + " 'INCIDENT_NUMBER': {'null': 0, 'missing': 0, 'string': 319073},\n", + " 'DAY_OF_WEEK': {'null': 0, 'missing': 0, 'string': 319073},\n", + " 'OFFENSE_DESCRIPTION': {'null': 0, 'missing': 0, 'string': 319073},\n", + " 'YEAR': {'null': 0, 'missing': 0, 'int': 319073},\n", + " 'Long': {'null': 19999, 'missing': 0, 'decimal': 299074},\n", + " 'OFFENSE_CODE': {'null': 0, 'missing': 0, 'int': 319073},\n", + " 'Location': {'null': 0, 'missing': 0, 'string': 319073}}" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from optimus.profiler.profiler import Profiler\n", + "p = Profiler()\n", "\n", - " ],\n", - " [\n", - " (\"Optim'us\", 28, \"Leader\", 10, 5000000, 4.30, [\"Inochi\", \"Convoy\"], \"19.442735,-99.201111\", \"1980/04/10\",\n", - " \"2016/09/10\", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray(\"Leader\", \"utf-8\"),\n", - " None),\n", - " (\"bumbl#ebéé \", 17, \"Espionage\", 7, 5000000, 2.0, [\"Bumble\", \"Goldback\"], \"10.642707,-71.612534\", \"1980/04/10\",\n", - " \"2015/08/10\", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray(\"Espionage\", \"utf-8\"),\n", - " None),\n", - " (\"ironhide&\", 26, \"Security\", 7, 5000000, 4.0, [\"Roadbuster\"], \"37.789563,-122.400356\", \"1980/04/10\",\n", - " \"2014/07/10\", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray(\"Security\", \"utf-8\"),\n", - " None),\n", - " (\"Jazz\", 13, \"First Lieutenant\", 8, 5000000, 1.80, [\"Meister\"], \"33.670666,-117.841553\", \"1980/04/10\",\n", - " \"2013/06/10\", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,\n", - " bytearray(\"First Lieutenant\", \"utf-8\"), None),\n", - " (\"Megatron\", None, \"None\", 10, 5000000, 5.70, [\"Megatron\"], None, \"1980/04/10\", \"2012/05/10\", [None, 5700.0],\n", - " date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray(\"None\", \"utf-8\"), None),\n", - " (\"\", 300, \"Battle Station\", 8, 5000000, None, [\"Metroflex\"], None, \"1980/04/10\", \"2011/04/10\",\n", - " [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"Battle Station\", \"utf-8\"), None),\n", - " (None, 2, \"3\", 4, 5, 6.0, [\"7\"], 8, \"1980/04/10\", \"2011/04/10\",\n", - " [11.0], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"15\", \"utf-8\"), None)\n", - " ], infer_schema=True)" + "df.cols.count_by_dtypes(\"*\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 319,073 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
INCIDENT_NUMBER
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_CODE
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_CODE_GROUP
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_DESCRIPTION
\n", + "
4 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DISTRICT
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
REPORTING_AREA
\n", + "
6 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
SHOOTING
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OCCURRED_ON_DATE
\n", + "
8 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
YEAR
\n", + "
9 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
MONTH
\n", + "
10 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DAY_OF_WEEK
\n", + "
11 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
HOUR
\n", + "
12 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
UCR_PART
\n", + "
13 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
STREET
\n", + "
14 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Lat
\n", + "
15 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Long
\n", + "
16 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Location
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070945\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 619\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Larceny\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LARCENY⋅ALL⋅OTHERS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 808\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-02⋅13:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Sunday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LINCOLN⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.35779134\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.13937053\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.35779134,⋅-71.13937053)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070943\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1402\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Vandalism\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VANDALISM\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 347\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-08-21⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Tuesday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Two\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " HECLA⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.30682138\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.06030035\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.30682138,⋅-71.06030035)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070941\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3410\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Towed\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " TOWED⋅MOTOR⋅VEHICLE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 151\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅19:27:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CAZENOVE⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.34658879\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07242943\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.34658879,⋅-71.07242943)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070940\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3114\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Investigate⋅Property\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " INVESTIGATE⋅PROPERTY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 272\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:16:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NEWCOMB⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.33418175\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07866441\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.33418175,⋅-71.07866441)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070938\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3114\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Investigate⋅Property\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " INVESTIGATE⋅PROPERTY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 421\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:05:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " DELHI⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.27536542\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.09036101\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.27536542,⋅-71.09036101)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070936\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3820\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Motor⋅Vehicle⋅Accident⋅Response\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M/V⋅ACCIDENT⋅INVOLVING⋅PEDESTRIAN⋅-⋅INJURY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 398\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:09:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " TALBOT⋅AVE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.29019621\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07159012\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.29019621,⋅-71.07159012)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070933\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 724\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Auto⋅Theft\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " AUTO⋅THEFT\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 330\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:25:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NORMANDY⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.30607218\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.0827326\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.30607218,⋅-71.08273260)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070932\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Verbal⋅Disputes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VERBAL⋅DISPUTE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 584\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:39:37\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LAWN⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.32701648\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.10555088\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.32701648,⋅-71.10555088)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070931\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Robbery\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ROBBERY⋅-⋅STREET\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 177\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:48:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " MASSACHUSETTS⋅AVE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.33152148\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07085307\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.33152148,⋅-71.07085307)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070929\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Verbal⋅Disputes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VERBAL⋅DISPUTE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 364\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:38:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LESLIE⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.29514664\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.05860832\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.29514664,⋅-71.05860832)\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 319,073 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 319,073 rows / 18 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
INCIDENT_NUMBER
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_CODE
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_CODE_GROUP
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OFFENSE_DESCRIPTION
\n", + "
4 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DISTRICT
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
REPORTING_AREA
\n", + "
6 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
SHOOTING
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
OCCURRED_ON_DATE
\n", + "
8 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
YEAR
\n", + "
9 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
MONTH
\n", + "
10 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DAY_OF_WEEK
\n", + "
11 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
HOUR
\n", + "
12 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
UCR_PART
\n", + "
13 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
STREET
\n", + "
14 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Lat
\n", + "
15 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Long
\n", + "
16 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Location
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
AAA
\n", + "
18 (int)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070945\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 619\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Larceny\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LARCENY⋅ALL⋅OTHERS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 808\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-02⋅13:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Sunday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LINCOLN⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.35779134\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.13937053\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.35779134,⋅-71.13937053)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070943\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1402\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Vandalism\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VANDALISM\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 347\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-08-21⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Tuesday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Two\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " HECLA⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.30682138\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.06030035\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.30682138,⋅-71.06030035)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070941\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3410\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Towed\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " TOWED⋅MOTOR⋅VEHICLE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 151\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅19:27:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CAZENOVE⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.34658879\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07242943\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.34658879,⋅-71.07242943)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070940\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3114\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Investigate⋅Property\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " INVESTIGATE⋅PROPERTY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " D4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 272\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:16:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NEWCOMB⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.33418175\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07866441\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.33418175,⋅-71.07866441)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070938\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3114\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Investigate⋅Property\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " INVESTIGATE⋅PROPERTY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 421\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:05:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " DELHI⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.27536542\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.09036101\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.27536542,⋅-71.09036101)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070936\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3820\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Motor⋅Vehicle⋅Accident⋅Response\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M/V⋅ACCIDENT⋅INVOLVING⋅PEDESTRIAN⋅-⋅INJURY\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 398\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:09:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " TALBOT⋅AVE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.29019621\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07159012\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.29019621,⋅-71.07159012)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070933\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 724\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Auto⋅Theft\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " AUTO⋅THEFT\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 330\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅21:25:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 21\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NORMANDY⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.30607218\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.0827326\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.30607218,⋅-71.08273260)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070932\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Verbal⋅Disputes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VERBAL⋅DISPUTE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " B2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 584\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:39:37\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LAWN⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.32701648\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.10555088\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.32701648,⋅-71.10555088)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070931\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Robbery\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ROBBERY⋅-⋅STREET\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 177\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:48:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅One\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " MASSACHUSETTS⋅AVE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.33152148\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.07085307\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.33152148,⋅-71.07085307)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " I182070929\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3301\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Verbal⋅Disputes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " VERBAL⋅DISPUTE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " C11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 364\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018-09-03⋅20:38:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2018\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Monday\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Part⋅Three\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " LESLIE⋅ST\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 42.29514664\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " -71.05860832\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " (42.29514664,⋅-71.05860832)\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 319,073 rows / 18 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.cols.set(\"AAA\",value= 1).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d1\n", + "d2\n", + "c1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 235, in dump\n", + " return Pickler.dump(self, obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 437, in dump\n", + " self.save(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 786, in save_tuple\n", + " save(element)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 840, in _batch_appends\n", + " save(x)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 840, in _batch_appends\n", + " save(x)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 840, in _batch_appends\n", + " save(x)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 843, in _batch_appends\n", + " save(tmp[0])\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 372, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 840, in _batch_appends\n", + " save(x)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 840, in _batch_appends\n", + " save(x)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 378, in save_function\n", + " self.save_function_tuple(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 529, in save_function_tuple\n", + " save(closure_values)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 816, in save_list\n", + " self._batch_appends(obj)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 843, in _batch_appends\n", + " save(tmp[0])\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 549, in save\n", + " self.save_reduce(obj=obj, *rv)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\", line 804, in save_reduce\n", + " save(state)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 504, in save\n", + " f(self, obj) # Call unbound method with explicit self\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 856, in save_dict\n", + " self._batch_setitems(obj.items())\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 882, in _batch_setitems\n", + " save(v)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\pickle.py\", line 524, in save\n", + " rv = reduce(self.proto)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\", line 1257, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\", line 63, in deco\n", + " return f(*a, **kw)\n", + " File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\", line 332, in get_return_value\n", + " format(target_id, \".\", name, value))\n", + "py4j.protocol.Py4JError: An error occurred while calling o79.__getstate__. Trace:\n", + "py4j.Py4JException: Method __getstate__([]) does not exist\n", + "\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)\n", + "\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)\n", + "\tat py4j.Gateway.invoke(Gateway.java:274)\n", + "\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n", + "\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n", + "\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n", + "\tat java.lang.Thread.run(Thread.java:748)\n", + "\n", + "\n" + ] + }, + { + "ename": "PicklingError", + "evalue": "Could not serialize object: Py4JError: An error occurred while calling o79.__getstate__. Trace:\npy4j.Py4JException: Method __getstate__([]) does not exist\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)\r\n\tat py4j.Gateway.invoke(Gateway.java:274)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\n", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPy4JError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36mdump\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 234\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 235\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mPickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 236\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36mdump\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 436\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mframer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstart_framing\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 437\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 438\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSTOP\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_tuple\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 785\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0melement\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 786\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0melement\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 787\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtmp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPENDS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtmp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPENDS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtmp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPENDS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 842\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 843\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtmp\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 844\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPEND\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 371\u001b[0m or themodule is None):\n\u001b[1;32m--> 372\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 373\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtmp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPENDS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtmp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPENDS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function\u001b[1;34m(self, obj, name)\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mklass\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 378\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_function_tuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 379\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_function_tuple\u001b[1;34m(self, func)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__module__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure_values\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTUPLE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_list\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 815\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 816\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_appends\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 817\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_appends\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 842\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 843\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtmp\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 844\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mAPPEND\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;31m# Save the reduce() output and finally memoize the object\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_reduce\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0mrv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36msave_reduce\u001b[1;34m(self, func, args, state, listitems, dictitems, obj)\u001b[0m\n\u001b[0;32m 803\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstate\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 804\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 805\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBUILD\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 503\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 504\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Call unbound method with explicit self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 505\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave_dict\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 855\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmemoize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 856\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_batch_setitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 857\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36m_batch_setitems\u001b[1;34m(self, items)\u001b[0m\n\u001b[0;32m 881\u001b[0m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 882\u001b[1;33m \u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 883\u001b[0m \u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSETITEMS\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\pickle.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, obj, save_persistent_id)\u001b[0m\n\u001b[0;32m 523\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 524\u001b[1;33m \u001b[0mrv\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mproto\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 525\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}. Trace:\\n{3}\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 332\u001b[1;33m format(target_id, \".\", name, value))\n\u001b[0m\u001b[0;32m 333\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mPy4JError\u001b[0m: An error occurred while calling o79.__getstate__. Trace:\npy4j.Py4JException: Method __getstate__([]) does not exist\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)\r\n\tat py4j.Gateway.invoke(Gateway.java:274)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mPicklingError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\extension.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, name, infer, mismatch, stats)\u001b[0m\n\u001b[0;32m 482\u001b[0m \u001b[0mstats\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 483\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"json\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 484\u001b[1;33m mismatch=mismatch)\n\u001b[0m\u001b[0;32m 485\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 486\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mComm\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mdataset\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch)\u001b[0m\n\u001b[0;32m 367\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 368\u001b[0m output_columns = self.columns_stats(df, calculate_columns, buckets, infer, relative_error, approx_count,\n\u001b[1;32m--> 369\u001b[1;33m mismatch)\n\u001b[0m\u001b[0;32m 370\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 371\u001b[0m \u001b[1;31m# Update last profiling info\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_stats\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, mismatch)\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 443\u001b[0m \u001b[1;31m# Get columns data types. This is necessary to make the pertinent histogram calculations.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 444\u001b[1;33m \u001b[0mtype_details\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_count_data_types\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmismatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 445\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 446\u001b[0m \u001b[1;31m# Count the categorical, numerical, boolean and date columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36m_count_data_types\u001b[1;34m(self, df, columns, infer, mismatch)\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m \u001b[0mcount_by_data_type\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount_by_dtypes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minfer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmismatch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmismatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 69\u001b[0m \u001b[0mcount_by_data_type_no_mismatch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdeepcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcount_by_data_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;31m# Info from all the columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mcount_by_dtypes\u001b[1;34m(columns, infer, str_funcs, int_funcs, mismatch)\u001b[0m\n\u001b[0;32m 1872\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mflatMap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1873\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtypes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr_funcs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint_funcs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1874\u001b[1;33m .reduceByKey(lambda a, b: a + b))\n\u001b[0m\u001b[0;32m 1875\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1876\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mreduceByKey\u001b[1;34m(self, func, numPartitions, partitionFunc)\u001b[0m\n\u001b[0;32m 1638\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'a'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'b'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1639\u001b[0m \"\"\"\n\u001b[1;32m-> 1640\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcombineByKey\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumPartitions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpartitionFunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1641\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1642\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreduceByKeyLocally\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mcombineByKey\u001b[1;34m(self, createCombiner, mergeValue, mergeCombiners, numPartitions, partitionFunc)\u001b[0m\n\u001b[0;32m 1878\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1879\u001b[0m \u001b[0mlocally_combined\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmapPartitions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcombineLocally\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpreservesPartitioning\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1880\u001b[1;33m \u001b[0mshuffled\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlocally_combined\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpartitionBy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumPartitions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpartitionFunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1881\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1882\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_mergeCombiners\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mpartitionBy\u001b[1;34m(self, numPartitions, partitionFunc)\u001b[0m\n\u001b[0;32m 1815\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mSCCallSiteSync\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcss\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1816\u001b[0m pairRDD = self.ctx._jvm.PairwiseRDD(\n\u001b[1;32m-> 1817\u001b[1;33m keyed._jrdd.rdd()).asJavaPairRDD()\n\u001b[0m\u001b[0;32m 1818\u001b[0m jpartitioner = self.ctx._jvm.PythonPartitioner(numPartitions,\n\u001b[0;32m 1819\u001b[0m id(partitionFunc))\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36m_jrdd\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 2487\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2488\u001b[0m wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,\n\u001b[1;32m-> 2489\u001b[1;33m self._jrdd_deserializer, profiler)\n\u001b[0m\u001b[0;32m 2490\u001b[0m python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,\n\u001b[0;32m 2491\u001b[0m self.preservesPartitioning)\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36m_wrap_function\u001b[1;34m(sc, func, deserializer, serializer, profiler)\u001b[0m\n\u001b[0;32m 2420\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mserializer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"serializer should not be empty\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2421\u001b[0m \u001b[0mcommand\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mprofiler\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdeserializer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mserializer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2422\u001b[1;33m \u001b[0mpickled_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbroadcast_vars\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mincludes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_prepare_for_python_RDD\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2423\u001b[0m return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,\n\u001b[0;32m 2424\u001b[0m sc.pythonVer, broadcast_vars, sc._javaAccumulator)\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36m_prepare_for_python_RDD\u001b[1;34m(sc, command)\u001b[0m\n\u001b[0;32m 2406\u001b[0m \u001b[1;31m# the serialized command will be compressed by broadcast\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2407\u001b[0m \u001b[0mser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCloudPickleSerializer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2408\u001b[1;33m \u001b[0mpickled_command\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2409\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpickled_command\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m \u001b[1;33m<<\u001b[0m \u001b[1;36m20\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# 1M\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2410\u001b[0m \u001b[1;31m# The broadcast will have same life cycle as created PythonRDD\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\serializers.py\u001b[0m in \u001b[0;36mdumps\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 566\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 567\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mcloudpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 570\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36mdumps\u001b[1;34m(obj, protocol)\u001b[0m\n\u001b[0;32m 916\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 917\u001b[0m \u001b[0mcp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCloudPickler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 918\u001b[1;33m \u001b[0mcp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 919\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 920\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\cloudpickle.py\u001b[0m in \u001b[0;36mdump\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[0mmsg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"Could not serialize object: %s: %s\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0memsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 248\u001b[0m \u001b[0mprint_exec\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 249\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mpickle\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPicklingError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 250\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 251\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mPicklingError\u001b[0m: Could not serialize object: Py4JError: An error occurred while calling o79.__getstate__. Trace:\npy4j.Py4JException: Method __getstate__([]) does not exist\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)\r\n\tat py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)\r\n\tat py4j.Gateway.invoke(Gateway.java:274)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'}}}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.replace(\"product\", search=[\"piza\",\"pizzza\"], replace_by=\"pizza\", search_by=\"words\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'},\n", + " 'replace': ['product']}}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"null\": 0, \"numeric\": 0, \"categorical\": 0, \"binary\": 0, \"array\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 11, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 57.89}, \"frequency\": [{\"value\": \"pizza\", \"count\": 6, \"percentage\": 31.58}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"pizza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdxb4GQ0f6Xm0t86jzFilxQtbylD-QjxANcVLCDLEg5GJ0KGNZIo_Hf_zTQL9BExQJXRq1On7mALDOz7QER5R0no0tX4JYzdhRBSA6G8FjypleE3OtB2MBRv8EIL5Np8uE4fwXtkac85YsSHyCod5qvBrayqoJ4dtAmqNRaCE7zAkMTl1jXfkB7z5CV_BFg6YTmLfW-eE0_yRHRdC1KWa9YxCsQDJ_LDjlh0Yr9DDDMyeVb53RaUEkc9quHnWUiPFUGS0-KvIseIvJwpzy8Kwy9HGyhzww0LYtWxItXKT_KWtwNR1QLMJSMEILZpGwc1HOtIHLIX5lxeTgZ2XM_PttcKXsCEhpCGdGp2XTpHLTQIHBR2Q-wl2gDwHXp30vP8cEFctMZgqYJtx1QYWShV8vKBuen3m6q09gHCSweksVpdBLkVtWTib5_HRL7SWTPZN_B5Qre5i_zjl2A8Ur-5nsyloFp0r3Ky16Zi5CSe97VsF7o4mPBYP-pr8kdpoxVfQGRXM7_N7U2x3MMwL2qewwPNfl2BMbdhHQKldCsg8SzKfCNsAr_Z259Fjh_4yi-W4wCCZ8VgTtyZFDQ7QHNmsO196oA9JKwU2HRqb0U27xPCsCGLFx26A5Fb-vLtEnIkokigKmKX1Zokj934rMSZsRl7F-qHBv6qhrJ9TgrCL2yB6MZnNys1rARcP9NPAlvWSzIItxq1jEMZmkCACp0iEGAm1nYdLZXTmktRLObTidNoX_YDt6ezyB33yjtf09bExmCoCgXwBe6OiAbLG-I5VFf-foqaElIXo-Yhi9dilwFgtUyU6ptQ8guvrF7t6P1lABf29Z4MhNcbZ3FxuOkPxlp8RclHNsIgk51JwbCyq8spwpGa0Aisioe2vhxjDPdmyx-TZ0Bnceh74WegCWuppEZTBJOhzSjn3An81JaVjhLyli_KTzeQz7o9PqTrjUBS0E3UGZSKVEBBUcCXXq134yGjlMhZs1Frjeu2njAQVDjrVpBOv1za4DWv-U269efTzPimDspAhvV9pabj5um9Ob_QtMSSolv7Kxs_m9DClHh-mxTAUfyOnSJou3gd4VS0N9lIdWGoLSItRdyOXBL15HSPimvJX2M7qD_sEP3ZS3Al-Rnzccm5riWiiKqlSu8fpavNbTcnDznimeBe2wxyiA7Apou_0Pii03nkbTVCrE9Q04zLF1CyXjLc1-IX3HX5dLqFVLkQDhsgeRXapmJjYud1zZzvVchrCSl7uJIvmoBIp09WSp5zAtumfLZ2q0-ScYT8_0zS0aWrbMhzt4ytvhodKqQa3G63xUexvsKbe3dviMrXXS2CS13qadIL4gMMojNVeowMffV65G_-MaIPGRR6SmLqPCkETk1OnQBynHlD5nz-zcf2AIeo2LIN899_C_v7MqoE_4kC5t8iCxJtxT6kubNk3R3RGk28tDAdgpqHgJGU1eLmjtv3w1IS-VmEG8Jq5NRiRoE6kwPKaNn4UZv4vlpi-1CNzQxCidi4uXxpZTVezLS1WJJ5iPz6PZQ4Kalr7QX6qn8U2FIUOZzuetlyxnSEtwLvNt0VGvnN32l9DY32vndCas9znHl-6Yt7iEd0I95PZFg5M4VLGir7a85ioot08CaDkpREpAx0m3e933jmI5i-DDnvAOMPWYPEiSsAO8L5SwuIrngqt2yELivRNiacdqVBlx8ReNIDVJKfxH4JQzztpLo8H-uh_375-KSPkc0JTmGsZQF7hTa5juugiQYl3GO4cN8En-_WW3fAI0DF_gHfzJosYk5qUuOMOUyxvV_1IZcoV8UyVLLvk6Ol_pYWCyiSu7rOHM_EkL0JDpz06KF_OMZWVR3xpvOnjX3wDFoLXRA57EA4DOR_tO5DMS9CX8waxEiKvYaHLJxZdMi1AUnRhduN7K3b0-d8H1n3ZPNR7IhKMX35EgZa9dsC96U1fyl6Xp2oi6yctLazKZqU-F5jFDHcuFFvNF_fHEwspzCMAqmQ4XMHIpQXmSUQzOrzlAtUi3BjlZc5VfrhwA9E-KmwcBBBDz7DpSlZkgyuHDRwkIbUb09hSsOQR4fESALEWtMc7RqZmeHVxhd8Gb_NxdLLaZ0qq0f_wA6IkM3eAJV-8RIw04s6L_p68WQP0vIxfWPL_YKd36EryAMfXHhlLzD8lX5yGrDYTTPQcrmlphMlGbWuBOm8RTTRrqYZ0y4IPkolCT5d8WBgu0ZZ5HPtb3dw3I7TT5c2QCFXTm8IfP8lB6B1440fPVcG_oDYKb8FIqPwdEWfOSYt5qIyIIXitjNQJVIR4dtc5WfwYev9CHk4UoJrenR2r4hnZ11Lzy_gfgoiVzoYcpbyf1vYqTKjgcRU3HFZp2trMcfmoIRMRBAsIFO6GtBXWekGQuzDas9wEm22ieR9tR2ZJ-vBSKkrQeURwLoPJ-9dy56abgo_7FK89liumMglyAIXgZarEvYiHGZ8QRMe8p4Y5WXjid-l1hHYl3EdEx7R4ab6v36_b5TVk0Blz86xEgystXOFiYG29AAhsgIpRFDu7Y5fhizp2YSQWh0XosPbDcXrBIdgUbe30-POR6QE1NKmfU7aVv5fxMHZerWMrFdQRj-Bibp5oqemkrewzXH-0UFhHin7dONPh-zPt7PQN1AR6T6M878D1bhVNw1JdZWRv9C4pDYidCxtost6mMdA3aFVDCuPwLZYFpoE3E5stnPiMBQ8tSOQIxBcfh7VY2iHm5vpi7eV8ZXKHcJRYwXtsqXPFvsKw5JapgzO6i5jG9pJpB3Hqja43___h2VeWefwX-OGuzLBjbRfuUnHS2sapfQ4-Ze8n8Z8Y4ZbV97hLyR3kjfSaYWAGwEOAg173iQ169y2fuFsy8V2Vv-SYu5ktNrvLAoZ-mBC51goF8y4CQeoDV3hQJerse26dFLGuQ3Pqgz5imPlAv54oAgdNVA-atXlpp4YcHhxUSE7QPiFnosYm0e9c-W_uftVkIchXfN1-MkAO9tgEUEjgHgu34LhOHpkqdjr3Iav_xqpODV9nymdHTc0HI6ZDAQasHKKHo99Jpr9fT7QmO7FlV7EnxC8a-80GOAyPnHlxYjsVZF6hAIOuqWZZj8kog1U6QanIHsuFJkx5XyA3_I63_ky6N1Obs6z_Bhcey3-l-DEqkr41z-TQvNy9VT2zJvUpKpMTw_xPhXOMuHqZHbPfNaz4Ru5bQeBOhazWk3DSNSPH64jY-eU1jMl_4MJwfSTBBm7CMV7-iN6AYyCDjZymQKFHQpyZoHh6I9v1jWFGowBjoA1wJCx8kBJf4W1ZpNLBbL2dX8Qv__tNEdowVN2NyU-bHwNMB7Opvj_RuW_m5SX9Fqwr--autdPqXMB06uJdIRVOhMnEX_-ANXf6wRCEOuYbyzAoHclBVNoUgawI4Ci82SJJO4Rf0TE7WtdVFf61OHXQ0tQeMF-b1B3W8T84iDiXfL_wP8d21r3M5YikRw616mela7eqRMhHuArc8yb4gr_T2ilLjzvRpJF12PgtkmEmmT6iIHqQiQ-rTo5rvP13JdYCdN18ELnMfduLh6JaLQ4GLe4WvZPeWUj57VfPVtaMMNqXXSFpn3mB7N6Jn5eaq9oVaNgb0HUVMoeLgpBVx8q5v3X2WwjArCei0U5bHOmEMLKKrwZbbLmQf2uKXy5asHKtcTX-6mXODwq6Bm6bQ3Gk3qk3tfbXvGymLeMM_SvSXz-uQVhJD5DlxTdhVprgJp886pE5PDWIod9ZvzJF_J6Wbb2DO2KyESUPYKlc9AbWfUP9y4XaUf37GzXD25X8o65hTrd9P4zZtmxIe6_LzGmyVlhZdzhDUeLOYquvtCZo-dXhgqiy1Wx6TPDUyQTCcwX72d1t9sE5EGvC6PeY3wF-nFzXnbpEN995m7NZUbdRF3BotgNRHztzGDftYMElU3wczw7j8EnSRHyhbiVnW2cpp0gt3skDv49vayeAox1rWpPcOkluKpRC24ycDoE47l_GNmk9MaM9w7Qd_dNiAhu29PRGFz44pog4S40zriMmqVcHhzVhitIiQxtfQljchFxXd4VgegbrUGLYGhESosWAI6mYHiLFVeLITb0tjg0U9Cm4ALe6Bfwf5uQi2xj1yH_VNqm2hohZmsVZs1yhQeMmrGmckoBn722z1nCIzIt24RCEd-UdF2IFWcOEqiBgA7whMryO0dkhdPrCHSlZfLV2UZHQc2kZFdV2MgVM-Mpcw44VC6hk65Y6Xhv2gB2NswJKifIEm4KB9FEOWSyWrqOV97-HSacbaNA2z2QjwbcCgTg9uF7BdBnVjkS-ADB6m1vk8w9YwMTLpRg9iD7faiDSIIQ_iSi40lTVlBpmMqz3ObKF0dAWyqQQ-22q_p04y_QfxsgcjQ_Zt2b7uyYHrzyx8dtmYnsebCAU82KS7pAkY_20idZmp1-D4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'product_110790' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_BEER' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Cake' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Rice' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_arepa' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_hamburguer' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_null' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pasta' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taaaccoo' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taco' with function func_col_exp\n" + ] + } + ], + "source": [ + "df =df.cols.values_to_cols(\"product\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {'values_to_cols': ['product_110790',\n", + " 'product_BEER',\n", + " 'product_Cake',\n", + " 'product_Rice',\n", + " 'product_arepa',\n", + " 'product_hamburguer',\n", + " 'product_null',\n", + " 'product_pasta',\n", + " 'product_pizza',\n", + " 'product_taaaccoo',\n", + " 'product_taco']}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['product_Cake', 'product_pasta', 'product_taco', 'product_null', 'product_BEER', 'product_hamburguer', 'product_arepa', 'product_110790', 'product_taaaccoo', 'product_pizza', 'product_Rice']\n", + "INFO:optimus:Batch Histogram 0. Processing columns['product_Cake', 'product_pasta', 'product_taco', 'product_null', 'product_BEER', 'product_hamburguer', 'product_arepa', 'product_110790', 'product_taaaccoo', 'product_pizza', 'product_Rice']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36m_load_from_socket\u001b[1;34m(sock_info, serializer)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m15\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 151\u001b[1;33m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msa\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 152\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mConnectionRefusedError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\extension.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, name, infer, mismatch, stats)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[0mstats\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"json\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 467\u001b[1;33m mismatch=mismatch)\n\u001b[0m\u001b[0;32m 468\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 469\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mComm\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mdataset\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch)\u001b[0m\n\u001b[0;32m 364\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 365\u001b[0m output_columns = self.columns_stats(df, calculate_columns, buckets, infer, relative_error, approx_count,\n\u001b[1;32m--> 366\u001b[1;33m mismatch)\n\u001b[0m\u001b[0;32m 367\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 368\u001b[0m \u001b[1;31m# Update last profiling info\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_stats\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, mismatch)\u001b[0m\n\u001b[0;32m 482\u001b[0m \u001b[0massign\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"frequency\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfreq\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 483\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 484\u001b[1;33m \u001b[0mcol_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"stats\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextra_columns_stats\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstats\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 485\u001b[0m \u001b[0massign\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"name\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 486\u001b[0m \u001b[0massign\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"column_dtype\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"columns\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'dtype'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mextra_columns_stats\u001b[1;34m(self, df, col_name, stats)\u001b[0m\n\u001b[0;32m 593\u001b[0m \u001b[0mmad\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 594\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmad\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 595\u001b[1;33m \u001b[0mcol_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mad'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 596\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 597\u001b[0m \u001b[0mcol_info\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mad'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mmad\u001b[1;34m(columns, relative_error, more)\u001b[0m\n\u001b[0;32m 704\u001b[0m \u001b[0mmedian_value\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 705\u001b[0m \u001b[0mmad_value\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwithColumn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mmedian_value\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 706\u001b[1;33m \u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 707\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 708\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmore\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mmedian\u001b[1;34m(columns, relative_error)\u001b[0m\n\u001b[0;32m 670\u001b[0m \"\"\"\n\u001b[0;32m 671\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 672\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mformat_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpercentile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 673\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 674\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0madd_attr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mpercentile\u001b[1;34m(columns, values, relative_error)\u001b[0m\n\u001b[0;32m 682\u001b[0m \"\"\"\n\u001b[0;32m 683\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 684\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0magg_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpercentile_agg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 685\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 686\u001b[0m \u001b[1;31m# Descriptive Analytics\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36magg_exprs\u001b[1;34m(columns, funcs, *args)\u001b[0m\n\u001b[0;32m 603\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 604\u001b[0m \"\"\"\n\u001b[1;32m--> 605\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mexec_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 606\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 607\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0madd_attr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mexec_agg\u001b[1;34m(exprs)\u001b[0m\n\u001b[0;32m 624\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0magg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 625\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 626\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_col_names_funcs_to_keys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 627\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 628\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\extension.py\u001b[0m in \u001b[0;36mto_dict\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 58\u001b[0m \"\"\"\n\u001b[1;32m---> 59\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mcollect_as_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\functions.py\u001b[0m in \u001b[0;36mcollect_as_dict\u001b[1;34m(df, limit)\u001b[0m\n\u001b[0;32m 107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 108\u001b[0m \u001b[1;31m# Because asDict can return messed columns names we order\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 109\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcollect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 110\u001b[0m \u001b[0m_row\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcollections\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOrderedDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\dataframe.py\u001b[0m in \u001b[0;36mcollect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mSCCallSiteSync\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcss\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[0msock_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcollectToPython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 467\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_load_from_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mBatchedSerializer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mPickleSerializer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 468\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 469\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mignore_unicode_prefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36m_load_from_socket\u001b[1;34m(sock_info, serializer)\u001b[0m\n\u001b[0;32m 149\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m15\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 151\u001b[1;33m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msa\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 152\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 153\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "AQUI" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.replace(\"product\", search=[\"piza\",\"pizzza\"], replace_by=\"pizza\", search_by=\"words\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['product']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['product']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"string\": 1, \"categorical\": 0, \"date\": 0, \"array\": 0, \"null\": 0, \"numeric\": 0, \"binary\": 0}, \"total_count_dtypes\": 1, \"dtypes_list\": [\"string\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"47.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"pizza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdxa5nZ0TVQTxyT9DTaFmfPGIi_VqucIsaVJQb1nyO1DI8EnTGVjFjJvisCg2yh8PZ-xhcvILiuKcnGmCR0XOCz5WrbjDm8SaRGWj4L4F6i5zzlGvE5M0zhBGlK4Y9YDnDBuZc_iM_Kpx1ROtwQr-veRV0TEVtyy4EoSnJ16aTY4tly5j1XfJHYB0bWn6OXt8K5JeCxaPTNlJZRYwoOFPWHitLDDt8U3guruKndZ5c9w-dh5rkK0BmnyEjA7G_uANe_o4Ysf7-V4aaNtIWvO0SRX9mLEbznvsMbV6Avpu5MF3DG_MUKQnXZu5RpcaGnNwC5sRf-hiCYGRt4bz_Yxc7uex6WANBDC8rixgQ4aJtsjdTnIJA8PcSndoe0VXeTu8AhSSPuxIlDCENQy2M85Z488-LT4L3IBklyfy7yLMfATr8_zYmUSJjW8sHFaiRR_amyRRDJeN37sUD0C4BYxYbTJcr5VUPF338Z8WJa1sIWzz-aqtrEmnDpMwx4i0VeaMpmcKV9nQ4kP8d607jSrCkAqfZBAG8LIib2g1GaWZLACJZ7awKRH3gRERarVLWpgR0MLLi0UjjX_BDP_nC5onVgMUTmwjHuodspWTpatxVMvF1f2xfEuhOMil0isjTy82kkVPl_gLsdiVSALJtpCf_Q-vGOsOr918pRs2_XicBdNt9EsCmY_d59j7XCZOQSnzXuWkJWK5Q7nmKAjg5IyK21XuSzWNOewk34aM7jNWQJPTvVhQm2wonZB9aRsB784NZU2LOMKEf_7hRLvOUYF4TuyhuwUXSsc4EfiYCPU9Ng7uVC_TK2HFz72NujvdVkOWyeWcK8R_McvRwf8yIabRzT3CxgkxNxD7SeWBborP0EdEsKK40OKX8Bn2SPaFT98EIgLd-XrgvIt1Dhx9QHUrd16nerLu41UXtrxa4pThpPUbGuPRX9kQsOGTn9EmuO0QIJ7Gxps2fTmX5TqG_w4A7NYSRnPK91lVT71RXOOr2YpN4OWG1fvnbtwBxmBBlt_XuhHtsM-w54Zwe9S1_sYw4Lx_kfvabI6340cTm4X4yxHJkm2ZGtpB-eKt8t-myQyGhfvyeH4LKnEvt5edkgDVwEYJL4-7ZZ9GwPv5-8rSimnjesCqRvbGJBA0OXJCGP51t0Utn8gQluLbjk3Mz8A84cSZw4d2SnVI3lRRw_cmUzDW7hq9JElxDTii47QINsuml3tBM12eWxvD3flQ5i5ButiVZERL27xvf1U0GsCQ0Jav4jAv1qOj42wZRYq5WNRNJdDt_6MgNKBW81ARbRZwHzvHkfGpjRBOuUIVfL7p1b3bBkwD7BJY9xs_41-LekJsypXbsOWiJ08nFglhprYfv5TqvX39r_FZ2r16en54lhLyL-Z-59utrzIN1I1T2T0DcwdIxS0u3HXEI_izC8CYrqC4_m5GDW37Dt1HhUWC3X-d7dyrteCpFfJCK1EacjZ_rh25y7bs4Znbf8pD3tEPX19c0z5lA_KCMysXJMp3_DUuEihdsl9lJNYDjL3T6xAcmD_4Y-dI9t_jAEecjaHvq4CCiTwPaCLYe9lI9hv0C-svFK7-LnAjKW9avtymwPqAEwdLPvmresptxFDfRq8kpSNXl7HxXTI4R7zWTX-NjinFryQi0yrl4JR4gR8nuAFJzC80ay87iZsrVlvUjpIx426_eONhhR4Jfu6PwdjJDBo6Z6FsUfJSOTr23HSe9Ac4VWkOcJD6Bw8vF21dLC9owyOVXlo_0V8edeBiabSOypwL5jN592AGta9CtnI-23_3hM-Ue_C_RnTkaYu0v-k6a_y5hvrNsatRQZv7OeFpWqmMkBwOXNRGRJd6z3DRXoT9fFTrzwhZfDVecMhlPaCuBB7lUzFKx0NiMQ5LaS3txgkyTPRvApcp80D2tCSr0elHovC_AG_XMwu_reskDDGcIvw3IjgEWsJ7VEU1sDlOsoNyZ5GRcD2ynmKsEKA0K3BwwZeFzxJmPmaDnS0u93lNmwiH9Zw5yMGCkV0k2tVwmH5r40LXsgdtXGkZ86KRoIP9GaeMzE5rl1u42Z1l6eD1ZJcrlNTY3WNOiSRVaakhoNCbW1Is7uLp8EAOI0K9m7o-X-wbn3xHNMDOqspTGVDS0OmsQI30P1rmyJsBLOBrdis-JVuBi7yNzrRkJpK2komGdzZtFOnwZgYHtCQUbh4X_9YQDmMfs1jcm5QkHIonIWQzIOIwU0UDKNKjuKyHdKnSRUvuLO4az7wMw_YDsic4R0jxCuBN4RfOiuzxI7FttHPmF5TN2hW5zN_7T6hXj8jaE40PKeD4umg86LVVIc2RQtMo1RXEuF6imzL5MzV5UHH5kcQd_YHFvQRMhb52U7kQbK6oPuArEyxbYwvGacU6cxegVLNAISr-GEv1PLERIlZ9pk9KSjPadkW3PpPEb2I6EiOH8YqQMO7qetugS2mO0Y_Q6_kMMHo2wCI0jQE69unHwsDN-SMGu3X2DJHRopHIcFn_7L5nchWn31wafFimr8Hbk3CDBa_EkRXANxzuxOX9eT5T-PYO_XYcSKnwVeo027y904sAT42EOZXn82_W7GKvEoI1FWdsdiSOk-qm8HKCCDo_dsj027z23H0liOi8N9-FOi5cU-BRVPyOIOipjeBRQ_52QnK-rpiaykWjm_jWgRPrxxD2skv8dEZ80-NnZrAnjbVzZZcxriDSN8JoqFHo-OEPd2SQLp61ODQbCyGs6YaN7lfQrNybbay3XNjlMJ1WWtukopcl5xs7nQG3SgD0wy8kPt8UaCcktp4gtN2LTNWw8Hq5jhLQNUwgv0qfD6sHlJY3_2cMCvir-I6HY48TsaaYWNd_CXb9xVCgMpS9aviSnQxKg5WI4VotnSlaXYbN-xa5DJi1sP5TcDmCDBjkombPIqFIyo7B5YJq1E0XlpRhMD3nDl4VlAlGnj_NSI0urhutnArgAVByUeVvEEtNWXjYgEw57c7Ysna1C5km4g0oTsCORMF4VlRBe913uXWUcQU0aP-w4eRaY9Vi6FZVxKeZ_sLCCF5RKqoKjnycvawaGn41ADHngCImtxhDTVBcjWrC_5nqT7UjT6k62S7eP8AMR7Te0MAg8h1TGGy1XXIHFipaLaJL3YkMz_5lW6DWuXKOxR5nAWpeqbv1qj4FrFk4q_MoIgHcc428TpJAT6grywyjenStt8O7JypWUg9WRulM98hZ78K4c47VYScwqez8-7ewSqInt4liewx9Fp8-TiS6mIFyVWm7zUmrteklWgkazodNi3you9tO8-YDviEu8RHjqvg8Yv9-mhHbS966q2xrtUIRfEMeNiNtY4sU8-xZFrTYyUGpl3Ditc6qz6eo7HJMZIvTJCIFrNXX6iBqKsfhopXcqwNZZ9liea-KdxqphMgTo4gshxmtxb7SRE-4B83jMufW5oJoNw8WDEoAc09R8g6PgzLu5ELUPE7DvxQlAjfF37ruPbUjyITUsfQPaJsXY5j4PFa8yc8HVrC6tZCVI4SOII37iNsjkds2np3zHgtxuhVh43sXqemkCk8NJvELH-aAZtIwE0Oftfa2F2Pth0zRCSxhUVSgOetjlZEHE2yd723vbREw0-Px1qnlFhNh_thFUVh390AsDVpn57_932UjBu2fMY5aO2oz2P58xN9-_EyhjYIOv1ioSI_zomSHLADPjmHcXmv65Ldknt-uKx1Yl7HTTkzdkzj-kOwxZBWchCCneoyKQI0zmVbZ-DK0b8oL3hoc62DC8k2vasI5wNlmeNWuJabZi4J3PlMZ66Az-1Unvgs_CHBocMoOZM-xz9xxv-Q0UOrKX6q3XsJNIApPGBs2nJoPuixjKZNrc0vTLy9e2lKqe4stOnxI73uuSj82gX4nXuj4nixosBawbcaMURSqt2s2X44L6NHf13THpnLu3I7cYiL7HHNA6T9MwN2ji5uBMx0aKDV8b2s7MzXpcZThbu56lObjeOtSKEK7bnufr6H8dvo3EcQawru8ppBoNn2obYyUX97bOzVR_T4WTXyBUuEAF0q8GCO360sp8T1QPaD0Ws3D8F5GsO76cXxC9Mu8g69vHWGr0gK5vqKk8Zw7M3lLGGxdRoLF4GzRc0dwnTK6LC5HxkxcHZN56XwVbGQvMRiWQkl7BV-X_goapi0QjyFq9IUTx4V4IIRb1obkeHEf-zL1CyHyUWytp2qzZsu5CABwCZMVpe8N1rY9L0g2bNA3yUGGT3FlI5UGxbYzniytZy5OW_bq1HwJ_CvjYXmQME0udL8bCIvVrRmvlUaqd5xP-UbHdUbIWi40cDL-0U3CyBBUzQuwgFlfbUSCtfg2RuO4nqwfmqQvBOI0b0U2fXrIijVrGaPDt56j2KRJAtq7AV_4tRMaHCO3awqFOvr_M4cKSHeg=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "invalid escape sequence \\d\n", + "INFO:optimus:Using 'column_exp' to process column 'product_110790' with function func_col_exp\n" + ] + }, + { + "ename": "AnalysisException", + "evalue": "\"Reference 'product_110790' is ambiguous, could be: product_110790, product_110790.;\"", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m format(target_id, \".\", name), value)\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling o434.withColumn.\n: org.apache.spark.sql.AnalysisException: Reference 'product_110790' is ambiguous, could be: product_110790, product_110790.;\r\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolve(LogicalPlan.scala:213)\r\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveChildren(LogicalPlan.scala:97)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$36.apply(Analyzer.scala:822)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$36.apply(Analyzer.scala:824)\r\n\tat org.apache.spark.sql.catalyst.analysis.package$.withPosition(package.scala:53)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:821)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4$$anonfun$apply$11.apply(TreeNode.scala:344)\r\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\r\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\r\n\tat scala.collection.immutable.List.foreach(List.scala:381)\r\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\r\n\tat scala.collection.immutable.List.map(List.scala:285)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:333)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve$2.apply(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveReferences$$resolve(Analyzer.scala:830)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$9$$anonfun$applyOrElse$36.apply(Analyzer.scala:891)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$9$$anonfun$applyOrElse$36.apply(Analyzer.scala:891)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:107)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:107)\r\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:106)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:118)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:122)\r\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\r\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\r\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\r\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:104)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:122)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:127)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)\r\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:127)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$9.applyOrElse(Analyzer.scala:891)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$9.applyOrElse(Analyzer.scala:833)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)\r\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)\r\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:833)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:690)\r\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)\r\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)\r\n\tat scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)\r\n\tat scala.collection.immutable.List.foldLeft(List.scala:84)\r\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)\r\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)\r\n\tat scala.collection.immutable.List.foreach(List.scala:381)\r\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:124)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:118)\r\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:103)\r\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)\r\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)\r\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)\r\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74)\r\n\tat org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3296)\r\n\tat org.apache.spark.sql.Dataset.select(Dataset.scala:1307)\r\n\tat org.apache.spark.sql.Dataset.withColumns(Dataset.scala:2192)\r\n\tat org.apache.spark.sql.Dataset.withColumn(Dataset.scala:2159)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mAnalysisException\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues_to_cols\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"product\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mvalues_to_cols\u001b[1;34m(input_cols)\u001b[0m\n\u001b[0;32m 2114\u001b[0m \u001b[1;31m# names = before.cols.names(keys, invert=True)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2115\u001b[0m \u001b[1;31m# print(names)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2116\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpivotDF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoDF\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mnames\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfill_na\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_names\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2117\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_meta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2118\u001b[0m \u001b[1;31m# df.table()\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mfill_na\u001b[1;34m(input_cols, value, output_cols)\u001b[0m\n\u001b[0;32m 1189\u001b[0m \u001b[0mRaiseIt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtype_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_col\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1191\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_col\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_cols\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moutput_col\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mActions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mFILL_NA\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1192\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1193\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(input_cols, func, func_return_type, args, func_type, when, filter_col_by_dtypes, output_cols, skip_output_cols_processing, meta)\u001b[0m\n\u001b[0;32m 238\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0minput_col\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_col\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 240\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwithColumn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput_col\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexpr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwhen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 241\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreserve_meta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_col\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 242\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\dataframe.py\u001b[0m in \u001b[0;36mwithColumn\u001b[1;34m(self, colName, col)\u001b[0m\n\u001b[0;32m 1847\u001b[0m \"\"\"\n\u001b[0;32m 1848\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mColumn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"col should be Column\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1849\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwithColumn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msql_ctx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1850\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1851\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mignore_unicode_prefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1259\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[0;32m 68\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mAnalysisException\u001b[0m: \"Reference 'product_110790' is ambiguous, could be: product_110790, product_110790.;\"" + ] + } + ], + "source": [ + "df = df.cols.values_to_cols(\"product\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "argument of type 'NoneType' is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\extension.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, name, infer, mismatch, stats)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[0mstats\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"json\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 467\u001b[1;33m mismatch=mismatch)\n\u001b[0m\u001b[0;32m 468\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 469\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mComm\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mdataset\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch)\u001b[0m\n\u001b[0;32m 316\u001b[0m \u001b[0mrenamed_cols\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmatch_renames\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_meta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"transformations.columns\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 317\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcurrent_col_name\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcurrent_col_names\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 318\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0mcurrent_col_name\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrenamed_cols\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 319\u001b[0m \u001b[0mnew_columns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcurrent_col_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: argument of type 'NoneType' is not iterable" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'pandas_udf' to process column 'lastName' with function multiple_replace\n", + "INFO:optimus:Using 'column_exp' to process column 'lastName' with function _trim\n", + "INFO:optimus:Using 'column_exp' to process column 'product_110790' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_BEER' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Cake' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Rice' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_arepa' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_hamburguer' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_null' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pasta' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taaaccoo' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taco' with function func_col_exp\n" + ] + } + ], + "source": [ + "df = df.cols.rename(\"id\", \"id2\")\n", + "df = df.cols.rename(\"firstName\", \"firstName2\")\n", + "df = df.cols.replace(\"product\", search=[\"piza\",\"pizzza\"], replace_by=\"pizza\", search_by=\"words\")\n", + "df = df.cols.unnest(\"birth\", separator=\"/\", splits=3)\n", + "df = df.cols.remove_special_chars(\"lastName\")\n", + "df = df.cols.trim(\"lastName\")\n", + "df = df.cols.values_to_cols(\"product\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.rename(\"firstName\", \"firstName2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName2',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'}}}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName2', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName2', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName2`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName2`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is cached False\n", + "OUTPUT {'id': {'stats': {'count_uniques': 19, 'min': 1, 'max': 19, 'stddev': 5.62731, 'kurtosis': -1.20667, 'mean': 10.0, 'skewness': 0.0, 'sum': 190, 'variance': 31.66667, 'zeros': 0, 'percentile': {'0.75': 15, '0.95': 19, '0.05': 1, '0.25': 5, '0.5': 10}, 'count_na': 0, 'hist': [{'count': 1.0, 'lower': 1.0, 'upper': 2.0}, {'count': 1.0, 'lower': 2.0, 'upper': 3.0}, {'count': 1.0, 'lower': 3.0, 'upper': 4.0}, {'count': 1.0, 'lower': 4.0, 'upper': 5.0}, {'count': 1.0, 'lower': 5.0, 'upper': 6.0}, {'count': 1.0, 'lower': 6.0, 'upper': 7.0}, {'count': 1.0, 'lower': 7.0, 'upper': 8.0}, {'count': 1.0, 'lower': 8.0, 'upper': 9.0}, {'count': 1.0, 'lower': 9.0, 'upper': 10.0}, {'count': 1.0, 'lower': 10.0, 'upper': 11.0}, {'count': 1.0, 'lower': 11.0, 'upper': 12.0}, {'count': 1.0, 'lower': 12.0, 'upper': 13.0}, {'count': 1.0, 'lower': 13.0, 'upper': 14.0}, {'count': 1.0, 'lower': 14.0, 'upper': 15.0}, {'count': 1.0, 'lower': 15.0, 'upper': 16.0}, {'count': 1.0, 'lower': 16.0, 'upper': 17.0}, {'count': 1.0, 'lower': 17.0, 'upper': 18.0}, {'count': 1.0, 'lower': 18.0, 'upper': 19.0}], 'range': 18, 'median': 10, 'interquartile_range': 10, 'coef_variation': 0.56273, 'mad': 5, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'name': 'id', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'firstName2': {'stats': {'count_uniques': 18, 'min': '((( Heinrich )))))', 'max': 'William', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 94.74}, 'frequency': [{'value': 'Luis', 'count': 1, 'percentage': 5.26}, {'value': 'André', 'count': 1, 'percentage': 5.26}, {'value': 'NiELS', 'count': 1, 'percentage': 5.26}, {'value': 'PAUL', 'count': 1, 'percentage': 5.26}, {'value': 'Albert', 'count': 1, 'percentage': 5.26}, {'value': 'Galileo', 'count': 1, 'percentage': 5.26}, {'value': 'CaRL', 'count': 1, 'percentage': 5.26}, {'value': 'David', 'count': 1, 'percentage': 5.26}, {'value': 'Johannes', 'count': 1, 'percentage': 5.26}, {'value': 'JaMES', 'count': 1, 'percentage': 5.26}, {'value': 'Isaac', 'count': 1, 'percentage': 5.26}, {'value': 'Emmy%%', 'count': 1, 'percentage': 5.26}, {'value': 'Max!!!', 'count': 1, 'percentage': 5.26}, {'value': 'Fred', 'count': 1, 'percentage': 5.26}, {'value': '((( Heinrich )))))', 'count': 1, 'percentage': 5.26}, {'value': 'William', 'count': 1, 'percentage': 5.26}, {'value': 'Marie', 'count': 1, 'percentage': 5.26}, {'value': 'Arthur', 'count': 1, 'percentage': 5.26}, {'value': 'JAMES', 'count': 1, 'percentage': 5.26}], 'name': 'firstName2', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'lastName': {'stats': {'count_uniques': 19, 'min': ' GALiLEI', 'max': 'dirac$', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'frequency': [{'value': 'Alvarez$$%!', 'count': 1, 'percentage': 5.26}, {'value': 'Ampère', 'count': 1, 'percentage': 5.26}, {'value': 'Böhr//((%%', 'count': 1, 'percentage': 5.26}, {'value': 'dirac$', 'count': 1, 'percentage': 5.26}, {'value': 'Einstein', 'count': 1, 'percentage': 5.26}, {'value': ' GALiLEI', 'count': 1, 'percentage': 5.26}, {'value': 'Ga%%%uss', 'count': 1, 'percentage': 5.26}, {'value': 'H$$$ilbert', 'count': 1, 'percentage': 5.26}, {'value': 'KEPLER', 'count': 1, 'percentage': 5.26}, {'value': 'M$$ax%%well', 'count': 1, 'percentage': 5.26}, {'value': 'Newton', 'count': 1, 'percentage': 5.26}, {'value': 'Nöether$', 'count': 1, 'percentage': 5.26}, {'value': 'Planck!!!', 'count': 1, 'percentage': 5.26}, {'value': 'Hoy&&&le', 'count': 1, 'percentage': 5.26}, {'value': 'Hertz', 'count': 1, 'percentage': 5.26}, {'value': 'Gilbert###', 'count': 1, 'percentage': 5.26}, {'value': 'CURIE', 'count': 1, 'percentage': 5.26}, {'value': 'COM%%%pton', 'count': 1, 'percentage': 5.26}, {'value': 'Chadwick', 'count': 1, 'percentage': 5.26}], 'name': 'lastName', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'billingId': {'stats': {'count_uniques': 19, 'min': 111, 'max': 992, 'stddev': 280.19735, 'kurtosis': -1.04115, 'mean': 556.0, 'skewness': -0.2137, 'sum': 10564, 'variance': 78510.55556, 'zeros': 0, 'percentile': {'0.75': 812, '0.95': 992, '0.05': 111, '0.25': 323, '0.5': 553}, 'count_na': 0, 'hist': [{'count': 3.0, 'lower': 111.0, 'upper': 159.94}, {'count': 0.0, 'lower': 159.94, 'upper': 208.89}, {'count': 1.0, 'lower': 208.89, 'upper': 257.83}, {'count': 0.0, 'lower': 257.83, 'upper': 306.78}, {'count': 1.0, 'lower': 306.78, 'upper': 355.72}, {'count': 0.0, 'lower': 355.72, 'upper': 404.67}, {'count': 1.0, 'lower': 404.67, 'upper': 453.61}, {'count': 1.0, 'lower': 453.61, 'upper': 502.56}, {'count': 2.0, 'lower': 502.56, 'upper': 551.5}, {'count': 1.0, 'lower': 551.5, 'upper': 600.44}, {'count': 2.0, 'lower': 600.44, 'upper': 649.39}, {'count': 1.0, 'lower': 649.39, 'upper': 698.33}, {'count': 1.0, 'lower': 698.33, 'upper': 747.28}, {'count': 0.0, 'lower': 747.28, 'upper': 796.22}, {'count': 1.0, 'lower': 796.22, 'upper': 845.17}, {'count': 2.0, 'lower': 845.17, 'upper': 894.11}, {'count': 1.0, 'lower': 894.11, 'upper': 943.06}, {'count': 0.0, 'lower': 943.06, 'upper': 992.0}], 'range': 881, 'median': 553, 'interquartile_range': 489, 'coef_variation': 0.50395, 'mad': 230, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'name': 'billingId', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'product': {'stats': {'count_uniques': 13, 'min': '110790', 'max': 'taco', 'stddev': nan, 'kurtosis': nan, 'mean': 110790.0, 'skewness': nan, 'sum': 110790.0, 'variance': nan, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 68.42}, 'frequency': [{'value': 'pizza', 'count': 4, 'percentage': 21.05}, {'value': 'taco', 'count': 3, 'percentage': 15.79}, {'value': 'pasta', 'count': 2, 'percentage': 10.53}, {'value': 'Cake', 'count': 1, 'percentage': 5.26}, {'value': 'piza', 'count': 1, 'percentage': 5.26}, {'value': 'arepa', 'count': 1, 'percentage': 5.26}, {'value': 'taaaccoo', 'count': 1, 'percentage': 5.26}, {'value': 'hamburguer', 'count': 1, 'percentage': 5.26}, {'value': 'pizzza', 'count': 1, 'percentage': 5.26}, {'value': 'BEER', 'count': 1, 'percentage': 5.26}, {'value': 'Rice', 'count': 1, 'percentage': 5.26}, {'value': '110790', 'count': 1, 'percentage': 5.26}, {'value': 'null', 'count': 1, 'percentage': 5.26}], 'name': 'product', 'column_dtype': 'string', 'dtypes_stats': {'string': 17, 'int': 1, 'null': 1, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'price': {'stats': {'count_uniques': 8, 'min': 1, 'max': 10, 'stddev': 2.95285, 'kurtosis': -1.4482, 'mean': 6.05263, 'skewness': -0.22564, 'sum': 115, 'variance': 8.7193, 'zeros': 0, 'percentile': {'0.75': 8, '0.95': 10, '0.05': 1, '0.25': 3, '0.5': 8}, 'count_na': 0, 'hist': [{'count': 2.0, 'lower': 1.0, 'upper': 2.29}, {'count': 4.0, 'lower': 2.29, 'upper': 3.57}, {'count': 1.0, 'lower': 3.57, 'upper': 4.86}, {'count': 2.0, 'lower': 4.86, 'upper': 6.14}, {'count': 0.0, 'lower': 6.14, 'upper': 7.43}, {'count': 6.0, 'lower': 7.43, 'upper': 8.71}, {'count': 2.0, 'lower': 8.71, 'upper': 10.0}], 'range': 9, 'median': 8, 'interquartile_range': 5, 'coef_variation': 0.48786, 'mad': 2, 'p_count_na': 0.0, 'p_count_uniques': 42.11}, 'name': 'price', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'birth': {'stats': {'count_uniques': 18, 'min': '1899/01/01', 'max': '2000/03/22', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 94.74}, 'frequency': [{'value': '1980/07/07', 'count': 1, 'percentage': 5.26}, {'value': '1950/07/08', 'count': 1, 'percentage': 5.26}, {'value': '1990/07/09', 'count': 1, 'percentage': 5.26}, {'value': '1954/07/10', 'count': 1, 'percentage': 5.26}, {'value': '1990/07/11', 'count': 1, 'percentage': 5.26}, {'value': '1930/08/12', 'count': 1, 'percentage': 5.26}, {'value': '1970/07/13', 'count': 1, 'percentage': 5.26}, {'value': '1950/07/14', 'count': 1, 'percentage': 5.26}, {'value': '1920/04/22', 'count': 1, 'percentage': 5.26}, {'value': '1923/03/12', 'count': 1, 'percentage': 5.26}, {'value': '1999/02/15', 'count': 1, 'percentage': 5.26}, {'value': '1993/12/08', 'count': 1, 'percentage': 5.26}, {'value': '1994/01/04', 'count': 1, 'percentage': 5.26}, {'value': '1997/06/27', 'count': 1, 'percentage': 5.26}, {'value': '1956/11/30', 'count': 1, 'percentage': 5.26}, {'value': '1958/03/26', 'count': 1, 'percentage': 5.26}, {'value': '2000/03/22', 'count': 1, 'percentage': 5.26}, {'value': '1899/01/01', 'count': 1, 'percentage': 5.26}, {'value': '1921/05/03', 'count': 1, 'percentage': 5.26}], 'name': 'birth', 'column_dtype': 'date', 'dtypes_stats': {'date': 19, 'int': 0, 'decimal': 0, 'string': 0, 'boolean': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'date', 'id': None}, 'dummyCol': {'stats': {'count_uniques': 13, 'min': '#', 'max': 'you', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 68.42}, 'frequency': [{'value': 'gonna', 'count': 3, 'percentage': 15.79}, {'value': 'you', 'count': 3, 'percentage': 15.79}, {'value': 'never', 'count': 2, 'percentage': 10.53}, {'value': '#', 'count': 2, 'percentage': 10.53}, {'value': 'give', 'count': 1, 'percentage': 5.26}, {'value': 'up', 'count': 1, 'percentage': 5.26}, {'value': 'let', 'count': 1, 'percentage': 5.26}, {'value': 'down', 'count': 1, 'percentage': 5.26}, {'value': 'never ', 'count': 1, 'percentage': 5.26}, {'value': 'run ', 'count': 1, 'percentage': 5.26}, {'value': 'around', 'count': 1, 'percentage': 5.26}, {'value': 'and', 'count': 1, 'percentage': 5.26}, {'value': 'desert', 'count': 1, 'percentage': 5.26}], 'name': 'dummyCol', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"binary\": 0, \"categorical\": 0, \"numeric\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName2\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName2\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdxY_pPGXYkqUn4wuJytw6oQu3fdhU4yq-Bje48xoekhteyJnRAJfvwh-QtPORWAzXQZJGVJ87SEsMIshE4aVRla0bmQxqYjoWwZiaYgQlq0JY8r3THzmBqnihAaGdDspSxJC2V00ghz6g-mOLzLtFVejpvKubWjIj4DBnO5CTOgB_iXPVeyr20TMx1QC45PSN9xNr3uiW5FXnoAyUcimVWLG0S55CEQ9NjFbiJ3yQaCr3fi28TB3TX7FXZvNjoukXqdxWEoMuIml8_w1VCS6te4vD63XEHIzU604hqS2J0RJPTsxvE2AQlsSGfMufDUf1ZZRQrhWoNteld6sYLsu1gudIVPQnA_iMVmICz0M_tliS4kiRRwnoEfz7Im0Y8C_0S018DdJYpPwQMkSMX6Yq7VjpimLqm7AA2v96TiH6upLQY_ClQERWx8jLwuKZhn-jBOK9IlJqUxPWyMcQFoSdFZGjzbj6pAXrukghj-FGMy6VR7U4-PwKnkO6bGcZrmfaj_WzxvSe80EtKEs6GYfC1JpGwwsdSJovmGeB39E32keWZyqzyUQ2r4gZSo5hMPw1EkitAfUnpdibOG3X205G2FigS57FiA6g34pl_zipiRyLg5jboyAdGAyuBlC3uefl4-0jS-orc5Mp1aLNwdzEN899Dj2Ri-BU3KbZRPE4O__d_iDoaoWnZZ6aAImVCsOHt6utgOpqazMrPuRPz1JNgda8nVS9JYWI0YDyRVYoD6DJZYryYK8lD5fT0nYjeVZIOXaJfaNgxAwXEdLojiOuCR_TATDZdRFb67LPCdyRdl-gGNpPAxNjvOlgALPu1NMe9vFibAyRxe5_Y6GdmlNy0B4ceLkDk8tJxo4sEQNbyMpu9KzpXINXaIhORFptLBlxEnifXE3A_oevn1PnQeH-lpOkDZa4Dv2Qg2iXvATC0G_R51YFqo-RLcQ2K4qZ-fsaGeDS2kOwSLe0hlKrKuhNQ8dzGsQyFCcTxyYd6cdJCKK0KWAiAZnL4vRnsCe9iN3gSidPctprPrsACn-8JBl00Dpbcgt7tqfTE1eANMj5R_N0mimsN66IHwTbhTayUX7WCmx3R5SdfozODsem4fTYVuEaV89dopAfibanTX9fCRHIhUV3hqehQsO3JERmpKdlk3y6f0aVsUevWfQ69QPiRaFJiswp-uMCuJStF2-OQx-rTQPbdBqI9R7dvTZUR2s5rsBiOgllt_i_XsQBA6mEs2cKUB7gIZCyVrwrIeePpv0MA4rFhCFPcXS2cWIUR7GTtsiFeGHSyX2Ik341QQoN435M8Ggsc4kPv2GedBYw6TaaQEQe8Og9ouLMybYNMLJsX4lFNGqU29aUASWpUIJCs0AALOHK2D23K_0jlYGctqHCbDTcMO1dj3xwtLn9kUF76I_PukGAn9DMQbzQZw-bkDeE007ELAx8-vT0kYD4Ypj2t-dtHVgIicG6_h4nMAJQzBIX7oAiKtloWykoOTbybupIFZzYH4C5Ir9fHOM9HKbI67CVpN6nC1ESO0luVzMJ2Ye_KXsxJ5LXpf5FROCmGVOmerwIvVCOswFBH-R3_ymrM-tbJ_HTlrPLV-uk4BbDlRw29RwSRpjA-CoF1vmhAIZycqHNyUJ6b-bQzPFlr-klGrWjFtky1Mcp4La9hPdEc7HblXqOgKEhXIWRiBIScV5iy8jZZ7mwIBG8FuL-WCIFbpVQc5kkMSiYBwm5V8RjV6RRe6NfUwYwMDRZ1We_4_RFQRFJe6X8Kb_LnevqbIx_xSmt--Rd03HxuCHeXqNIwyhncfMiGvIQXBa6dD82cZ9DZNP3C3wAvovZ1UX33umGA90JVIX8asV-u87yrIIWQXkC1BqWkHq757n0BKdbYN4RU4YwlODkJ0dzI1fFHIKIYiJ5JljFM8JdOmSDn3Hx2mmxTHAHfHg7HwPNIB1sXFzWfuglTKs-lXbtOiSFYYhpuFwgqek1CaQKwTVOOJdTQrd4v_jxMRrCNfQpCiqBSW5hDvSvCkJf8xlVQyHlPh2aVBY9kRdMVUOKxQprQwhO63Nd8x6gVq2P46LYEuGO8SKObFblHITJ76k22aqYpIVPBXv-1_-bonPMtm2RhEMZQ5Giyp4IG8al69ygtBkfzSMtIYtJSz9VVlXWFLZD2kTWj_oXSyk9RuHpKsLODH-bp6tGri-tFxTecduaqakS3_Pxfnntgoozh_ifDUVJa6Aa-d6KUe7xFj2S0rpJ-j_KTUcvtq9Spev_9Jc2IKYUqmiBU-aIwjwufPsNtKPCBOASHk9GCK72dJpvxdcYGW8AtUANduyYFxejsSMng1j_HFV8xpkPTzIDigwFDvVja96rZX9qJnDOH4sLVNwiH8WUzSIz7AoHxfus2c-WJn4f86aJa3HddJinzL6dZi2uvYF7-cVr25i6XNRToKJw2IJmiE3Da892ev-OWEpYF_8YjisDM5CQqFl8TpXC4fp2KZwIaq771-lgvK60ZT8CKEQ-BexhaptgBCeQJw7_uK1jh10umUvQXpiajpHgdZKAilayXxLMxILEXjO4-oByAlqAIDD-8Tc2qp1ZIafTbfthV5b-73ecs0TiBlr0a_7wE0Tl385wAUqHGDV_W6CKmWaqIOrMwDc_FntysP1VuN_de4QNLHH8AB90BpeVAgd1309jH7TYF8iOx9Gw1AAwSls6xUEHityXnAaoDXItx_eVWr2zRPl7dx_-iAHl2XEbz-ifrtPV-GEc39z-30CLFB65vHoEevnrgIhyYohAK-mcL6k4-ZHjIBltm3Vn8nIJiPRLV_LButaNt3OyycSLGyOYcu-UxfQSfRDPTnb0LM66CX_AkWNgknyCdJQaC1EcWhb9NVEmb05TXokRx-UhAXklnroU3NRRBzOzXRJOu_66GbfxqtG3_HuxiygrO2rDm1HYACmi8du-9RCIkD6c2Yg8sma5mLsVD2tpySMQvC9exyDv0dv2nsvnGYsg4Vwl5wyX4u07ywfnfAa3RYrl-VE14NEl6_O-EBlWhF8DWWbSuumM9H5yMU1qqidFseT-UfZMMeYrzmZB06Ab0mQ0KQhTa8b3xbQKv1U58faU94KINAhOG5-xG1gE9_QOjm964ZnpZOJmCYk7zFgUCtKBJCQhBcrhT6W90SvZ94Nw8RZHTQ6Sh385J0c3DFsDf1iXiFW9pjb_4o76WTJ0lfqrZAfexgZf_5ieJRtiu5E6lGEJWp7BTojjgiIcOmZ9K_D0gQgSgrgzHHwX88s1RhG9tHb_ha3DHX4VLYd6QGeJwOtddqTyR51jxca1kgkZ8PSZZUsmJutdO5bQJGEcdh68kywaI8wFAdsKWlIXgVQ4wfP-fHd7NKHQW3mkjbiw3bn-N3LbfxaZLr6EVEeNFSeuD5bJ6Cnl1yzGhBrjVj7EcR1nPdatPps9lmfKKoV6qROWDTdNj-Aytoz53Lu-6k5Tb6soz1EIg4CKHjV-kznHCba06o4IZFkcm-XQQdanr29nwBFog-DNvw32EnJQWMk4DMrRZFrqgLj3QPe6kDLleQy-V74cXvqhPQ9QAn1zvLKGqgtRy2QcFCM3MwntLtCmZ8KnErfzG5enZ6ky6pwWDBcVPkRP5_j8pZPD-lMx3eQrOMXNNJ9mLSwNmNR4jXSFoDUu-0A6MJfPA-RABYdg5WViBeOqOPQFMPxKgqT6KXSa-xIwnQ-Uux7aK3a1095kn0s3dpcErvPP40ga3kPjPoyY271HGUqXYlxRIuIdpOPwDGvrSO4q6xRIJ7rV22f__7oR_u0qZA_MoH-yzWTYyzc7wrI2bNSe-bv9KYCCBG856--xoCuTL5Ku9V2Eq6_Zw0WDHXYcHxUVSgnBlyDB1nLNx1Hy46imbrwOjQzp08qAdb-v0Tgi7nYXmY4XWmgetTjCGHsJVZUrH_idhS_JYNrIELUOKNYqR3g9fNd6nyx695WCFqWQ5mu63erJt24AosWUNtLNz1H79dwwUEHdWW-scNS8BWzWMkOow1eAep_Jf4rQWvXBydMXtMij2m5BY-nlNchaZbPBuGOoSaW2Fd60yTn0PjpfKcnP7zObfnffBJj2AyCaIWtg4yoSErP2ERO1gfzVg7lY0-bzraopsVjCYLJjfzM9X3nT-U61S7BuEx0yfvgcpJVxwztaZowQyIMDWOMxFQB0ldvEBF_pQcMcfaGvIL7vSTeaHZJXVi4DmjEAjRdTcKniUy1xu_i4hvUiKJncYPUdBnh3Gy_MVV6AdrEEUXxumuPTOAZ3gfzcd9OSvM4vMK0D5diVITawKFVAwxJr5ZE2G9KzOjEEN601yhVtL7ht4xX2bFoyTe4ftf9XhNG94XgZvQo5Gt-iwWF_hsCPHllDUc2XSXtXt3Hfe8VYACAqet36Y_iHXMEn2DdU5WmOO4RxoS2iE6zJw2s-DOc0Fu8M2Yv5qtG9BuzPTXXnjRQ3koxDSLpYSaQEVLObYqo=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.rename(\"lastName\", \"lastName2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName2',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {'rename': {'lastName': 'lastName2'}}}}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OUTPUT {'id': {'stats': {'count_uniques': 19, 'min': 1, 'max': 19, 'stddev': 5.62731, 'kurtosis': -1.20667, 'mean': 10.0, 'skewness': 0.0, 'sum': 190, 'variance': 31.66667, 'zeros': 0, 'percentile': {'0.75': 15, '0.95': 19, '0.05': 1, '0.25': 5, '0.5': 10}, 'count_na': 0, 'hist': [{'count': 1.0, 'lower': 1.0, 'upper': 2.0}, {'count': 1.0, 'lower': 2.0, 'upper': 3.0}, {'count': 1.0, 'lower': 3.0, 'upper': 4.0}, {'count': 1.0, 'lower': 4.0, 'upper': 5.0}, {'count': 1.0, 'lower': 5.0, 'upper': 6.0}, {'count': 1.0, 'lower': 6.0, 'upper': 7.0}, {'count': 1.0, 'lower': 7.0, 'upper': 8.0}, {'count': 1.0, 'lower': 8.0, 'upper': 9.0}, {'count': 1.0, 'lower': 9.0, 'upper': 10.0}, {'count': 1.0, 'lower': 10.0, 'upper': 11.0}, {'count': 1.0, 'lower': 11.0, 'upper': 12.0}, {'count': 1.0, 'lower': 12.0, 'upper': 13.0}, {'count': 1.0, 'lower': 13.0, 'upper': 14.0}, {'count': 1.0, 'lower': 14.0, 'upper': 15.0}, {'count': 1.0, 'lower': 15.0, 'upper': 16.0}, {'count': 1.0, 'lower': 16.0, 'upper': 17.0}, {'count': 1.0, 'lower': 17.0, 'upper': 18.0}, {'count': 1.0, 'lower': 18.0, 'upper': 19.0}], 'range': 18, 'median': 10, 'interquartile_range': 10, 'coef_variation': 0.56273, 'mad': 5, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'name': 'id', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'firstName2': {'stats': {'count_uniques': 18, 'min': '((( Heinrich )))))', 'max': 'William', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 94.74}, 'frequency': [{'value': 'Luis', 'count': 1, 'percentage': 5.26}, {'value': 'André', 'count': 1, 'percentage': 5.26}, {'value': 'NiELS', 'count': 1, 'percentage': 5.26}, {'value': 'PAUL', 'count': 1, 'percentage': 5.26}, {'value': 'Albert', 'count': 1, 'percentage': 5.26}, {'value': 'Galileo', 'count': 1, 'percentage': 5.26}, {'value': 'CaRL', 'count': 1, 'percentage': 5.26}, {'value': 'David', 'count': 1, 'percentage': 5.26}, {'value': 'Johannes', 'count': 1, 'percentage': 5.26}, {'value': 'JaMES', 'count': 1, 'percentage': 5.26}, {'value': 'Isaac', 'count': 1, 'percentage': 5.26}, {'value': 'Emmy%%', 'count': 1, 'percentage': 5.26}, {'value': 'Max!!!', 'count': 1, 'percentage': 5.26}, {'value': 'Fred', 'count': 1, 'percentage': 5.26}, {'value': '((( Heinrich )))))', 'count': 1, 'percentage': 5.26}, {'value': 'William', 'count': 1, 'percentage': 5.26}, {'value': 'Marie', 'count': 1, 'percentage': 5.26}, {'value': 'Arthur', 'count': 1, 'percentage': 5.26}, {'value': 'JAMES', 'count': 1, 'percentage': 5.26}], 'name': 'firstName2', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'lastName2': {'stats': {'count_uniques': 19, 'min': ' GALiLEI', 'max': 'dirac$', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'frequency': [{'value': 'Alvarez$$%!', 'count': 1, 'percentage': 5.26}, {'value': 'Ampère', 'count': 1, 'percentage': 5.26}, {'value': 'Böhr//((%%', 'count': 1, 'percentage': 5.26}, {'value': 'dirac$', 'count': 1, 'percentage': 5.26}, {'value': 'Einstein', 'count': 1, 'percentage': 5.26}, {'value': ' GALiLEI', 'count': 1, 'percentage': 5.26}, {'value': 'Ga%%%uss', 'count': 1, 'percentage': 5.26}, {'value': 'H$$$ilbert', 'count': 1, 'percentage': 5.26}, {'value': 'KEPLER', 'count': 1, 'percentage': 5.26}, {'value': 'M$$ax%%well', 'count': 1, 'percentage': 5.26}, {'value': 'Newton', 'count': 1, 'percentage': 5.26}, {'value': 'Nöether$', 'count': 1, 'percentage': 5.26}, {'value': 'Planck!!!', 'count': 1, 'percentage': 5.26}, {'value': 'Hoy&&&le', 'count': 1, 'percentage': 5.26}, {'value': 'Hertz', 'count': 1, 'percentage': 5.26}, {'value': 'Gilbert###', 'count': 1, 'percentage': 5.26}, {'value': 'CURIE', 'count': 1, 'percentage': 5.26}, {'value': 'COM%%%pton', 'count': 1, 'percentage': 5.26}, {'value': 'Chadwick', 'count': 1, 'percentage': 5.26}], 'name': 'lastName2', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'billingId': {'stats': {'count_uniques': 19, 'min': 111, 'max': 992, 'stddev': 280.19735, 'kurtosis': -1.04115, 'mean': 556.0, 'skewness': -0.2137, 'sum': 10564, 'variance': 78510.55556, 'zeros': 0, 'percentile': {'0.75': 812, '0.95': 992, '0.05': 111, '0.25': 323, '0.5': 553}, 'count_na': 0, 'hist': [{'count': 3.0, 'lower': 111.0, 'upper': 159.94}, {'count': 0.0, 'lower': 159.94, 'upper': 208.89}, {'count': 1.0, 'lower': 208.89, 'upper': 257.83}, {'count': 0.0, 'lower': 257.83, 'upper': 306.78}, {'count': 1.0, 'lower': 306.78, 'upper': 355.72}, {'count': 0.0, 'lower': 355.72, 'upper': 404.67}, {'count': 1.0, 'lower': 404.67, 'upper': 453.61}, {'count': 1.0, 'lower': 453.61, 'upper': 502.56}, {'count': 2.0, 'lower': 502.56, 'upper': 551.5}, {'count': 1.0, 'lower': 551.5, 'upper': 600.44}, {'count': 2.0, 'lower': 600.44, 'upper': 649.39}, {'count': 1.0, 'lower': 649.39, 'upper': 698.33}, {'count': 1.0, 'lower': 698.33, 'upper': 747.28}, {'count': 0.0, 'lower': 747.28, 'upper': 796.22}, {'count': 1.0, 'lower': 796.22, 'upper': 845.17}, {'count': 2.0, 'lower': 845.17, 'upper': 894.11}, {'count': 1.0, 'lower': 894.11, 'upper': 943.06}, {'count': 0.0, 'lower': 943.06, 'upper': 992.0}], 'range': 881, 'median': 553, 'interquartile_range': 489, 'coef_variation': 0.50395, 'mad': 230, 'p_count_na': 0.0, 'p_count_uniques': 100.0}, 'name': 'billingId', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'product': {'stats': {'count_uniques': 13, 'min': '110790', 'max': 'taco', 'stddev': nan, 'kurtosis': nan, 'mean': 110790.0, 'skewness': nan, 'sum': 110790.0, 'variance': nan, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 68.42}, 'frequency': [{'value': 'pizza', 'count': 4, 'percentage': 21.05}, {'value': 'taco', 'count': 3, 'percentage': 15.79}, {'value': 'pasta', 'count': 2, 'percentage': 10.53}, {'value': 'Cake', 'count': 1, 'percentage': 5.26}, {'value': 'piza', 'count': 1, 'percentage': 5.26}, {'value': 'arepa', 'count': 1, 'percentage': 5.26}, {'value': 'taaaccoo', 'count': 1, 'percentage': 5.26}, {'value': 'hamburguer', 'count': 1, 'percentage': 5.26}, {'value': 'pizzza', 'count': 1, 'percentage': 5.26}, {'value': 'BEER', 'count': 1, 'percentage': 5.26}, {'value': 'Rice', 'count': 1, 'percentage': 5.26}, {'value': '110790', 'count': 1, 'percentage': 5.26}, {'value': 'null', 'count': 1, 'percentage': 5.26}], 'name': 'product', 'column_dtype': 'string', 'dtypes_stats': {'string': 17, 'int': 1, 'null': 1, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}, 'price': {'stats': {'count_uniques': 8, 'min': 1, 'max': 10, 'stddev': 2.95285, 'kurtosis': -1.4482, 'mean': 6.05263, 'skewness': -0.22564, 'sum': 115, 'variance': 8.7193, 'zeros': 0, 'percentile': {'0.75': 8, '0.95': 10, '0.05': 1, '0.25': 3, '0.5': 8}, 'count_na': 0, 'hist': [{'count': 2.0, 'lower': 1.0, 'upper': 2.29}, {'count': 4.0, 'lower': 2.29, 'upper': 3.57}, {'count': 1.0, 'lower': 3.57, 'upper': 4.86}, {'count': 2.0, 'lower': 4.86, 'upper': 6.14}, {'count': 0.0, 'lower': 6.14, 'upper': 7.43}, {'count': 6.0, 'lower': 7.43, 'upper': 8.71}, {'count': 2.0, 'lower': 8.71, 'upper': 10.0}], 'range': 9, 'median': 8, 'interquartile_range': 5, 'coef_variation': 0.48786, 'mad': 2, 'p_count_na': 0.0, 'p_count_uniques': 42.11}, 'name': 'price', 'column_dtype': 'int', 'dtypes_stats': {'int': 19}, 'column_type': 'numeric', 'id': None}, 'birth': {'stats': {'count_uniques': 18, 'min': '1899/01/01', 'max': '2000/03/22', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 94.74}, 'frequency': [{'value': '1980/07/07', 'count': 1, 'percentage': 5.26}, {'value': '1950/07/08', 'count': 1, 'percentage': 5.26}, {'value': '1990/07/09', 'count': 1, 'percentage': 5.26}, {'value': '1954/07/10', 'count': 1, 'percentage': 5.26}, {'value': '1990/07/11', 'count': 1, 'percentage': 5.26}, {'value': '1930/08/12', 'count': 1, 'percentage': 5.26}, {'value': '1970/07/13', 'count': 1, 'percentage': 5.26}, {'value': '1950/07/14', 'count': 1, 'percentage': 5.26}, {'value': '1920/04/22', 'count': 1, 'percentage': 5.26}, {'value': '1923/03/12', 'count': 1, 'percentage': 5.26}, {'value': '1999/02/15', 'count': 1, 'percentage': 5.26}, {'value': '1993/12/08', 'count': 1, 'percentage': 5.26}, {'value': '1994/01/04', 'count': 1, 'percentage': 5.26}, {'value': '1997/06/27', 'count': 1, 'percentage': 5.26}, {'value': '1956/11/30', 'count': 1, 'percentage': 5.26}, {'value': '1958/03/26', 'count': 1, 'percentage': 5.26}, {'value': '2000/03/22', 'count': 1, 'percentage': 5.26}, {'value': '1899/01/01', 'count': 1, 'percentage': 5.26}, {'value': '1921/05/03', 'count': 1, 'percentage': 5.26}], 'name': 'birth', 'column_dtype': 'date', 'dtypes_stats': {'date': 19, 'int': 0, 'decimal': 0, 'string': 0, 'boolean': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'date', 'id': None}, 'dummyCol': {'stats': {'count_uniques': 13, 'min': '#', 'max': 'you', 'stddev': None, 'kurtosis': None, 'mean': None, 'skewness': None, 'sum': None, 'variance': None, 'zeros': 0, 'count_na': 0, 'p_count_na': 0.0, 'p_count_uniques': 68.42}, 'frequency': [{'value': 'gonna', 'count': 3, 'percentage': 15.79}, {'value': 'you', 'count': 3, 'percentage': 15.79}, {'value': 'never', 'count': 2, 'percentage': 10.53}, {'value': '#', 'count': 2, 'percentage': 10.53}, {'value': 'give', 'count': 1, 'percentage': 5.26}, {'value': 'up', 'count': 1, 'percentage': 5.26}, {'value': 'let', 'count': 1, 'percentage': 5.26}, {'value': 'down', 'count': 1, 'percentage': 5.26}, {'value': 'never ', 'count': 1, 'percentage': 5.26}, {'value': 'run ', 'count': 1, 'percentage': 5.26}, {'value': 'around', 'count': 1, 'percentage': 5.26}, {'value': 'and', 'count': 1, 'percentage': 5.26}, {'value': 'desert', 'count': 1, 'percentage': 5.26}], 'name': 'dummyCol', 'column_dtype': 'string', 'dtypes_stats': {'string': 19, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'null': 0, 'missing': 0}, 'column_type': 'categorical', 'id': None}}" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"binary\": 0, \"categorical\": 0, \"numeric\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName2\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName2\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName2\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdxY_pHm6y9CStQH7lwYjRt3uQkkHqeOmFJe1BXTwlp8ukKEqIoYsnYg5StfMSHfU-Zgo3DLJwrKOWg2i0g5TMoyESUiVdHDxhitXgbB8D_CD9K7pp-R_VhCfC7TsT_T0X-Qr_vidObV1w0vlT0M8SX6-ZmeVAG4BjB2M3-3PoLRl7j2StaSnRtAtZJlV3de3peyT4Nq8mZsZNPzuuZgTNxUAhrMWdSLS5VQOgUVKS6zAt2Xwdm7Pnx3EgalA9ANZ70F-tXbElXBU5-mYaqyRHwA9LaVG5zYcYtn0rd_PgsAbszpmvWuQxdt0Z_0tJ6Fz1gc3Jxz9XcECR94s5_djYe6lRtdSt0AQtF3wbVCbMfdu79OIkJUcKh3Wz5w50zTsmhdbPp2LgdqfpSGx7v8Cs7v_v9_s0MU3F5EE90CdwQc4hiVHT3qiYa8eHn80__D0WHDQ0SMRp_hdOFc5J-zhGajADN-HeIx3w3ebzQhkmEjXyjI2LG-_pTxunH2Gc8XaeMNjpXfrzYZLFzzGE2HZugT0pObv1T4Y9k2rVZg97WFwMN3LAnw1KWP2Va7uvPrmXAcLc02T8oDwet8cC15-t3ptOIyFObW3XnyAeV6V__UEDujPqDYp5oMZN1BnhUt2EUVt_ZD60rH0N53jLxJkF2Vn3SXIwXTDHCRsDqxg4MjGqy6yXIarG-JqcIea6eXfp6fSGSjnzfVrBX-wW2YSVNs3WdVhVB0Yj4clUM-MtD00-RFU1VmnZdmLl8Rz4AZHnGTnxscTokVZHy6Bk7mLbRZ-CTCr5AKRu3m_7LPaYee1dIFQSb6BLhbpl_dMMEUDSuljdtUbZ1ovOYbwBxsD5AF1cKr22sZ1xb-RpyGY52wWLRp1WbZiF140QEWb_qlvG3fTZ7pBC0Smb-983PFso4tFPni-BIpj-Sj0fBpmpsN2dBchycB37S2ADlUzD_T1jgJGFtGPV4CMhJLMoX67tiet1Clxol71FJl9RarULTcZ_lGG9sHyT4YLshtCu2lNooC3RaspMiT8j4DFdp6tDz6olVl0zn10vpjeDBCBp4Qxlb_7l8UqbnA4SX57h4HAmkwFtZ639F4-OkAzWcRfJoFaypygsfgJ0dQq3u337Vd43T-rnRHH7OyxeWIikBtt5d8gZgyB7uZNv_C2xIACytMINrKV02uR1ftLsVwhfeFAzZMhwNykxlOO3nvZKfwCBjgzlq7k_VjoGR06euf0ndJphd6mLPcmni-vOXrpmMhhxRCWY_Cb83o6olgPPnRs8wO6_DIuomKl8pBYFJ5TxueLIP4dz-87sAqWaebTP94T8TdRayyCqwWJl7fnIkcA1ezHz2xsax562v8_bWJiMt_Yl3XKLytWVxMRSK4Pno_NJN81p0WzsJJrTxXpE2dFj6-QLcydhLeush0Yqzt9CpGj5B1EAUJigAEva_WCxO_-2Fajd_l8berLX5WdkjDOwVzll7C68yKPGkyj-vCzI2nj4sz_GzFluizC4UoG9qO14h5BRy9ID_5g7ZqXHS44reHPzl5vOSB0C_Fivf-3lqIlxLQt2iulhgCTNEmdKbIfd8wjMg_lg-qKxmMwWP-WfUBPbC-6fovoX8mtrgKkuj6E4sNd_7W7JqFt_xfmqtf4TYALznO2QviqURNxWyJ_T6uzH_VxY3d3ARzo5aRpujL3IdWIfbGzv8ygrAR6U698nvgrlcU98TlJgEaM7jrNepaphZc_YIDD5KXWruAOByJFUXaPHfaRbWGN5wlsBRc5KbCwV1um2uCdqC9Ipy0LPLhCuQeD2caqvf6hczYVV8jQ87UVw4ODV5IWm_NGEzGArvbNW44u-7BfofZ_B83ehjRL3dLNwXRhANdlmXlv84H9nKvQTa90VuJgBjNJLIi4LU2E0xIlcNLrbttvtZt7jnNH6RWTKwdZUj7jYN-hjofG98Z881vuszGC72myf__VNFOXUy5HT-6_bIBxH7AmzRXSK2auBO7UQiBq2tz8_sJOJ6DGwwjlY88pyKd6OiqUj_Bom3dFZeDESvFDb_MTuLQS9YrvwjHjG4nzz8B3S4NeyfIXnXIsQKJYjHw4p0DEa3dXb_oRsrQTNey3zwha3K6Nm4ncSozpDme1S53JPtqpMIIlBDsun75U98Ruk0rn8Pi1UjZKl_7LYKReVLeaTEc4xOtR7qY_VWbauVo1bc8Its3NNrx9OjBDTzpgr4e1cZ8LWmmmBuuPgNdnhQn8myJVBuxQsDfaHg6VM5wAw6Th1WPPPxmmf6Lmt60sIg7vyFfhFYGYeeYJzGO9YIzRF5MRMSgNOzOayUXUcjmz-l7Zh_ZQkfdp1ctE_iCNWZNuL5Sk5x9LYsSkqRA4Jn83MyhEBHKU4b5JE9AaoLzIeHZxjqLzh6vp7SoZQOT9eKCGItoMTkLhFKMjZQhKnNA9vYMtibT37wkTkrgluWSW1TQsSzsCVhbaQVbMOfViGfJEIaK24hkKWk19NQ8uqIE6KbVdhqzzz3V_FTLvtnJQRh7TfcORgRJr1JiKzrRJRfzborajnd9_h8fWpkLF2XkxE5tI1-p8EQrTqLRcM_e6kcZpFbpE4IdgfPCNaNmcUCrKqI58ifRPnT4N08_50be_d-L8PZ9oPFas6iXIHZCzVKr-dRjRwrEu9-pTSQ6Yq3ZeBfR2CPXqzIOLzhYG9tO4lFafiLVBRp2pFJGGUiCPqtsofcuntPUoP1gO-ZwR3cdaauttJKU39Wcodo7RTNzILDxHHrJaufp63VIH_EJiOwTLurAbwEqpiz9kAEmU2wVMoke2K_4tJJF5wqhkHmwBahNAZPOtCPCoEDyD8hpT7Ydnp3oweFeczpeUPhl71dBOz9bTxd6IZFT4PRRaEeaUOBCTFaeW2dkHciTXyJRgCwwecpJBxlRpVqkCxoc4rr8IngJjtiXc3M-EAuZv0og3vqMBh3Vm4pdG1uRzMy7fCmBrUnj8UJVS0kzrufeihuLf0Vp7xEmq0A5kkSK18VgV8NSH2KwKrUle8SHyMdYwqLgCs95_VJdkdlsPf4zVvVE7s1sNhUYF4nFpbE69p1UF1fBo-V95pJzH_AZm3ALLX-hGI4dfG943QSmNHylRza2nmtOtime50wTQlPW8N5advPgSGPGx6A2sXh0T40AW_UbGWDSvCuzr7UjHRi1qrFHicydYzca66DnrwAbLjMHrvcgA1M2Bjox85HTmPJVEoETcKi3rNFMzhjgID03ljf2bO_m3tLV6XiDiHTP515w5gXJaysgDl5g-TSTGjkVFgSAyKZYonOqrm07Trw4f1eaJNlScymkMN-da6FlSd4QOQxZwAZRgs1pZx43N2FalTBMt_3Z_PgSOUM66H7n3eElW5G46jXBzmjiAIzXJXvkNINwxguQ13Et7Pks4prPyEe3Nyqf8KfsuDf9A9xBuAEyDfZBDZqQepqoBSp1TJydrkIPuQPm8HFB0ZNSDscKXNaVRvYdSmkhGsE5Y3xM6HwK0sIkQBT_qGihEPspjhcD33gZtj5EpzNqYkwPitJWv_HZQ_Kmldqnbz_0V0jOK7oqSeDMZBYe3FsUmWhPYA3X6v_GHbb7x4fizk4uPntA5SP2oQ_XQvv4LPLJYvoKk8cKd9GbQLhqxKcqdSbf4xNBR9ekmk7Egg8F9dm9Z1spvjre1kgHjWzJfK3Oumsv0Ic0Fm5GexaP2h38VQfu0cro9JYlsLhgkWAgny5IBM7dLK04k2wWJxJPqf0FWnlKblr9klltmdDqzttFk-CdVPzeep4iM4TOQzRQSVRFPPw-_mDybBuViFn6bSt9MAJKRhSeolGq-PKIDrqhuktWYn4LFyRKCtu3oB-RyQwS2FUMIUtOv4pn51ZYH4BmSFWcQotDx1zv45YJ0cDYtqFvns7ihj5VAcW2qwxIE4thEoAoKQmko6KdkOjkJ__1r81nSr4DJvj2ZMuOi7VLCqaAmSFsMtDsV_1KR-dEIi7D4xAq-P2DP3kWIKAbUOOXc0FfQC8uSPwBLMJOcs-_mzykd7qJK-bVjUtRuPHTONDYFzHsYyNIc2tgY5EU2Co3CL-Ors0JGw7PsnJRJbk0flKN36g4D51milxHQirZgSAToR4U35r9Zm8FQUDGJt60lQuUpoGzBdMOC5uM0R43qHiCokHLJmDuwQ8jEwZ5JUwJIc_3nkMXjZNMtBGbSkhXKG3siAIxHGWhTovIGVXnyuRefANhyhuGUdWdfOXVXDMPcbp1BRhZ71YApJPs6uEVkeM2X0XmeeOLuKDIB6LsojZFI0wrSYrLDmhP73p8Iw6wwOkSiOxk763ZWW_3MfYX8Mw7so65qeA7fd9r09rIMuP7b8Z7knOLschHmICaAV51JXQJAZX6QYIJCm1rYrhdr-j0C3WUy6FYDWBIGXXbCVEwbDTu55HsJOygwiuME8LOJZgq30_YvFrjop8zWVmwk4nc7dNNSOohY=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'product_110790' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_BEER' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Cake' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Rice' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_arepa' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_hamburguer' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_null' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pasta' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_piza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizzza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taaaccoo' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taco' with function func_col_exp\n" + ] + } + ], + "source": [ + "df = df.cols.values_to_cols(\"product\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 21 columns
\n", + "
200 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName2
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName2
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_110790
\n", + "
9 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_BEER
\n", + "
10 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_Cake
\n", + "
11 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_Rice
\n", + "
12 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_arepa
\n", + "
13 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_hamburguer
\n", + "
14 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_null
\n", + "
15 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_pasta
\n", + "
16 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_piza
\n", + "
17 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_pizza
\n", + "
18 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_pizzza
\n", + "
19 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_taaaccoo
\n", + "
20 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product_taco
\n", + "
21 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Max!!!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Planck!!!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " hamburguer\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1994/01/04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " run⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Marie\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CURIE\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 912\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Rice\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2000/03/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JAMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Chadwick\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 467\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " null\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1921/05/03\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " #\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 21 columns
\n", + "
200 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName2',\n", + " 'lastName2',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "| id| firstName2| lastName2|billingId| product|price| birth|dummyCol|\n", + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "| 1| Luis| Alvarez$$%!| 123| Cake| 10|1980/07/07| never|\n", + "| 2| André| Ampère| 423| piza| 8|1950/07/08| gonna|\n", + "| 3| NiELS| Böhr//((%%| 551| pizza| 8|1990/07/09| give|\n", + "| 4| PAUL| dirac$| 521| pizza| 8|1954/07/10| you|\n", + "| 5| Albert| Einstein| 634| pizza| 8|1990/07/11| up|\n", + "| 6| Galileo| GALiLEI| 672| arepa| 5|1930/08/12| never|\n", + "| 7| CaRL| Ga%%%uss| 323| taco| 3|1970/07/13| gonna|\n", + "| 8| David| H$$$ilbert| 624| taaaccoo| 3|1950/07/14| let|\n", + "| 9| Johannes| KEPLER| 735| taco| 3|1920/04/22| you|\n", + "| 10| JaMES| M$$ax%%well| 875| taco| 3|1923/03/12| down|\n", + "| 11| Isaac| Newton| 992| pasta| 9|1999/02/15| never |\n", + "| 12| Emmy%%| Nöether$| 234| pasta| 9|1993/12/08| gonna|\n", + "| 13| Max!!!| Planck!!!| 111|hamburguer| 4|1994/01/04| run |\n", + "| 14| Fred| Hoy&&&le| 553| pizzza| 8|1997/06/27| around|\n", + "| 15|((( Heinrich )))))| Hertz| 116| pizza| 8|1956/11/30| and|\n", + "| 16| William| Gilbert###| 886| BEER| 2|1958/03/26| desert|\n", + "| 17| Marie| CURIE| 912| Rice| 1|2000/03/22| you|\n", + "| 18| Arthur| COM%%%pton| 812| 110790| 5|1899/01/01| #|\n", + "| 19| JAMES| Chadwick| 467| null| 10|1921/05/03| #|\n", + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "\n" + ] + } + ], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'product_110790' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_BEER' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Cake' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_Rice' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_arepa' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_hamburguer' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_null' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pasta' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_piza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_pizzza' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taaaccoo' with function func_col_exp\n", + "INFO:optimus:Using 'column_exp' to process column 'product_taco' with function func_col_exp\n" + ] + } + ], + "source": [ + "df = df.cols.values_to_cols(\"product\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "df= df.cols.rename(\"iden\", \"1id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"categorical\": 0, \"binary\": 0, \"numeric\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"1id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"1id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.2 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"iden\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdxLK3BqRhs4YV42o_zxm7bm_UhUVd4PVdj9aM5Mc31LaeVSOWN982TeebepE6Cha88boA29Qu_MoV1ia3sDVlJELrEMeJSNmBK4OVpwuTOTbgNz4KMyGow2mB3UWjvprj7I5A8_OSCtrdXVvy52sBnQNvnlJuqWFK0LEp0UC6Ap4UBalY2RPjDoCzf_HRoJFxnHlyL6EOA2FZSigYUZtKelqisbR5Znv7_47ijogzsglYeRQwjW-i9AWU0DooXeqvSvPmmlfd0uHUUqETs_sPI1kbYQx3hSESyGseZ5vktoenuFSljPiBvt4SV4tTsSKdXzR4DhvWlbhLhVTdXae_ltyWnSAGC0CXAu1C8DYHV8NP7gQyuAnspo9PCLvNqqzfIFFOsgRlhziu6C3MathLycEkWAOQHcLn3ksGsLHQGrQGzfD3CJDMIwbRwmg52fFHBHYHqSk56pvttHY_Ksr6AUOc5shKil0OL2Aiwnv9b_gSoE__s72uM0CNB_3RHswDzRjdFeN-vxlnQ8nIwPl8In46ZREE0MTalW8VN9HoxEiXL_jXG-9BuRkUrPCD7CKIPHm6RfCOxEhl65J8bxvuOvY_BUdjbOXl9lMKfWr3SXXWxScQzOjNO_mWhi8P7u5LPN_TibXOMQuo9u7KaU0MpTfEK6xRlrsObEtQDqWcj6oIzgaouqf1KN1z7jWZq1IR8dHLvGivyditUsy9CSnMoK9GmspUkcVf3UkkkroBc3VmVCZ0jdHb5-EFGEAwu0s1DARPbUg11UQ4_AXbcn4V4DkmDnYbCQWet0btj21ipR8Gqh5INZ-Es_AaRmgY3gk8aoK3HBLv50mX-sniLRSir2UtcjXLNcrnugCXSIGzhNDneuW_jzLwPr457f8vMyCbtC5jIMgWOb0CdUNkLvg8mRYq9f8AMOTva6UO-r1lJ_QP6_FTJiu8QRZjPF2Dq1EA3tiblvwAGr7b4hpJhuAWLXdBxIeurpbezpVe89bgbEP7DogmPjZMp5BP5WLTTe01csR0aCKiYtD4zjKo9hS-_VgBCvDy87cSfXD5mTjkSG9ZFQZGDG5InLOuyIIURImJl0m8-GHFSxkpBYFVyF_mPmYeFqZGbFyzcMRbheMChRVFR8_SSb1PT8XkFHXMsIVefKEWfPecJsNoO1ZfPehwMmtAk_8aAN6laeLBsMISP3bNMT1NqXGaIZwuQEtB-k_WJ8ss2PHZH4waSW-o95K5CoamKCoQ4EPLhQJ4to419n8yFY-k4HCWsI0B3tw-rkQN39O_cNqwtir4cgbpJ9R0WELHxeCSkLNGMhF9XvNGVGiygLwXJ3lKPvUdU933asWZ_viLSb-4gHR3bPnr7GW81sFpK0TVUhZXzGkBJxvwtB5PnY9WY8lp07ZF5AI9KYVM77CBn-araESkbyl3QMHiOOYKoJFauNtY3Hkdy4FRur4oMJiREui4WGzOQphNksUUrfy0Ht8Fil2Ss6_-4EBwlwyh4jL4K1T2k3QqUqk_uNxIu6VmOjTKw_i_LgEHpnWEqZON_QgSE0itP8GYMW_hZvVV5_KNvdyVTzC1S_zJH-5jcudCMrodtu8Pev41ctUuQxUlgNh6OV0orNybiEMCcxRYzQpqjSeNTfav6S2mQZnt1W5ib63gRnjNQoOZCa6Wi2Yu5YOx2W3DHluI81VYJakZvUvZXEKyhqZQVfVyLXz5LpxSBFeyxCaAnXBt1BVmsPu8Wu5Z3yjvs4ON6dXEcsy3F6WScr8VY6QeItnFzivACRKLlZZ8iHxORbwk5h_ShNsXeCWm1nsPnnKyKFeDEi5wHjn_DKPI3xqG8YDJzJJXl1oQqrc5tvOMV9BL53zzsrMZ1bJL9ed81_Loa--h3xWIkqlLwlXYo9VSAPOTg8iftveMW4GrGQEhP-DDV42xGd19taZ8Ba3-i6Jp6Yb20PP1-w9KDobBP-poREg5rch-ExUNk1ZzidfBNJ_dIialKfNRXswa_cfaQkGhSL8N6PEvONgkOHwN3wIpe8LuCGs-RimszO6HYSZkk1m88juuj8OiIcstoRWoUBosG0krBidBIYkZGB9icWIrnqm98iTas4wFEtAH70B_5cdqWIzAxa7I-a3L63nhV80tVYMybCsnQ2c_0C070Yd7hyGBSf49aJX93ZezaGCfzgZNfy6B721__poOgjcXJY2Ob0gcqk5Mj_6DvPwxKHYSsRmHZGM6nscVgsS-chV3fYc5gHbYjagOLlNoujTIPksUUdT3xY1Xbbj0sReFmTagWhkw2AvWBpkIv-njMMlLxq_cVT_NeHx8SNxpvgFmoWCHInhi2jlAt20VZrPtqAdyrCR9Nz2rF6js0nTjfN0qxwjSBsMM-C5wv3nl6-wgt9EeHo2seqHc5r9jBAyu64H2EKo6BDj4sxNAMmV87rg3nr8U6Wl32dtE58EWEtINMYldKKUuZYhMZXovtGAqsEMdCAoNv47_GAwfo7djZ1oUGzyFp23b6PtB2Pg1tNfvYkBBUrmojOo3Qrxqp_cZPHiSbhLo1Ff9Czs9zs6quSEb5UCbyRoVHzvdgJ9H3cDglpRfssuXeO8PAX5p_LPtEjSts6q7KsK3mItT_7Yo4ElwPigYFjH76ig_q6Mp_qDpYnND3F9ulvs4J1QQp36MfSifRnowKGTbGJQcPvIpx6Xy5eKLSVeNFB2pgs-wS0VteYnIG6avIgNuajGt0EuOHa95Ww0uduRMS7tDItXSCOi2jdJ8wp4Z9h0tAXFuig4B7gRxGsT6KQp1MJu5w9Kq45xHXqNZaeeWR6kndosVh9RGM2gq8CNKb_01nQanpzrdiupw7SItC3xGMQXNhT0iTyhq7mv-mXGO463TBEAj_F67YRmPoDekYgpP6SAJP1YrORjsLEUn-5_sMQGq6__R9ZbrYJQi7OkAsxxPGgfIzvGag8UxkhKnDM9Sl9mWxdSWv5456Y1_Hp8vwsQsqafH2TWkjib12ODzcKWtdeePyRQHey_YnWUgJTyQ-zpXumCL4sk6iyvIVZoETXfdioo_KX3DNeNNGIBjoe98X7bnJPm_4d_1MpH-NyjwgwZDXYVJB9IpPHz1_QOgV6uWap0BEvc8ulkV-b0Lbf1dBLmdfWkm_MQnINp5WZ8S2Y4HSsFLGx0VMh7mfuypriQdRSe2AM4ahLowxibW0ejSZiDNF6llVFVYxZGB6ghOUvapOMKL6zQAZdGZTDjmv4XiFqucNkDMuhLmg1OnCyVcdLSEGVSTYUqkZnWEGVidxKSLmZn0UaIzo371zX_CwVZIdemNS9cQ7Tl4UHf0H0c38P8uQh5TXX1WlMxCWTLqTZLXrW6v3BuCIZSoJX-SCTzTKYwWQJHPC99U9qJd7TxxNwthxXfe8iZQZriiqG8E1M_NuauahE5pT86pjt2hLzO-YGgzjQZ1ZltguWdU8nBwXiXp9_oyGV64jvG1ABsb79p30Z5qM0qaroYP42y-sYnG2WD1OUD6kYr38Ehgabc5opa-oYDw7VpyPiRxpkRvXa814bhKUOwCvKYnikJ0QhNskLJEUzEEF-CKUCYqfTS1KGmZnFOz6gHECD7js3_yZXrEdLV974o-76ZVPpQpgj3Lpjxlh__FwsxVKxLMyiYYxOKdb3dIC0_tp1eL0YGSMjQXUneypDErRQ5358p-5FJLjZOrv8fJCXI-4Zvkz_3jjAFg_dvPqRkH8CB-_KuckkqvEgHZo6gZLeb97ZsGkznXV-hYWvK__94GNyoT2ZEz9rAuxZY6vdoH4x694jmx4xL97Uj3O2iNIFQESYAYFC11Ns1ZIwGlR69kDYBuctT5i_IKywBb8DcCiVuJhyRhHbcg4diiXWSj9AobvqEFyugUP9yGroRcV0Pchnj9ObaEA8inbKPhVLnoB0LAFdItkANavZfNofT42Jdmg4vX2wsXE0v3CjPZD8DyXhq7ZaywKNvR3U-ITJartzFnPVMNWXyr-GjbR7oAIKDKOBZNZIiPak6AE5N-swNwAK4056GHyEjUB4PydHNbzFoHrCMPeJn_NsuFMnsp7r5mlRRLu5esiD5OBx-tmrzH7xax0NWbP02pofUbIxMxDf9Hy_9IhQ0O-a4qG2cgJOQPP4k_F8tkMhhkFSGmwKBIyYmhauzIANz4wHwWrI7Gyvxv9QE2xJHm1JdXKzKTH6EDasHpmORcVFgU2FfFYj0MO2ybGEX5hPOGUmg73xjfqAnJdb2DXV2x0xKy_Hy6_TD3N8_e4GCyzlV7uPCVrY6sbV77b2VeSdvMzjM_H-YLafMujYn8M3aJp06Ow37JsTz0uvCrSu_2WX9RRQ_pFtWwnhdwvotpVRLXD0EBANje4QqkgTMmes-97VjoPW-0KcVWdRUuoqDw_SDz-B8PYl4yfV_CxjUPCvuV2g3fz_-uY_TCZE_y\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "df= df.cols.copy(\"1id\", \"2id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['1id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {'copy': {'1id': '2id'}}}}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"categorical\": 0, \"binary\": 0, \"numeric\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"1id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"1id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"2id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"2id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.2 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"iden\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdxLLFAz8PkC3FzUE2uVJm652q2huh_JS1hLnZRD90yld-_2KKNxJMkFZtDvkgG_IeuUrZlJd_w45C5zG9EtZIYz4br9yc6yPHbx9VDClInTvt6ZWN_iUa1BsyYmkytPg4NMN2kYB8H0m_BnlXVLlDM89lMejSP4Slph4xeG1rgDL3q0YDrc8A8bvHAecyfK-9Aj-dRZM59fqx3d7Hmddk1tlG_n_qaSwKsgJyTB_nXqjvBoRX1lqJTcvGmBsnehk2semk3BWURgo5pmt1sithyHvrcnMwtlsId_8Xpw-Ion_pypy80N_bI7PgLNW_NUsBagLjjVaZlh9zG54ITVibO3nyuXQHmERLPnz8MvU0EAUaYs6WXbDep55DQPvVNBeSPClksqQcvkwh6kAULxNs6tz7vieHvSsShatH33xphYCEPxrr6VeOk_bVMseazusDH93-fKwhaIH2Ta0eevzilRp0D2r6U4K9M9hLGnOdGp_k_JFfQpaaQWApUVh9TS32nJWPWIks-1ZJGT_zYD4zIJffHmd29nGFfyTxtBhfKYLS3OLU0qtK42_1vQxr8bW61xa9XeVRaWpHc_iT_0uKVjFibom9_IeMDi176EhYvqMEJoiTHVATZ734YsXtSFSPaI2J6RxbMGzSbmP1cYfIflA1rQSPMuiE2IGNifbwsC7ipiVzSh_JXe3VKfKg1WQt8IwvQvdW50T66Q4K5gPKA0I6BeMi4yl6jEJ_bNB-wWA71ujtSeLHi8BWMLoToCRItoLI_wTbimhnSlEZwESKpl9TCb_73uPOPUTid1znV8KX9r_aJrL6pd1EcqKLoRRM9ynp9Ef6NZGQ_7-ZwHbAX6tNIXmGb3iPc3xBi2TomNiabCZ9Ylcbi5yL8GVnvwxEg0Zhs3vRmbn4K71AcLeTIDU1Py34rub9WTo1raTVVQoOL1nDDdPJMyMjUNyDjl7KvWqskAmOfXupo8LhmMz1Otbb078_XMphUu6dE-0Gqq2qcUKgWjxdud2C9Tzf3LQM_C46BeaqEwrh612mdFJh7Bej4OEPGRYuACjb_LtYoon39oq4OLSGwMDU7FN5lfa83K6mDQnUrwYBDY3DTyLWMzc6Q5hEuhpvuJur2hlJpH1lc_JJP2JFX9TPUsyGeVD0htHw0UpM1Fcby8mk6Vdokzy1q43txwQYHb34H-KGdqv81c61yRRJuAHNS3_FVL3rhsgcorLPD7dWc1yxiJF0F0c4f7AujCfguv76Rq6sV5XATLcruQMn5KzUR2LwNafFl5IcdBZ6XG_LMiOhKZtvHB4StHrts-ko10IuUwGfchpX7fJhhyGPvZ-YxiGZdXZQrqAbDfjipJ0Bq1bZAPUQwQ7rRiIKY0EJA_CA9pO7rpIGUIpN3JWCTHYg3RpXrJBJA5FlNRmgdt3od75muDiAPttxF26cj29aJMmadSoO436i2CGDmtQm5eToCv0SlrkKytiNCUfHeDmyfmwq_5Y3aJuPwOp-c9KR6iIMEPUWX_ayq3e1DvuifiF6MJKV1ssQV0j_gqses4ihIp9YNadH5HEmIhHwwjGdmQsQUGP2RwTZJ8ajBSOjcHOcYdBqvt81cH4U-agPFTUEAN7xj0QRT048buDp4eFttIpO23m8PY-KrsIt6QI7JzjM4t_4RGLr3cVeWtsTzuRmcuStEKEBJrK7ToAwGwB-jYLmnuNEm-pvrqp1TtjfQpGyUj1SYncd6V1QT4FJ3gwhtp4-0aZtswShn8o1lRS2iBRXBInGZh1OvnlOKW9gDYsR6aontjwZHNWItZbijq0Nu-TqIajONFExboR7n6dbutYloRQ49ndRDE-zQnomEbh0KxUDZxL6mfkm0FbRbvOyUzMhbIFkm1hTlKOjXe3WhDsAngLUKV4ydEhJlTFpSYuF1BqFWcQECbo4bTAlLRgtxFLCwFJII0qB0lFwi-5kmEvcfrpPafOLGsOAJCZYQuPbvDUrq1a42xV0yZc4ZeOimpUd31tILHkneS1KQkOCPHMkG2U41ivUeK1p0f_imybnwoijwvkY0zbT7uAOr8RRBox4ZU452-PVZI7hmJMpHbgMcZjt7cdApTgtjaFIoNO8obFijFq0As5iuQd85ceJOjesPbUY__pvZRgNSKJlkxNYiImXq0fd9X1d5ea0oQW_Sp4ddeUvk7_rTC90WP5A01cpvhCxgaqtUa6Hoogp6l1ISszWCEDITz8Rnpj0hSvy6udtKw1frxNrVJTe-5Y29UoN8TPgrXkjgwNPlmxAsYieg6UhLSXd1Bc8DV7kOxHVe2UxTzRvgeIt28kpZDaHC6_0FOwf1hdStjLPaaR7HfZPnZtHoXy9hG8Kz-Lh6rR0dffDnOh6tJe9Lq4h9mvWgym0v7A3tSSljJjYLjj5VOxCjeSrRI9YKB7NHwyRycdJ3H8l8h7lLgtm1F2gFmN6CRXSNRgvyWWLOD6U2wPBkA9qLdHc4HqqHqeK399Bf0WtuITQ6ZvwHTU6x6hMH5MJpAi56KqfFM952VYyBeGQ6HG0hESIDNAfrOR5Yn6hcOqM8xh5an28DX687J2HBU_ITMrxiOOX4AmwEN16JxUjkRErxPQXRQjK8Rz1XuKjQofvyCXZ8V4iRTm0oxKJecfAkHUOQAPgNSUqaFjOeFTsrGBAbigEIB6iGriWDvAonFWsLKDa5nlBpGRsVP_2ydOLMDc-upJ2sek5jlQ-CElPIQ54_DIXdSx7VluhkXwgrqh2WgzDLYhdT8VcdQPqzGBaCy1ZxfGJqAPfGp6ieJ-7GWW6rHrY7GHmpaZbzCKA8nR5b7WHDZPAgx9-VVFEvrlWGW6ZluWJQny4nIFwFYD-5Fr5_3QPh154pBeNzIO6YnEwxiYmKupl1Fp-pb4ttiKOinBy-_e_ScUXBvEdRz1OeQvEJmKapy60W3Fsb9STLXNvr5nFCbk0_qGtOOzum-RPs-uRpvdJMWLjRH-0InZSBGbiZuBM9QnNGhrfWH5vn8vtCG0S5luEZWdvwGvSMYS7oS82gz0w14LBZxRfwrEn1ecOCnpKd8BKNqQvLJEWfCufNDzXqqDQybCYGYjkLDch_yUucbh62vCVEo-dhGlnp-oxqbCo4x2PCeWEZjGh_T2GZN6nRgt39p_uJl0xqjA8d20cyUYGxG46SSZexxnpNHLXl-2y2qZZiiIzvTqQv3XdjTpxZYfGg1ufD9NLi1257If-vlAX7D03DzezsRpI65Zw8RaLjaMGG7P84s7G2iimPDkgwEqNLbO7vg_5V35QidgJA0hdYfKK5cmUvyICjqYMTbty6Vfux_O03bntgGVZer1EvYCQNp6Ol3mEbwOTqZbWY88Sg04uO14MuONM-LeFpsJRXo1IeDeb-seQXRPXBiY1p6TGO3vuE-UKrrjpxhZBFLhnyrRQFIbU9zCjlUiS0WkOW0IEpEQ8azJvffezjr_TWoQY7CN-nV0qpcZ2DDXdUBgqr5pMWAetwVcIVwTYq9a4btkxw6j4gMQqFQpE3E404SFQHqghazvzKb2db36vEnJQ0XgAugyKTC67f6DWHbTRWlpHK0U-x_KqiyMGUxlpXvh75wmgPJom-V9lVdur3fW2qURgAmFQSBvY7NrI5M9GRH7dXG7-FDeNbw_PBJCPvOUyEGRrxWLlfkJN948FBaLnOsaUwrDr2etXBwVeQvyVYaWeK6vkAT7ORSrL1OOqEWd853hM0yh5-TjqzbRQD8Cd_U0N1MIl2Xs6DOMrCmmt_1iOzLh07XAJxcs6jncyUkyscdmf89RrevbYsBLTiKZcCQQFhCHPhaJWlVHHZPPaWwTMzfy211tD1xNs33Q7QNYmpDP4h6iLRQdBvUI_6sHlNhU9z9Ok3hd-2yQE_LF93fcKdhMMmZ_Sjc6sgheXw44wITbSS4OlUguazsY1N3h05RqspWg0A03hTge-ZqR30bwpJnpuHL4G-T_6O-2igwDqcniDrQZPVOJA_ZgV_tih9oN_2EvCQIwky2ujUtaI0hLxuZ49KsVGZsJad39Cra6F5rdVRsMO7KWjWCZyuohLl15Zc1WFBSdXeey26h2ESmrMerc0IHPSjCkqQaWSGOzDgAeqayIAeqGTDBuOLNAdMTbDRggPUGw_KB1heeyGVkco34GI-re3HpAE0xZDnJ4EOzrwXzG9PIKtlps5hRoELV_k1WfSX_6L9QYqIkf9OpNqbvpuHBcJSWwod_Zeq8Xur7a0sM1H0-FbtEFo_wuK4GCnBDz4CB4wIcBpDSk1sKqzh4IpYD0QJC24bQaXyA8ytNjV8d6rZfRetaG8cKjehItjIt-nj_HjgwIB8Pm4vPUUnxzfXYS7EYDk8ekltyPLSELYBC989cmrXCOUyZxHli8Y1uvWnOcidpRBtQ39R_yKHNdZ-d_XnTMFgE22NvTSr3b_LWq5WrY7-8fZgfjTlK0wH4qnr3mLzyFmpuqD7ChJi4Y=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['1id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol',\n", + " '2id'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'optimus'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0moptimus\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'optimus'" + ] + } + ], + "source": [ + "from optimus.profiler.profiler import Profiler\n", + "p = Profiler()\n", + "\n", + "\n", + "a = p.run(b, \"YEAR\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.copy(\"id\", \"id_1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'},\n", + " 'copy': {'id': 'id_1'}}}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'id_1']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'id_1']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 4, \"string\": 4, \"date\": 1, \"numeric\": 0, \"null\": 0, \"categorical\": 0, \"array\": 0, \"binary\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id_1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id_1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 9, \"rows_count\": 19, \"size\": \"47.2 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}, {\"title\": \"id_1\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\", 1], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\", 2], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\", 3], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\", 4], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\", 5], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\", 6], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\", 7], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\", 8], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\", 9], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\", 10], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \", 11], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\", 12], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \", 13], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\", 14], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\", 15], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\", 16], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\", 17], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\", 18], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\", 19]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdw4_MGVl9xekUX2pYWGTW1bLuYkn0HqWup-krRTxkjt1e2OPrHX3XstgKRxCfu49uO9tsarVpxIbovjd_X0ST97YMuz8bRCaIBIoyyfl2M70GtzufsAu0t_nhgQJz6sPXGdqvGeifYF1Hyj-ctw70DU56EPrTUKcXset9uLttF1v80K1rGuxB2WOnWI6_Q2v9jLjYP--f_PTEgtjGyyt1LrHsuBUpJsuqHgagGS7OjxCskVolFhuCxP0FmTnvglcGJ6M6ACnoxo-XTYffNqXpLHhHziiBUsr7DGX4YX-AsZFFjw_cSSkIMnd9C1RLSOg7iBHb9jQOrWzy1jHXLJevogghp5lVkHMKaLxSjE3sVCbJDmrQWLfeinAElaBC6RwPmpyujH1FaGncZHJYSNVl8b5r-eDPweILYJ0roohMI70_8wkBpLDpr1ImmtbbPdkiIv-Hjob4gZ1-1DAihE10LMfwmOZKyjIrvqTmfsvquP52I-mERRfE2Q3fneNr7BdKp82CGbQCBhhxw8GodSZcedRPpf2gdFgAiC-UpmuhXcjVHkwzWC7WasKpua9gBX2eGlOqjVVBRxDgT4NZUMvN1wT6S8YkdC7zTTILzdNudK51iavgldkmfjK2ZwPk1PRhk4oGK5rgm5eohIkgvA3k81CnbpUPt88JFntRBrAe767ZVmzBS7T3kJzKC1Ujb956_qpL2soCsO0LpzewTXjcEjwWf6_elDK1bcqGYECd9qxUrsFDdgc1MvOuQ6-o3m8cLz5nQake-nta1eiW1Vpa-neJyR6CvxVqQB0hrIQcPpjJRrlx41mf888s1Q-oWhdMAYoHe9z2FbEmgYskU-pVyIH62EOBZSl4BQbMOB_nt1ryUFeR7UrR58hgkN_rmL4tlepYHhCRoshH2qBNdjXVU71iQf8qirTgh5s0sc3TOr93DeR73xGRvfLq6NUMg80Z-_3N2krFGHl1COVlt3dYnj6VViY4Rqv47qWsAnniNNIy1mD6-uYPb--GJ8aMpii6N69ZXUeyDlLX-F6C1fzgFYypIrJqxAlKHNWrMF4sZRUAGtYBsISV5I8D6DKxzUe5ftIYtvp1qDYAZHb6SB6CzT0zEYipWa-H-aFb2IXHw0tm6giy6yWpty6o39haeYCKJP39tSeus_pJYxWK7ewT9O5bSB4_TH3-lFWDJzq19N_MmXCtJUQasW4gFpj1C_ipOGyxqmczp-hkMpOfzDWfnETG0nRlY-EQnolrr9FPD_uSawSnO8pL8cChtDiPSTCPE_rOe1-7rzq6T421TvwybCidyHO-FCjQzuADP92cPcwq_6Dz3ZfXCrcV0WMlNdOr2C3nr-5XK2Sf9Bqd4aWjZ-8b487S-WgRKzMxZ7H_fStnT_jz-ks-oC55KbTZlwAYbT5_OOz3P0Gr6i3OOfxUf6cW5Z39M1LZuyEU3Ywe1QQosSX620Wpui8D0xbQKwkzpBlME68rosMDeYoxId7pbgnxUuTkZZCG0jfmj9vAQqTvaEf0U0CofBYhvoC3mNTgFZNNF2HNffZ0ZrTYfXrbRghmCw6tM7SyqOKAKEDWmJnpDw2NGnK_WRgO8AHlhANbk9Req80v3H3vTHa7PaULiI3nYxvZCO0CwfhT8SWjJHvMhXWBu0HEBtEqsYUjkg2DFTo9p_TzoeCZIGJPWycq6xkRK59yeyfc2s2OWBqpqucqA-sw-iQrfXxgrRp-swicG1t7-qq5-chhy3HzYoIYws2eTd5v0otLLMWy0H4IR-9p0D2kg74fDjMiIYsi0A2gdZ-X05gWRuEHL_bJ7m8V6jPhEsxv5DSUckj84mX7UqbBxIQOsZvshQ0HzQaJcGSIGIvOAdFNTbq57wWOhSJ9RLGxpRuoBm30EuwZQ8nDKnxmWw5my5iICe1041Ldl1aCOcbtrUM-2mjS-Y_pi9SK7q73AGTqENN41-w3A6dYa43b6ETbZSOXIsIv6NqTEk9OsYBFC3PP8h4Pss7onK0jKDgklwfqXk4DDIi_SLJ1ps6WFoEHfGFIa-Ed7cKJa7TC1gOWjOpcAc77wOpAJzzdmmxv_L7A9to584Mjzhyj4VaHmHtSiVOe5nnV7rk9qlSjW1O3F5BM4Oq6T_qHXpcRCkJxDLQdLpD3yggRGx39YPR0jiiddvTlPBXu_0F2H3JBw6AZqszu-mqkJhbKDTVwLUvJazDiSda-rGhBIHcv-ZpnCVOgZrId4txv94sXK3fEVgzeT7UqDSUfrN_pTrA7MQ6EksUBB0DXLSd3g5_k2a0vtFfr0sfqb2h18_U0iHJD5foRTG1slu-9RF-RmLF4GxfG2TbN1BDRprMthQlXdRoT_NFyGyS3XhazkfnjVY5L5Fce9txAndGS2PQFM7dgR6yp-6KZO1eJNdRRVNIy-NMsv-3kKVhxQrikVZwSL3x8JH9mmx8YsMiyXlX_5_Vb_7bVTEvgAt9dYebJ8aTc9Np7tpVKvVjdpqUp5rgxK4Hj2PZVGpKU9MyGrzCmihVAk2sfZX1Ytl3Ul5JMAwBsSoqoH87d7KRV0UPe537NYgAAgMc4qCnnC6AA5qzznc4djU0edEC8CjXEFtWUyTpdZ-SZ4gm-34b6XsQw2-GxxZl8CevzZnTICfdL3pBSHNeTmqXmd5EoHNOVP3OE_AP_WfyJU4O6hywMXlw3kLyDCbmHXug8tLdz7fK6eLvyRu_ahLHc3cxE_dL6otylXhHCfDyD86QgT2QbKWDRaU7vyVNNu21ZwMljdIwB0SfqMExocwFIa-bcmyzKgSELVa_4ri0ccW4u2QMkRov5TXra3Jy8nOlMJEZlK1N9O0eUiPY6BIJnp5uKrD5WtfY3_JOvilIPEIAMgQhGg0M6_-yeneKK8xXWyfko4Um1_TpC0fK0S9lSOyU-rV45ieTYXqiHQAeaWMJ-jetTUjjTmBqlVqXAx9XI2wAlyJqjqERYFuVDkh5_Q6FobpCXYnv3IeqdS9urvkOhqipjy3glQNfTrlt114Gkwc2wLbuCWB9lN6JAWs9w5z7lybG7yqoBJlYvKCoEqZcpJTGTPmmrrB7-8hoLuCrZXqF2yN4DFe7OLlxc2JJVejKf_dGn0SYgbl4meO-WCvG4b1jwiEjkUhMaBw0j1O4gw-fZybZuHUfym6gvTVoq3joVcho14V9JTCZphaB_J7IZH_4oqxCd1dVvObTQAgjuDI9rv1hcQq3IhVaUlc75I25wXDSMVtrIti6ZBvt0Jg6iDQec38ycY0lVQMdTMc4VJw6NKfYOHD-MMj9hPJ61i0jmasypw02lMg_G3-NAAJ9GtGywlpjg8BvjCmSZirimV_7z4ky-RBENyOs0ScGM9vL_sbUe4nuPNfisF6lwgCvpdhBgIHxIdo-Av3yXSZl1Dq2c-zTD14dx9lV8yV-tL-IL9oQFPMT8txZkxoGWeZvdGD8ECfRymJitJGChzPmLpVpPFqzcWFCGCD2Bcv91T6ycQszWc7mO2u9j5E4pr62GHKH5H5OmbOiQ9RdOYT2dgd5EYIxncNuErV3XnwSgIOLWtoqtAl9Jur_qP-xZgi8TBTQQjE0g5gBxxWMC6IMpzcOFPV2tI2VJ6i7-UGiHuEe8ZsJ0mdCybFv05bxmdhFg7ESvizGvq2x4uJ_uGvXaoOsqto92KnG9Ce33UJgtWnzsgm-YXA5-17LcIqlX5gKT6CtYPWUSARr7p6NUcZzbCSPnvYDX_jJIppP5bbNPoh7uufnjNeEMA3to-fsLhIl2fPm-aszfNWaMf_vF2yzmk7f6IZAr2HYKr0vS5q4U77okhFkn7nMig9sqyZ0aVamLFG2quy6zExcghDNPh-7j3DovXgAApJNTKv9PQZB-KIjneqbqsFAFx_pZjFUjs9DdHYAraT_E6LZM2sjutD16CqrjYDt5Ivdagj_aoYz-fjPKOD_LXmS10wl1_mEqR-NpQdVLOXFOqkMMDU8qaUyBbHbZwz5BWJGbe8P1E4jrfJV_JqxXxwWDNkSRkR6ZBo_jzSi3erc97mKfQc1pbRBFj7MdWyKIc07WvZfhsR-364r6Q3Beq1suvgJEJRWqZ14xwyIx00jTkz3h0fOMkiHfsZAnp9NYaqAQQM5gXTIPi6mPIaEettsyzODxejbYy5iRfXWlHBoYVffCOQRdQ5QWyJKgCY1wj4XjHnUtSunyIWPhseK-GH3oArbHQm0-vMXuNjsMVEI-gn0dgajBNUnQ68Ca5g81tQaHJjf1kj_BthG87R4TsqEZJcK-bmNRv9tp7HMlLhn9mSq401_dhKKIUeIWKaZ8N8m8aqn3Ror-jKh31ktfJAfHQGwStBMdAPmtWT5VNLqlSunMr6jyiYoJLpdc-Ye5XmZ20FNRxkFgijhHbL7pg6O1YxrZ0yWz17vAQ5JXa-EzqCcYaCyd7eMY-PeG7jfUkpW9XXZD2WK6fDyPM8DmSHc92dlTC6FtLEvAaFAaqXJFG9Cb6-Ed-Sh_H8XeVrdI8JX6PSYtwJxqHOlrPCX8-q9yf6KWdcU_8RJ4sahQn8_GaKlOAVTqF9r1fww-3GSa34_HjmNBH7oU2piiefhejEkwSjrqpQkmw1s=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.copy(\"firstName\", \"first_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"string\": 1, \"numeric\": 0, \"null\": 0, \"categorical\": 0, \"array\": 0, \"date\": 0, \"binary\": 0}, \"total_count_dtypes\": 1, \"dtypes_list\": [\"string\"], \"columns\": {\"first_name\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id_1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id_1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 10, \"rows_count\": 19, \"size\": \"47.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}, {\"title\": \"id_1\"}, {\"title\": \"first_name\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\", 1, \"Luis\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\", 2, \"Andr\\u00e9\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\", 3, \"NiELS\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\", 4, \"PAUL\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\", 5, \"Albert\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\", 6, \"Galileo\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\", 7, \"CaRL\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\", 8, \"David\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\", 9, \"Johannes\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\", 10, \"JaMES\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \", 11, \"Isaac\"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\", 12, \"Emmy%%\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \", 13, \"Max!!!\"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\", 14, \"Fred\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\", 15, \"((( Heinrich )))))\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\", 16, \"William\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\", 17, \"Marie\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\", 18, \"Arthur\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\", 19, \"JAMES\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdw5Cpa1XbSwNEXKML6ea6zq7qRCSdq_EnzUoOAp3Ha8tZnJAdTs--lZPjZ8bx_I58t7J5cwocgSgExW7PKevJOEh_zb6eJNstBT0TzbllB4ogz5j3AxZKGgZqk7jGxlFww6GsqXu4knBnqB8tfX_IBzqKv_tDF8rTV1MFi7h32nX4YBzeFE8aOxWreWC6_V-7WrFyyWXauvvGZ7vEiY0z3O_bXmnaT9z0duv-Lg47ZjHGPSa-lxZw5n136xqU3VVXXfH2NZjpmyeP9kldPVBmNuoybAktagasSnPBGpAbzHaeet7eFm6agka4eeFiwb03xPBEZE6BhJRo7-UcdPB0C_hY29NI8Y5PX7yB4VI-9P5pbi5o0xamXWA0Y9Ex6QoJJKw5TqxwWBeG16cNGEwDV7Us_unu5sWE8iMjttQV04mapVvN9kQwWRzoGBnVQx9ncCKbBCk0Qae4vDuPXpfzmvfULDAxpUxmGx5IaCAexSs07txD5W-7D8yIxtut64GKbfP9EOJVbd01KseBESA3UyS4AKgds1L5o8tx2pIg1Lkj0tWHYsvp1QZ8McCzoPPv_oSUnTcO8D18FyeS2lM4ikN88Tcago_PECM_EinFNhxmawINnBSt1h3CwmgpbNnBbZmmjMUJKkqSkWt4BzWM6GEhia67pqtW0WdWm1S2PQfS__0MJMrc5WS0xfOglBWFj_oc2GEA_19PH-DiaFZw2vf6VcZ1w2wnuuLcQ3CwB8g9Tl5twSxeplCsIHZ78TaeSHCGrq8-4SI9Ap3EkRVoTQtPWYYZUOScfsDwseju0d3vcakhQEzsW9QgbULg2zjtI9B-nF5tn4q49l7s0d9Ifva5U0Q6pvZ46McAucDOBh8guRs9Bn6xz6wNqwXOwnULxa1X70avvs5ctOmpvD3WUxtOx4xG1VAQ5bCSqTMcxUsSsHsvUFMnECxmnUtohqNPI06qIJkhOnyoUDdWv_YEWdbHj6kDV4OqXvQitZCc14maeJQdKYUsD34uxWtKuIOW3P7i21nw2sWxKq9CbiuU5qUFFs6tFFOvr7lz9IrBiPCLqsoXHVoPAvGuhzw8f2LNK6LL9dLkdrtaP1fAttuNAWum_FnvyAmwB9wWNyy4cigR43bRuS-NEKBjk6mzcwve2ghlVA5KQgMyBXMyuDAks8PJgWUj4fOBbo3JzeYOd0gGEPimg_eaClddFFFrP_s-VzDjUNWUZJeeXD52mWY10xbfE2-CZhLIDmAo2DRbqvdCbOnpPZuK5jqqDcN66pm_ySwErFrjLYDhv_hr4k7P7w8uFuyDPVCQgkB2LAQqS5xU7lG7f1W-owrAd19iEhdEUlXcFirXgEGWTSr0QtypGB8piorRPDhS5HO3d58Nl5WO2qXouTvzgns6KflUdLXwI1KuET8mOLZMDyoibJmA-iEFK22uGRQAazqnzQKLxQFEXdKf_Is81V10xmrFNOUlZCNriMtSFGZVPTss6MDOMBYx5c_iLH1KcW5YNVXocAPA4YIOa3-7d_lr0wsxnpjtqYjtjb5xzAfmIUfiFf45SuHIiL6pI4OR9_fiD-gRirzHSa0jg1VMIv5prmnn0icNmuZQzmKs_v4_qiEWbzotgqE1RME7OQUcHeAgPrDuZD1fyb5CqQ6PXqJwjc9656574vHz2ej1IWvx10D5e7P-iK9EaLkZEPzGYNdENmgJu2qUKHmDAGxXFvrBFg7efnBjI1jOQIJQRWr8GNOldmecphJTsPL1ZxWBSKALTu_H8-T-Ni7JTLs-0UiMTfbrVnek9upAwQGXPX4TgGRXkvJHczvUyycV2OTsLCfu77aq2MlOX9ZyQU_nJw6hhluQnvfTl9IKEgZHo5xorbcOpQZAU-cGfKDeokPljVtZ2cwJZaiegamvMqQANXoclU8O_bd5bIpwwU4QOF3mnpXzJgrmaxXnplIWzBLkpDXzBYFMY_g-HFdsM_Fkmn0x2GC4ngwpjSSxOSh7Hty93rBQ4OybDy2dgybPqu-GpOXLTmZJZK8V256iO9UGgmTmcfJMuzeodwts5CpDSrKc0dbBz7Vt9Z97TJkd4d-oeG8B5HHOXpW7_15hly4GP9raUXs3C2AEU_Lu1elcPwzfw2As6RZjMRfao6bpu4GNRj3dBDshDnZwoxrUq1id9llHK6CsSFgWFxt4Y6GOkHxEMH06xEPWdedLozgW6hI5SksKVfQHojlB8SCBmYU8RNTg-RGyuSGJ4wKjt8-DbYLCPCZf2uUzkTjbpgroBqTnPCqc3ioDMKWky4Th8CFykyXSHoepIdM0TDZJ1FxyrBlWgP3i6l6OjUHz3t-dLPnZmDasBT7dQdCpBMy5UpgIrMLs6Dw2H6Ckrfqw9y3Baqc20mvVg2pwiKF-dPXkmaAx93vmvgPMxETD1IF8FwnZyAITFOzzQZ-22qJgoNKdff0TZJbE2TsfV_nfmZiUInsP1T4FezoUQ1e1408plPmvzr9TTUOKGyrdSNypxonfR0CyqL0i6keyDzRKVUKfc7g1wCXpn7xVlIGDqknNw59X4ncWKYhPFvuGQeEq__c6qqVwpB2jQ1LoszXhZEN3qCLkkwV-7o24PHu7SMmpSrdIrKQ1DzFGttF5qzgeyLl_66CtVRIJ79S6NvG5rReCxxLSfUooac6mTpamEyZthv6GqI4G6ljRne8sV8k-7vav66-ZRr_LgTC6Ohc9Nk1c1dpenkJWVvULjHepBHBK0s9JNikdE1C9xzNrvgKfsbBgxIU77HMDBbF4Gs4CMccNccvzksXcDOb5EZTL0dod-HES01TI45ikBipxHk053TVb1-YUhFaQUTxOkpN5ypTRYgYh7IxyVeU_Da84p1MXfdan07rcIk4_HliuTgg35T88eFYP8S1Io2pHqjVetQSrGJCP0FFYd65ieqKmYbalHPCpp-4Ma1-XpP_HYw4DsnlIOcMkD63xMJlO3bh9FLRimY2Z7O7TZ9lRBGum2QwpUtY1vlgKD8-LB-mzjfFSiXhM3GO8hrN0LsDl6cHeYQD68ZKCi7-AJE0XYWpbnEiSmL2I-jtXT23BfQFJwJ2Ra-3DBMOLd0PV5zhIskOZ4pDi1WIEhBWfU_UuBe8FTE23-uZBVioRp2PBua-oLysv3_A7O-gZTByBySIDh4p6PMkAflW2i46iQ9TdxFiLa3ul64S3C8evNzqgwZsZaIUTFa50cB0RKNjYDjSEMWvzuaJhaGawzMTk7R_9QDxRmgkiKMR-PjSoGmZtWgFCgvumaqIss_NLl-GSPScs-HhoszaRUyjsS_WvK4MNwzQa_up4KHaEooMGaDVJo4eEs_n0D_eOiIx576RQ2Nyszv55933ntXbl2hm4EESEl2U_5m8tl060VeUOEdsNLbzgy5aCk2Cu6uy5qd6k1T0a4pRoBeIyAc1YAd7JrXeyAMtQBhVGOEwAqFfavPTE9Q9AZ_VZPC-8_XG4_q-fb7ue4OkHTRU0k09OYj11ndLdNYGrnMptZUQTns82DysMgEDF4hiZhEqvIvN7KWzZnBzOt_nsNz65WWWhiEI9T2EtVQm8QAVRr6jDg3dYPzY48tdPxLltsKZg3RGYuoOOrd9o0Xvu5ywVn5ca51tf6pSWvaPrgAKnkQs3OBQy95IQQDAu_w6CNzsEewrB5uiNnn4G9eOsHIYuMqpd-YySQAe6g-7QjspRKLkTEhTcqq8Cwi_xOYBIbKVJWdQvIGRfNS9Ybg9V0a25T0hR4fEx546RzEA_nfJ9fvjm6lXGj6ZvElzOvmaJx6nkfZ3ytyUTVzxz98iF0orJAyAFqlcSadcRzKl1kbuQ5Cm9_ZmjpPupXHXyOqmoarJrY7yrDOcwbxX0dRRrKK6i_Mc0OgtsZlwSGoGRfEUWEyBukY9a_OM6i20VWRxaKymwrKcT2v38JmypA7qKgi6CfBjwlafez_LTBBt-Lrpw3_KYXlaJtCpZrQlZigrEYaWuz-LxN597NjPHUTDOv2N2AQblxf2y05FBoaYjoxrXxDyaayCDkZ1WT1sUWQWXWAxS018YGEU9xr5qkHWZoSLKe2b6vW-m_q-Q8b6D4VICEg-ax_ZJaJdsDetgzdCVVLT_L3GxAyOi2w8MPbJNPUtS1KnUOjgW6Zev9kc1Od-6iwyt1N-XqRvI38RwHwxebV6lJkFLOhjkdliR21QXvT71Kyvm66yio9zUP7AEL8Glwwp7x9AH2RRlysDsxQNrCea5ANKcKdlUYdi0bIs4hmoki_FUIk2AzPlokvWprv_VhnVuyCqgMvYuIenuT5ZYhfdk6e47HSK09Yzkc68ybqcRtqe4KkO3kygQvSHPuPLP32owXzpsnUCimxhmA_6Izc4QSCNVxlpuwg_xUQo-qEkWdqxNYQnlhMQM54LCMGLe-e8YU2liPKlbUbVTNKGDe6YUF-v1LJl8KRpqjd3f6peueuhAS48-dnVFZkh5pdM64aJB0gKuYNjPHD951x7t0szBAJUE_dNDZ2FvM6gzffgBujSMc5_dpMwCuv-JwbGecVZKor9mTQjnwDPxc-3t5FJlKRCFumWovsOZMPFRojdFdse3GNjpYYyhVvW03rJT7XBxfvJEcYAPCs8JDKWg\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.rename(\"firstName\", \"first_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id_1', 'first_name', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id_1', 'first_name', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`first_name`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`first_name`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"array\": 0, \"null\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id_1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id_1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"first_name\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"first_name\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"46.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id_1\"}, {\"title\": \"first_name\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdw0S8Uu1hyi_ghNzbu5fkGAinCWOd-qBR9mc06MOFhwEOu47ws_rym6Cd-J4ZQ3GP9V5Id_y34tog4_blwTzhtysGRNd_5mrEoJbsbRJ9Im8cN-0I0VWNuQGZaaDFC9XntOAQfNfMzSH68-oStcKlzmQauJDfn2l-b3zRvtX1YX7QwpLMbh53B2mlPNBQjXYx_llIYGPJd8RWy00S8e6nVaNCqBc_ej17qg8PK_yIFXEYWfzKWCdwkAzGTHmjvV3WTvptJTIcjc0mVEJ-96gMpdO80J9AURjl3QgIV76yrOPee3kzazBHv-6oe6wvghuZJ5Py5UFVRGq5RIPECJu0FUC2sv1r_LUaJddiomKL40dC3SAy3hKa-C6RSKZjFKU5XMa2CbFjYrcwPcQ8DnvOj5bZDICXS8ZWGBdX5VcfBbFwm-9ZTEzgcbGbHmqAd2bMYIjW8MAH6lgXrCv0y-Sb2FmETpE36N1GyNM4tYltat-FCe1GUz4xmBSrOPNM2Ru2q5OgCAOyWf5kiHY7BOZz_qWRGO_1Pzj1sgD920CKQGzD3_qQbdEvBcLLAE77TDV_lt59H9l0fWC7FEGyYsw8nvltTcnj-S6jXbFz_FpT51-RrUocdRiS8JeWrZQV6z21ZMBgFiDnAMHsV0to5ek92QSmgUS4tOqO32CEBSx25tQvF7QvYTaLI_1nqe8Or3luiyxt93fFobIDgNcYXtEwma66_tK_gx26kZd0SkYTFfzk2qkaVayFfthXvV5YLnq6--xJWqlsoz6q94GzXjEczs53JZYUGdJTX-Bu4lUYx5jYHEfwJE1g657iTylRAcky6W1x_axgDLRHXGflkhpAFiiW9GG0WRAXolxFHabGm8tQiqo8SgZdHiCR05CwqZiaQamgFc_xdtGkVxQfRP8AQa20W__G2htM49ND8EOiMtJIyTn-KSdjbDSDdAR3nVdMlqrQT-Na5Qt5P0Reg_Z_XMzJScCtjMM6IMBmrwCi-c4FPjrnFoC3xJc6Yf-MTwGjqkzhkz5DedtZYatQw5h6JNshc3IsoxivDaC6Yxp2J1ilIKbIt_zwC3rKerQY3o5jskCWMIcJK8Hmevsk6gulaG-hOWd8xBI-0qzpdQqHa3wbCA7-u5IKwmrTeHz6aH5Q2ocFuv_HmUh3NVGDpeIiK8LAHRRnVthPEm7XBnamxFk0m3Gv87FiJT8aQrnztBkhCe8ee9FA1TFUHcz3gsdPiFClVANc6Et0Ch4T6pnBisHkK1_wnp6mXRj7oWbZLuFaO1lUQj8FesbFMSXw1XxQpmdj6qM-9uLR5hXzsuk4VxdATdaZzho88sQdxyLFKD51tW3x0D7bargJSL7QtfI9OK6JUZxHcK1cgahgArZHYS0nJqaCgvNFnOGQvOQ9XcUCmPjUOoVt6DMyr-PAwAkFBwu089c2KrQxyYF2r4MZEDcSy-7uPIMeC9KpS4plVlvTh7uRSDaq5WVQaZ5AvYhkyVDGIusgwKLQ2BDto3qfjGhL2lPmo3GaUt4ffoL8ckDYgylimj8hylN4KSkiqeTPbStUgAOv16XDvrEwlhbq31xPRRCUlRYh4bl5FI8oBcg2F7rf4tuVwdBBoBGuW_6E8s0cgOaPS_h_KcqqhZMErpAjJaRwIEvwYUQgWFw7MJ9Xv24gdLDjEehxq2p86Vn_izcCUdMfcYt-rjEdaP4L2MV2zZncSISGiCXu_lchaQHo02PGoU_qCNkzc1Pl8QEtYFQJNLHuGZdRS81CPPajcxY7We5br1s_dJEdiSuYOlPZGQeL8bKk0J-KgmUdbL7GzZ6SrjNHWns8UUJ3pn5BEbJi-D8XfMXHTDcKUdh1p80YpadTD0Fx98_GNJSSRRBE1a1h-_VLrPLYGQQyBtRTURx18pbbDWUdPfvfXfB67szMgLaryZggE5LucFPF4JA6XryS_l06m_aJ9FKIBd2JV5Lr9i5ahAuEOJGUpfksUVXXPjG635Y17CPnckRRbp35p9PIgPUW1FFni-cx03a-m6DfdNlRzMsecv-OujNrukNrYQ3AHqN3PcIbwumhLOZoOLS6pqswVwpMzZ1ffh6Ix5dpSO_jS7Pg4_Dpvm2UnoSBncDgiOs8FbDuSityDT5Xk1lqn51AbD9VM1QTGpI2PZGzJE09_s3XbUG2hWByJMCkYz90emGVTr-otityj87itiogkL-I_lC4aoyPO9JyUvgMhgr7migD271l9j9TH4EfUpuy1MLNpgHxQJQYsWY5u9D01BHJJopZCB7MJ2l_C6nw-IjEuJ6spjtEAtd5495YKzp4XCu5sftNdR0e-SdWQinv6W50Y2jKOD4bzR_r73-0VSWVQcKno0Coqit4uhYmfVpsBtECrpuew1BhVFEWkt0Ql-nJX3wamXtBs9CX_kWw3yzBfaFbKV2kitgQ8Ae-3KdNyg4JcDEJ2BvkRM1ugo1W5ESotn5llEqp4bIfef_DaSRIVzxNA9_IDo4asX5Y9Gsx4TMrfardgsa2GHWlIZRbHofDDpF-1pe86ualnAzdM5pfA9Pj7FED9IxslsqDGuQ0aXf3UKWA1nHNEOTmuubDLm0tfiGDMad5QzqwHAtnU05imsPw2dtWgOPumI8dnWCyCMPkSt4KocPgdC88sF25FbdZFXLg_j_sn_BlCiXMjq0UtshYhKRV8MW3cRRacbbObvCDpmeOZcxviqKhuL9LM9t1QU_FWkWD3QOel3dJs68A2orK1FsWP8lGUK6Rv220L5xMfZoF5T0Wf7eeKgxzc1I0pWho7d8FGbcSviAMxF4ZjnXn5e16XIj_hyHhIyAFPjZ2qX2gieglZWzw6O9dls3nN3NBloUuFBojitzU3fQuge-Dx26K99plQcP8El8L3DD1GRfVVDrF9hIwFJoHPMLbhKoIv9FIV_5OyjbjiJ54DYp1rMHafaYV2cdN0pfvqjNlJsLPdELJd7-zcsrM5Oi3BlecTCHsaIYNvXqlklNDNNWXQtRdag7n-53ZgstE7-NFm4W_FbE6EhAsJZoZSHir61gn0XUMWy2XNWi0E0XT3fjdTOWAUNaai4qUOhxnr-4BPtROq4jyePJgaMaYR1chFe4XszqjoeJLd5uqHLW7956NGw-BKo1OSqALJKX99sE-GOpYj1YmwcTTfiOc-rYhxxsM7P0KkmPBuZBAW2wIXNG1R16dzbFziu3XBbNfDRPN5jGkmhbKsoyb9Nsho2m4mavFTJvbNYDFn_4dySqJPIEZlDgWFuyite3wWkqOMmgyfgreliw1xJdaHCv8AufFexzclcZqHZMHDOYmXjng4DUeiblYZ7L_6ipqfFzgsY51Qcw3gYBz1HARVhDO33P4rSVjGfRyb2uqKJEJID0dVjDJFW5Ptjg2wMNMZPB0ZFnV1Gum4UX_I-9cg24awgho6FTPWHK2t1YfQDtsJpAUtl38c8E7Z41dTkQ-PlTaDZA7JxMGQipx1R8BD_1mmYyH4BG0F_XpuBfR8-X0srhN59Mi588Hie67n0AWlYuebk8LsjNsKyAs1Rq7SRPyu5pMwM_NO3BsVNbpOqVSGHtGTGwIbk9_RGi-83H_xaxEr6FtJ9qgHPpsvaIfrATdAsVgBNuIck_vp_1ShvYNnTczWJyIeOJxIDh5jtsY6ORnOC_kZYlAqhWx8qD4kdsAua3BMSA-haVt4k8xHd7BJbjCMayV-zFZiApZzyqkq70N4iILVSNuECViFGaQv9oVa7C-kJ3Or21XYDtk1OfmbKxbE_ImL_fLCruwUoJmtZp-Zu1qgggoqTUepfHrKkcb-fj5n0nIwJrshU5RQIE4ruyRpVVUUPRC9Zp2X1S402tlUQPDAnaEVIHbYhDP_bn5tLXQDkzb2ym2CmN3NdsF-4PATd5mqJtt_Ped0DMHWVL-TD6xoAfudcduqhYR44WflDrr_RnRAJKrnJk3K_YaYd3ZLQFTPW7ApJkHsFMdPYVcBQVn9Zvm7B5m_VvpdOEy7AUgnUmAYqBb-oOGyJpg4uCQln6e0WdvW_MdPEwfDhOENYh8lDU6EQCuYyk09TUQb1TtxLwMJfaYc1d_oOrGNq9NeliAlrDxxW11xgTYgtmxcq_kugfeUhUwkNGVmCvjOrLpPfNdzTvknO8q71ndKtzREs1QMxcHO0-yi4ZSnyCW1oZKZLXmZ3jgh-8uJPy3d7p5IXhxii6d9S_52oz7oAFUzCQ-KQeD4OxzKT3Biu3eN61U1XrV1CiFMQfx7unR9PTmzjmrfrSSyUxYn7TiNwHDUDjq6v5FgygHbQlbaX-JNKUfLxhQfhtKT6XclJcSZkOVWETPWhtOizTogVPvwcvD7Zc55ExzNkkX4pUzrWaLQjnHTKSjxvEvtZrhPCiIdqLrGqh_WgikQ54PHViJSah0ESGvtoYcb_AC2PtCJIz4i_2iCT4p8KLs6VgVrt4YnwbfV-9xLZ2t5TuZQoj7LsI=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id_1',\n", + " 'first_name',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'}}}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName_1', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName_1', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName_1`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName_1`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"null\": 0, \"binary\": 0, \"array\": 0, \"numeric\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName_1\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName_1\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.2 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName_1\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdwz5DmHnn7voICl0kA1zFJ7tf-BZtUqtQ6V72pHuEyZSH0ydqOhcAGCzPP9w3WnXZ3kDN6jWkkUhxZui9uKXbNBmtQPSLC8ZFAudpxptzcKmc05xxXunWj3MlK8zEbp1uq6HEAWAHSytJUL5Y04aGGweL6oQtdtglcNyvblX2t7k5-gd35pRZxhY7m2rJzIhZrOwXD9L4eNWM6n7gigOaxzKVaxUxwBz-zG7HJ9lUGT_aVxHOJv21pZ4tFaivmuQ7mjSpOMM-RHz7kIdaGE7MugF62JhBCfbEIyxW52sFDELvz2gT7mJyEaRfmPcovPM1W7xK7fAaG6PrwwWb5hvVIq2FRX3w_Tc9IX_mvJszMUyRo09q_LV67dAMxErLM6iUlgzQUBg-ziBoCcPnhiSFyDt1txJdOVZps8PpQFBCo8JXpWbq-bYrus7Jw0DwCdgmds_m5iDf1YO5WuIPUbX4ClR9NaBRCHC7yaQiJ7GQYyqi31Fbr-8xaSioezdmrNnCAAOM2STi15SDyH8-tLKI3tgi9Lr_aHsbHDL4PQ9BwIpF8TqFdxAT_Yh_Ow42OOTvFMQ6Ros9K5Lcd0d4CDu0eSrizEpjvT25rdYsUMMWyiItXuoVW1-ODzOS96mDHIXIF8ytBD7oad3N-MXi2PCpHSMrlq-3m6ckiROorEiXCHUd3xNQ8I_wFL0EtZopgUhkenfU1LEioM0_jDuw1wdWGsHup7XMB-xBVUApmLvYcw7f102nwHvcVSyrXr0vXqWHlz775RoYG0dqi4F1Ri0uzGINTXZHDWNaAs1w4IBHehWBGrrwDTalBKEw310E8My5jJiNKgCRyv4EWaXLiuq6yzEs4LLHpKZuv22bbT2SoNRcjJNpv_45I82gfb3ImjE9o5fMAcgeQrzaSBDmnkMJwx16bGUoT5Ie6D1fNhSm43UrwAdJNChtKKXWevmynNAKhTQA5yDEOqG2PLm9osGdHEST-LwO3MzNXh-F6CwTGyg2aORyh8F2h8FhgGScJGXj8XrlsE9glH6AqkEN23Tn_QlvKG2rsG782Dc6DWpEDsq4N6XfzdcBHq4FhHt4KZvoe847IxWkKdtxU_hWBjqAW5v9NnRlz-ePFiCzrv-Et4BXlZq7X0_TOa6cpMd00O6a8f3i8GXWUk2CveZZ7LGSOXobdZiePSQAftkiqhzkfs6WMMmk60wXCWdl331JHXVrdGaqO2nAG61W0mPHnWw55Tn5VL8VxxieJPqBNN09yNX8tVurqGfyjPwKseOe2N2vGmrL2O5_k-ZlAv3sfwm82yb7TwB7zP7H1SvwIXixCEb3iBxR7WI3wMDS4Qb7ZSu1TORHTT7PhprlY7CFxrrKfJsHQ-nE8-7FLTicuO_6nwqU48Vf5E5FwLBAYxQoqa3vZDpnSic93iGpWPBEhBeyJb0lRbwxvwlIXPifutt9BRwLSwZt-tuX-9sQxTC4yekc1zEyvYhCUn71kn7EQKygBdwDjoqUM2kx4QpdY3gXqNF46tQ0gNZoiy7Nzx_OFNUsLUMrTvZs_xK_psH6hB5paMl1xjigJngpHXFev5E7kFPG5uRB15QrXxlYzySRSa9f2SzLaUkxARGn0GEn_zEJks6vEvnxJ0b5rzMqmDYapDNyZY4Hk7-a4UKwlefVN-MJlpO8bdEo_lX9LhYQk6SOrNXzduiCo9pN1gYsJptJ2G24hk5VVezPd6UMAEHifq0IbzjzDAGTDVk0Senu4DDHsQ8wErYLwvJdOuln4wq0UpaYIPARNoeRrsuOGXVGEDmTW8hXNkoxzhPEH4HAgD8VBAXJIzevcQtMB9aJ4JfZMoIqV3YFgCs-yCyE-CAVlZuOSOCZMeMUVM4ZNBUGg5YqbeNPgVOI8YOAv2b6Exd7U97_sj_BR_osnCU2CEgFTD2QsQgYxNvsoFAoKEMvTuMyKDdMZnjKhLQr9bfE0-Lxckjz_sPdce2no8jKI9AtDcGk3GYTAsUpMSHCwDc-efuTvFqvmgAbojaUvYFKmgb1CjPHihhHtTD5TsSUhjBJ80MYcMkjvsUIpqnvfTFY7Vi451sV1HT59y_ociMgRmt6zObtakQJIdcL27UtP6WI9X2VRssAo7o5bM6nlooWddHWawJHUzGl3gi2kjla5MNfn8PZGfml6yE377yQdZWqeGEfM6q2vNgkvkuqqifTEj9xgHQ4KBNQ7i8WlOA82QzdodKMVYwOEBJ2ncIisNweaPtdeVb46pdlN3LBudy6_V-iVvw5fnnM1tqFw9vQTYuB4ujkZ_u2EO9X4Mz_5CvLwPDM1KfDkF6pWDvPTbQP-YZOJ_l8QVvfTOZPAEulcLxbypPfe5o_DuD8k7lwLlXmFWQRRijnp_fbexMBRdTP1QJdNEBJxTS5B8IDcanWqP2gwpCgzOFdeVU1LVsuGoqcDqotUYrT4ECeRTPDuDgxjW_YGhy9soOr2r9B_A7SCN9yiJd4EzJcJqhJRHOH67qipIe6Ef_wMBNqzLIoY6Yj7NV9kQkQh22HVGhz4MFp19ZjGSiTOKkv4JRgeeCJfnvYd6i2gOJ17ByfULPBmA5JraoM-APMZuzJJqllYx_JdM4aBnNUx7HJ4sjWXcuf_YznY7AYcaf3U3rOjm6t3pcFqmP4Ni7G9jNbo1sVl3J0GJ98CLay56hHKUHQUtw9QHpsd_VWJJDoGSt2NgEfPkHelTu196IMsrnbJvSlBTH8VTKhQuAvHfy9nhungWswf3sC8XTKAuJf51L-SDPnnt2LP1cHAGwdskpMFugPz66KOR85Ye2HVcUEFg600i6Ke_bcTfPbwNaCTDHIiMOs9BkbCnD3376yS2IG5-flQZBfSJXVoZRjEZ2VvLUUSNYSZBjhgSJTeritjBS9W_MSkb8_01hdMfE9Z-FUffG3lTClv57BQQLyDbOkFehkMcIFwEQnBp4iAW2eV4ZjNgcpRGNFpTVfWIAZ0lOrx2P96MGesmTH-QNJipxasoK3nUeXVZSEBSgjm7u-ezQeOI54fNse-QwtbBpfv61tiBHEi7WOdihQPkVxjf7gikR5IQuJQHUapMNgco9zpddBsvbrK5cO1fFVLSVHkbNTM8CJBDPT7QcodI59-fPTdFCJB8KRlSkuHCOayY8csIcgjoQSJGHJhJRQwgd7xhnxpnmAPEypdwTBy3yeJDcVseP25y2xOEvSR-tGB0FA9IsWjv29Ijg6YNjjK3_4oNG6i4JTTVtSmbaId6IHsAUwLJFqFX3S7I9J2n07Vg1r1qclSsqDWGvIkKtPXpFoqFpqLENMNNNhuNsd7SYjIX4ZNbSL68f4VHdgk5VrVYNsPWuRnURDBaKCbudm2Oe8krsnDWuUoaNh46KjyQfKNKHNlKg3B7YysWBYzOjGL-7wlHd6EfiBZ6Q8yCY5b2zLDsWsl16YxMHIbq3ibkwzLHoe9m-Ve0msoK1NMXYmbkWJwpUL--B0sbCczruvM9gx2pKbfCs2_vhzrM0RMQZdU2w-1safgNE3L48kjdJdGI0kLilqgqw27i1rLRcqXqQENni4zCxDJb4-92Ukg3ZbeShOXXbff22PGhgmrxMZTcYJSrp6J2fSDPoSmU0_xIZc4Ir-Sl_GAlEcxDISbbHSBq-UjCoTRmGXpwlhxPZ_1e_KjfZX8QCbpn6u2JhzVQ0Mpyy5IJsHZuoOrnkOBSRKD71JqnysgWGrjy_mg_ILNsJ2Xzq9V3MybHzp3SdWAtT26WOugl3bTsPqgwtn94OYWPL2E9nSPoiroQXTHYTkY_bTTiuTQQD86ExGuZP3Osxj1-ErMbxgtP0aDg78oNserqtvcU6mIYrLR78mkybc9orzrT6Jgvgdh5fRcLmusnNsn4WnWsv2LLceHHUTZEo5UQcIf5Od76I41sgX5WnMvcrDk4RMnouVxXGOPkFb2B1gOok7l8InS-picWyAz4NdfMHOJtso4kJmD3nrRQIQa6TgpwkJ8eptCfeueWZH36qF2iNZDW5_wIQVdVcJQPyPpIhBF9mXzMgulRLGHQDdu72bp8d1mYJHjgONaZXbj72X6MuTl1Th6Ygsdsrdnb2xAv-WPdx9Vim1HTDaKnI9z9WKQNEp1AMSEIqqNCbM8suBh8y09NLdbwKS2_yauXs-BYdXrcMfI4Vj1_cswwJTAuUXnj54IPyF0wrd-bboU8tRJeiYOUf_vkG0dtIwXf7eE0TbRQsiQsDli-5Dzwc7PYV9NA4WJWojolqpIhRCOsBDQcpt-EhXA6YDxilIumVE5cE2xGUPzT5JJuLU6nTyDoI7MJGiduGsWnALouEZt_2_1pIfQjS-5Mf2amvZuLZ2m_LnYRiYetU6u1-aArAsNQdy-V8yqIW8HXmEssFjPCmG9CPO0yts62E9XaF6mMtNqFM-uKMtoINRK2sfoOcLmMgNdP6wzK-gm64xKC91jrq4a39UBtJqrZOsi0z7tQyWB-0=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df =df.cols.rename(\"lastName\",\"lastName_1\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName',\n", + " 'lastName_1',\n", + " 'billingId',\n", + " 'product',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"null\": 0, \"binary\": 0, \"array\": 0, \"numeric\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName_1\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.2 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName_1\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwz5DUVuyIWK5QV81hEx0X4_h_uWaqoLNCfXOwPbs6OiOPhCemtfzMaKlkTpvCAAvXrTpkf4zZ1Of0Q-1y0LvywHMUinm-GjcLg2xWuWj0D9TeDqipKWRfUc1fdMHAsrdTAHZBv5OJoDl3kGp546XkYHP1f2cFe3OV4nrvRuSenPHn8wVWebuYoF7RpfQTjq_96wMscV-QGb8raOufYuHFCnop1lPaqI8kXIPVI4oDrMsK0koQhGi76bZ-7VGV2VgEeLTktm6RUhnEMZF5-lRYAciOneqF5MGOpEHCn9KORPWDIGabcO2EFctGS1znJgj-Hq0OAz46BkFt2JAkcJojcXJX3wBSSMEL3OS6Yg_Q2O0KLIQKjShBSuzmQvql1YjKbx6aGeZnvKd8ZoGcoG26Hsiv6LQDDoGimq8quD1qDyJFPXIQMb8vfUbdcszoB9HUEQNfcgYKPnDWgJf-yh8BOzOHpkQtnG0hNTdDn83mwBtdmTCZ47Bf1OaOdOmcUoRPtQ97BtlIS5cPs8Bc-gwhWd7mecA4KNmPSfLxaYSe53lBGdEeLSYYz-E9YL9zFn7T-x2C3r1KZOxRIAdLG3Y1gnABhg8x9lY-1PYFEVL-ZVbgsKFpO_MwezGDGQYlXMlnuU69OYH_qyVjaXS6sUOMYmsf3iMtKOaO3HP16-RbXF-4UfjOzoP2sD6ecvIuSnSTBE0V9y1V4EMmdp6ijVFrpCOOceESWsdPjFtxJ_hS4nzsEYeXGLykfBDtHiL1kRHfp2LYro8R8aWpCc4DgLfyB5N5fymcw8f8j9VU-Y0ZEmkouOwS1yOZ8oEZPF1PnVRHGSlP_IuA3Lgm_L1HnnhhQjGXgWsBKf0l3HdqNsFd78Tl9C0DE8I7becl5Qsxbq9fhvY9VT0CyQqTzQAHmmdYtN0H2tuw8S8Cv9NPG6ASMTlm7fgz4vYv7jeJCquaCvol5B6aubIJ-iRM5ZLvTggYFRuoM-flEuhewBIDFKZlGetwnPI6xyY-0KtmMh9PuHxCn2L6p7AfAUAvR4teoauSFRP9U6rXXQMhQ-59mfxyB8UAo3FWvpn1WPxrLgaZjqgMnylRpk55M5ytoq-DTGm1P1AVVnx5rB8U0MvY4i8Q563uk7xmdcWA4T96yzX3u3qJa0Yyi5zwvgvJilxIQaLFW1PnNiaBzyf0pigANnjNcq1-k40AgBywk5QGUnEEqlPGx8z6uycXcsX8SKf61k6wndRwZBpLRcjOf2pfSKOXXZMbpxH9n3O7ivPtWlPaybl4Ht08HXcy_DtRre5JPOr1MxXvriqkIvOkEVtd1v3Mq7Wvev7oRLXMaNLZwHu_cJ8Nz3eCW_ByUOcBmvCbOOLvc-efCVWO0gIkOdaPeC4rSU-1GH1ZTAbB95rkXJURMPEEsPak_l7adt9n4TkqwFkID0kXo7l7SgAWMSn4OaLCbXMCDGhOpmQ_jm8J3wM4l2zvaxtIDawSAgwAE9VH0Jxf3ym2CaQbliJ1kOEqfGSQ-xwXh4SNxq5hI8rJOEpxsIkB9Ph6OayOybU-pZcjDLyqbtmvg0Br8UrJ4jlSl-OXUIJ32kWNyjy1q2ItFTzH6JPskzMKCja461JUugzJTc1-4LrhE4BaihIiTSR2jyCLLStRgxusIcuPUyFNnErg3uFh8QH3-Y0P310_WEtA8MkBdfk6PGvY3KnOtqEJ6pYosMCUV-AbG5MjMm--6FC-Ru-GOwGmiApVuOJYljcdHW9gegXzibSF54LduxCfeCpUaqsLUOTHrg1zoGvh8ZH0O46JlU4CpIUnjbnKxM3w-cK3mQWjRcy4ushznQuQn1mKBc2dYPCIVNGB_kXUw0kUhExH6CeIvpGidAe4IuRyv_Tp0wENp-jflkbUdsAV8TUrQ_1fp100t5UpXGfeBY-JYwsL95Z_objCbwm6iNdu6if26ERRJ55lOKIwokOm9UIDus1-ovFuriolcs_IXwsxjBmei5mJs0fb0nWE_CYEeoELJCgAqRTSYT1MqlY16owovOvpLOS177ynvUH2U0XgLxcVD3WioPAqGRR6W8BLbLfyU3s6cStOk7x3dS_sn796vcW5b2q84xYco5OtoIMgf_GAfFCsq9qKOtga6HKgFFcYh0kzgST6Fk9RtE5BWOYo4UGgQmTu7442CxoZitW_9H4GhUZvpx6ONAcVDdhH6YNgaACpSSXs5e3kVDGE-LMQqyGWWFmDR6zdOfcqOgQJu9xlWoWx5WkYdvIDBZEshcoA_vNcUmO0WuPdyq0IC2fIGvzCwrsBgt0CBqmq0fnPwv4n4rG3HkzxjwbLigB16HueqPNQ_b8k17MYpnxPrPGzumGdbZ3akKEFfgVmIbUy3rY-h7MejF-avSpZkw2UDlmUD5bgaL3BoIzCsBO2jeVayB1LtuWsD83z9tmRPbuM2A7niDa1VKsT2efzxIQd3rH1FVil9bY9h06FOjvBYidKAqKIXtAWoNAQJSdLMIXP8UW4OQOQFhdRYwYv7sUwD3j02vHV9PeolUHCK27wcG3SgbUrz8HcxEamMkNDYDXZrE_QFRXES_mbE0hSMlU9fjpGRMqTB31qhwgmsEiIT05cKLcwzfKgCx4McgbUtgZuxHH-J3S--C8dAUZtun8zlABsOGCSvXeuhZlloBn-kCTASAqZZbtS8DqPy1TN31fVNBMgbVvLU5SFcd7UZ7y1RbpXOXsmu8aTh254wd_W7X_T6E1jPVbL2Rm_sNU5ioFjqySJJ63Bx_eFZavEd27btFeoevL81wnaCauQk-NguWvYbIXtn9PatE0LG2410d3vryVn_cfpaK8LyNimS5uJyWu7RXts3C8PZPGjiCkcJBNhAdV2AImtlWiBfopspHxd4CHpQG_twpTHSLykw6ejxRLCo5KsAqJ3pX-_bdNQq5hzdoidnk3p3g7d4rDykVdTntIj1ELexG71PAAdrHJ6QDuy5RHpRgFXOZBPOzTTDJh-pbSNTl2iaNAC6m_BRXQKQHB1egp6S71WI7ZoPTBCW6BTRDGCgZwH-GOTwBpRyb-UGbBo5vddSIndWY3Av52Xq4gRABGB98JxGYMwIU_fs8dPkBz112GwkT-deDASnIsxB1hZhDxhlPq8X7bF34CIQlxtYG7VOHViOjA-WF59H15uJbHBbwctrUAiRrS-0kjGNEslwfVeU2FWNS-xSdvTG7PEjSBbkZmhV9gRWkdUqT8GJ06an-BPxjcDPemKLjGCLyHVdSvSEClvdRMOkoG2BDEzFBfgqSeP1hTsy0T1ABbN-2KPZX50MX96wDhyBplJRR97-kcr5Y5DTWVTiTWum6b-RGq75q51Uulp3eG7QmIHY1L5NxyfhLsLuYIaBb2zPYQFvQZc_oPjEOGmN0JLMhCtfsag7Lb98u30XtSEoY9dDPRG7TG8izAe48vK5ocqPAEQvKSqbeDXQtqmv91kG0GcUUOoq-mqj9wnR_fC_WS8MgbBSMQBlxxhDBcUcFV70x7xTPVxn05T15qA6dcBMO8uwmwEr5l28nuseZFNEU2aLPcHw5pcyoNIzuTi72VF2cIOCfdcukSk3qz477x-KE_bmfMaT25k16086-eJRbk2WiP6wtL8CY86ufTTU0tuaK1UrVjULvJhS95OrLHAv9jjZEJnJB7g2pxZDRGysTDfodmWwxeG4VLyJX8JAIPbamzgsksOaY5bjhwmN0dXOl6xZzQVhusP8ss-cGLefpxKvXhc0VoOZvb2d73oI0VwYCy_zrKknwo-rzaMaY044yrDF28EtzFgBIVQfKymxw6k2_kG8EDBPlM36Nzhf1ghfYM1oJ-MiIcojhkdozeshklc9kSAyA5M044LPpmvO-5CRWN2_VGLb8p0y4b1S2sCxN4QbRWoWCEs5uqcBgBf3TRF-tsBP3VCrFSYH1Xl8IaMcGCnZHaUCF59yL2RE_C5hrnLRlW0cLPKS5rG0cnJ7TQRv5qkhYIRMt8q-znjlCAK-Wt3kGnZRju_PzIpmjwKBORrPWsoEPiM80VF6e_O26_gn1O1QiIw3v-ciRPqK6XCZaTPY5dhRoYd5mjqTbdg7Not_xydXHHaan8ebg_T6Iy3heRy-ntCrS0su-wIvH659jo1K4H40MOF8_ZRKX24poQfD1457Q5xHuTHORf0gEqzuGxFCp9OrCIR_ltXyZOi9GB4HHDZfrNg1QsI-5X1JobVP7eV_TEx087qJJSWUfyDkUMAuAgLUyoRtdER5Z-JWWiTxm56YrJn4nDZkZUYG8ncj8ym3HZJV_piWbaMT_gMH06_KvJAPaGVBWZg2TfVeIT45gTkVLPHwY57ftiR2xdZkZXt9rWk7F6-MwmwCs-pCRMjE8MFolhLsgXINvmA1atIUUGe3NHm-qOFWO_XAGCWgNdgCHIT6qugSzyQi8SkBcvUOscnmMTXRFTTheXU4CyZA==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df =df.cols.rename(\"lastName_1\",\"lastName\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"string\": 1, \"null\": 0, \"date\": 0, \"binary\": 0, \"array\": 0, \"numeric\": 0, \"categorical\": 0}, \"total_count_dtypes\": 1, \"dtypes_list\": [\"string\"], \"columns\": {\"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName_1\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName_1\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"56.5 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName_1\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"pizza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdw0J6BH2eQIKidAY7bp61A-dU5EHDiu3Wt3HFyNKlylVO62ZXbwapi0Nk2UNxJw2MkPUagAoeMzD0bcPykcrv97mnFipdw_ianAl374Nf25560pBgwieZ8LQGi2XJUUz2-l4rqgAYuK6zvgnjzhJCdZFtTiyKvK8-DvcaSIlndef7ZNyb8bS-JQlNF-cHT4XVSV7CXNxzugamCFlmsBe7ByXhA-d5GRJBHup1JBodH5UezEK1f2j_BOUXRw-w9UZfrdZ1jyT5zlkf7HADf7OxNcSv74z2Aq8KF-rVb4aQaEAfTbcOJB7uCUi5niE6Lk4ZuCdJWl62_8UcaUFKgOX_LRJTDM00mFZFjHeKfsUP5eOH409aYiBtXeglEiVYseQwHnkcPZu-Y3yWiInE_tO2MY6GIs2UwAsi3_-P11_wW2YYV2_5vZsxPHz2um1oa3R0wu0VUdwzcmwwR5IE935pwtZo9a9bXOchfuBlJGJvWi5m7kAXLH0LsHya74vsK-2_xamx5X4UXkoCnX14_SrkG-00Ea1r8tlpzlmFEfzjJqm-eJXgt_mWk0L19_gmdsE2k4duqWsMFq4JRojArsrsVGdqUEWvS6CUagPhTxRRwYY7Cwx6yWona6B4NoTKqTrcS4p6foo55Ou8Au9LhtheYoegVleL6U3BHpQJCwlB7hTabOfMSFrwBsi3nZbRfloUan5Ml5O6Y4sGE_3zJ3bZSY3Q4_zsjGbslFFCb3T54JadTnQOyoPCfmjLqiCeO7-gFosx5XHzme4l9zenkI1XW7VGoTZgqaPs8E5yTdU8IMqbY2Cyi_xrNIxIeOEaBzHSjyDEbB03BUMm9mKdAG-pOgCzVUNCSMEPHqlbzmvO2_IZHz2ucAfCgdKYc2c5KYtwi9p55XuZs1q8uwmnYtGmWM5fqV8H4Jmwm9GIkVpsfxsA7Pf2JGvjNF3Z-ULcgFdQQQjzMKoDa9asK3Wuse2KSWXBezR0bGEWIffQI_Sd7rzf3OkxGf6xmY7wSXUyh644hBY8GHXYE592cpbfWPnPj3KtTpBKwWOAHTI2UhCHoJbGjdVNqU_dB12GbLh66zogiMPZ0UW3V5klumq8XFRlcQQprqmhWXnZxICNoRdrvQo8g73JUfd9Gm-vxkcNGQ2KOcZxAKDxQ9X5-7U03dvEvMggLjRa26py3OO7erDSQS32HuxE1S42YslUWHxgdIy0eBJ01DJrzOJFl_aUWvELi16IiMGD92cpck0whNSVFvh-9HioZT-6-JkBV4NWFt9eDjLr7FFMSjp7qL3uGX1Op8yxLUtbguhdfKBvTCfO0F9cThnlQH_W15j4ySeBLHFwHxPAPAVM53vNW2SMJh_JdE00luFsdJUeieeM_crm7WPnoZodm07VCkAjUDsTb-aGiurzto6eVtgHb98z1T9oxz1nO5XsRznsgnF7l9p6V7msmyGfDuT-i5RlCmoyicoBuwjno3riK2MeBBBSuBTnhi5Bxvro5EvZrQmt4OtJbsVb7LA2k1_MJWq-svLpcq6zYgtBGuI5nItkBAwj63wlTCTKnqOMkOAfr_GkNVbt-hLmGKFFBYs7BW7zB3VDA5k3qGQmTipNlhQeJSz6W89GgYD7upuGDW8eyHzSgv_PaFySrfrEGeO6VbwtRrnonsW5GotbkBRv5gv6UrNZFiQ_3wuU2fEKRzRq_QorQkBfMX3eSqCz6NAHhCC2YH-8KwIXgT0n7-mAKod-UGO1spsI0VpIZ054bWxZM0sImPwzPAOBB9kTplg6-UrOItuLxOmsyJ44oEuDgYTfSu-XHsVQiddeYeV6lUp44cJBD8veEw_mBf0CfqgoIX3CEiI9_XBoRO9KdyjF6drYn98uNcf_5ffB3mqg9NIYtZDCPBSyKlHQoZkAaq5heUO9rLCrEkymFkIXsfTnmDgCOA6l-q5FVIfOLPSrRkgtTlWBaPlacJIUjGpS1xj-GBooXcQ8jpttpQvVEg0hPvEnJaXW1Ku7IjFo1jCIupNH8El8d8uiKrYWxyg-6LdV4jJJxWuMC8cjpHkhLLdfLvuEgqE7Fgh8eITYddHUB2wgfWfFrGl-_JOIN__wsDxWuRWQOsgi0NWorzSCX76jYvCBLdgZzecZPc83guWrhPIUFqTBb3dbyGoBgOD6p_rgF_s3FHg-Gg17e10M0oWa2SJKKjaHc3ZND7wqs3zF028bnrCMmf6Je4rrToHEap9kzxnSuMS44G9nTMtBBnjqyTXcut6kLHDeR2J1ra8G8FXVV3LPdXH18fTBfeXcPaiaKiLG_mKEyOyS1oqqFEF0TZl7txx7IEyp4UfOoN9YMhQEnAdMkmA8s0ftMoj-XP-OcdBjgmmdWpaJfjG-kFEFGKTzvESaMdU4Rbrlq5uLhnoCmZEEIVB5g_RykJwWxrbQOeKQOgppAcG9W4HZP9N6tFT5keTOYRAPT_VlLdrCCwdL2JmExFX7D7CPJh-ZKMTenJrOnuH7HjKgnKK-eJiQl8WCz1vLtsgsF_8c2bP2iyOV3UKJSik1XrEW5-ga9GtypgCBiYoAUx9kMBU4nTjxOR0c0vmNkMVSs7-O_SyIqjtfwgJ1JidGGTTteG2nNfuF7yKXRD8wnGjIDmcMHFjNPpyVtK5q0SBGCiJEurEaUFepyS-ySVjqyc2PHQARjHS9d6TNPq39eZlKkipDLGKuFrhspFDGYttUZnM70ZtAhAFGV4MPYqou7kpWCxOhcWISreXqsEEcTvWJOX_AH3DpnTXEuyD-BHi5lD4Vu_-HgHJUP7jZypNKuan_uE_msHZmIeaej7SLECAw1lOR9Fe02RTuqsdQ4qPD_UhC9dxbOVRKO-8mXe3_DCZ6JCp4oigLDRNghRHIuHcclWRTj0M2_ieNBwJGIP26Hqbk6M65yl71XcwEYMA8Cc48U8MVdEym1zP8qmHVIQmaf9nPyaVdDskCK4ML4KaXEAiOnMX_oIUh9NGHKVi77_lpshjOEX2TYVsBPdyX8sSBThyC6vW4QA6Pk17Be1KCw0JP7-j-_hDFK8EarePLA0nFjobQzX_NuYLzg8OpmiwdgEnt3Bp6pTmguW0OQZNtdpDB56bk0xQK3X4A0V67NDSZcAgmMSFcdY7JVmorn1hZz81y9OSluYGG2s6TYQhF_F_AhcXQ_09EQ2m5eeJDGE7ylrkUyYDxNA1gxUCcksz3Xk4HdscrRcMeETBWyOB1c4RoFkwqsch-2zfAFfFnP6tMwNzN_YUMit9iJOFilqC22Zi7pEShU67MohSOm_3RP1v1nojXUkFasQBP8Pe2fWFuGYWXbusvEENWapkqR4-7G6PfNX6K01jvtlgssMtGIVCythN8P5Az-zvQQTW89Bqr9OhPHBeoMUkolPILFK7m35ZnvzYLChlguFW6TxY039k94b2dl23TOvCXlP9PI8Vx1mwtJJzTADm3Ku_u7uRpn3xW6ZtIOgh_5Fc2ZcfiFPE2rwyFO78VzGlfVqeysWZH67HxFEFl8gLb86A7nnouV6x2qIS6NMlaLZhziLMZwFFDsEMt5fLLus1J8muQ1Bd1rMhvJneQ7ZebkW2SBxUOErVqUAqR3-S5NnSRDUb6cVVP9ViFz5_3uNujD-YVl6QtE7rCnjIJwm_4HTiyxv3ugvwu2HN-769wiG7u5vD_hBilHG19msCtvP1dlxzIAspHGggI8Ub_6HIO3yLGi-rG81cuXAIdalMYOF7cGwMl4SdH0fY0NmTMkNOU3fUJtIq8imjpp7vpCNZGKqefDz1YxAai-_g8tY_wGTsyuBrFQilcRoRIsx5EL_MocDQsz-DwqjaRtjWSa0FoU7FGWilAhw4zUsGqYQxixT8C6uNaFfJzt79jJ0Dr2SsJOH3i5Hunj2OFX-ZXKMuKVxDZruACu5Qeyo8fcYujsehaZEVGds0TWXQG51wnH11uzs9BN4nRpMxrqaNbVEnS9p9NQtDd2c8E1tklzv_I7V6lYNhMwJGBuzrLF7kPKqayOpcjS9ptCsS4VQYQto9NfiZHaMPzadM4y4ZQlZDQlI5h9s8RhnLCT1aOAImw76VC6fOn-U46oqi7uWsvziom2k3YNToA0y4pvDnT5zAQypuEUTTuekKp8XA_yYvUUqMwdgFQsSPdJGlmzpI7odxgKdY6rZlL9KifNMexag2ngsYk9oT11iDukKPys5b6Xr3EpVQS_BnuorbPvLwKEK_lajnHQ-b9JNkTjM8w2UXZ6uvL2NoU-HnmKRBe6cPRWM6qAcXlocy4mYvp3NFskgbQQS5cB1osvoxRG_VdVfvN1x0nAzU6MTcaWzyMvbUD5pYm1nn4t7ASgNmu-uNbeF2VTuOUN_g5HbrLiTCFcwKQvLNn4x1GXG9IGCVyYRTJLGH1CDmsh2hSGB8Tt0vZsLfuw7DZP-Y97lfj\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df =df.rows.sort(\"product\",\"asc\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"string\": 1, \"null\": 0, \"date\": 0, \"binary\": 0, \"array\": 0, \"numeric\": 0, \"categorical\": 0}, \"total_count_dtypes\": 1, \"dtypes_list\": [\"string\"], \"columns\": {\"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName_1\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName_1\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"55.5 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"pizza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwz6UboOGqt7h0M8neckh9fL88U7ncyzkKBf_Js3wjOaIIzJtPAG3IE5tTAOHhbdKyACCjdCdqE2W2EH3rNi45RJ0Q6m_sb-UIU9Np3SRExWv8xd_mnqyswFd5TsDXw0s6OF0d4cKy1sNSWBlaVmcCCnE822tYrcJjWM7QTelBzcIWM18s9DeMNdeT5oBjrnkv0q6g4i8xgsovK6ID8xCg8ixFZw1x7-ivRv1_ZCKjcn7cmkfNdanuC-gUplANU0__5K_aQp1lYY9J64Zx1gS-ZQ8Y8Tf2u6YXAScN9xNWcA84uLdhmU0kQZzN9OG6oAlNlvLMgh4MNZAY9DSJ9lYsV8_P24NyZZmVfEkB2634JZmMKTIseibaKPCOo9onl1h5On1GKRFmNxAphWlhl-I0yxyjAwn0kA5swdVXaobyuIN0iT51-fLR_5VzBaLeJW3u29OlkdFEN7J7C1RYp6yi-ez-nYNhz1LobQzgg3cN8o1F5ODA3df2ay9_bQEULj34BoyIN7BPs5q_70JvRhjOLFsY8xk0ZfJAAKOsxN-y06p73xmZ6lZVfU_NEMhKPQe4PeYigpkER9qXYtPk_ZnEcT15oeZECuBbs2SXEgihsnzRqzyGLG28e21CeyzMfdmkvMolZMghURDt3L-uaqwYzaJoDXlX8-9WaGtk8nuqVr4pXYVhqeryRLTP6N5tafoNX_SWYN8nBo3DQ4M8Bks1DyTPgOHZ2P7MF34ZAetO3FGtcnFajh9s22uSAY-neTKfHN_dfzQVX5_50eBVVCni1RsVOjvuXwwVBTbj6YW91FTiJK1vTrE4Kb4gsm94U-yYUzmkGZe7njQR-zpta6bZi4G3d-7BEyHMzxXlp2Z0HKkKHrefNghvV2OmoO9MP6kKD_bXL-Zd6ZFrgIj04EqhccS6JeY1mu2sFWzEQiN40Mo722eRTPKzZcs_WTfyyxKVPUqLS3eyKhf-umfy-ZJUPtk7W6M-XEk08KYpwdJNPivrLtr_19ow8wvk6Ad0N8X_L5YH1DMBPVjYg8wMJxLf2UKhvOz7RN9RBD4DRGrZ-k-tsnD760GAr5J7B9pOV6VLCrs93rpfBF39VAjOnvoqv5MBMNifRLZhlD_WNAIggPrUaNR1sLE9wpg96mce7xeRoMKWeJ0e_2_e303UaHTJtrf-cNjznF9tEzjtz4cehBWa0cRdv0PSPGZp9hytNaLcR5EHhSFNIcuXq0l2zmC9bcFWWPaRgzTi9iZ-I2i5aZNTqDkVcupBCAhlq8mQ8Ul9L1cO3fvKkrBx5tM9R-wp6Cb-cePy4eFr3hIbL0K6uOl_ecPuLOlBWfJoZH0HDU34PPZVBwEHekZ1ErfEl31AwoCbzgViL3ZgY0J-R3a1ivH-N-c2ldBKl5Ek2zxHAWQD5O248sqXg3aoUEYUnDnfeNKjyBKsOjz_XrfVOD7NaZh_angJzu7aud9u9lXrmWhRK6_6SbNVEoNzBkwA_XMVAfKkShjpsTKS_y6LCbdgBej6N7Eoyaqxa87exJKx1AH8rBy3FM8u_BvbtFXyPaGwF0y2PLr_RQA2T-HDpFcXUu3IQQ9tyKbUZZ5fl6vPSktum9jDPc69xPau8-vbQyQbb4sH--QEN97oTmMAmtkdqGjqp-HQTPi-TgKHas00VQzz5uK8uCBnsJr3eF2fozuqoST_46yvqnn70W9AvQSi22uIijzcOnTuoxskZxuO_eB7Z4FJtp-Vm95XHGUm4xihQ5uyJKeuVH2z-dKIR1Rtc03pel_eEE4oswLRYIwsc6xWx4dxI7mJG3SO03kDOU3_hQoi-9wLmtD6-drILMTksEDqFXFLkX5c3J7yhL907OtVkKxk9BwS2JXN_JCOe2NU3yIZySvN7JF6Ufb7ypTPDxnTCEeyokBZx2dMOm2fAokP4TBFFV2GQvlZFcV3O4jgVdWyvrGgZqu_rg8qq0C4hmUDVvAORMrEz_p4EsSK0YA1l0ilUA6z9K1c59PeSwXieZbYOTyGDQ-ALQHxIPk2TlSrOVBzNVUkcEfrEfNrE5AQcOSNJhTnKtDnrvTMydhrKTHuvfgM7OYuy7AenvEwkQlYivNA7mbtEXgu2eviDC-Cod9ZRq041lgE9cR2M6EuvTpuuZ7_ObmzyetVf0YYe9Iw4uC-JfDXL2h5h9e-q9NoHyAUKyuOX4W1Cvm_grMGtdEHJ1BnrJtI7KmXLdiq9Kq_vKtQdU7alDk03wbaKQ49D208GG1oIX0Co6U32WDLk5AMBTNUEbjWVmkoRo-IT3Q3yOHW3RwQ_E_QTmgGEBbikfC4R8TT2IUCD9_a6iGA_hoKTHWAIc6zb0UExGUM2mUmRx01aA5mzblu4BtuEwxKzQ_k43s2pz73ixbL9f51e0Sgk6x1PNEM45Ygk1LopV0eb5rS5or-TbQaaiWkPXJxYXEyoGxGCIRmefl02eus3e7AXOdPHatIREH201hnqDQwPbk1IIWqJL-l2IDQ8HLdl4G69x8XXh32hfiQMPnSvJ-xQ9n9aVfGiGzP-Ffq0u85NPhsdC4eO4g5UMqLoadJg1y0idC_b-N92ZfniTlKVu-R3AkRNFPZZD4dTiDHps6rK71lOfLqmsXILej3bBimkuE4Y4ckIMJsTtDbSwFSvqY-IE6HkljpPJVMTB4NddTF-8KYggNgCtf2XkIbNDRRuwLrw3_JWjVthIYxIGY49dD6Sx_fGCogQgCiGiXB7H2uazVA3HlkqwgpeXEs5E42eXeZdrN9qG-fD4XxrBzuW4avcKUp8V-fIxVkwAhIdcK5pRHrzin8hQ1KPgo7au3kUUG0qvrAlaAVDv-1B0RUko3mHX1z1NPtIerIVAU9x4iOLiowJxYR3eucDMu4PsJwdWQ4tMj6VWGb7Uk-NtC4Crw_2pj6Tj4x1ou6r8yNYv76PbEdIyD_yZfGhS547Gzdm2YZbltsZP9r9GqKNJ0Nys4VxqpzpmzMsjbdYf-omfxrxUNFrGW2msvo5U-Li9PfLSRiKZ4aUW43up91q3dlku6-5tnhO8uB84ALjC3A_dQMYD7dXB1L0eY0QKLabDP35FFsNUrakS4nFSt6bd09dQco4eIxGl-vkfDSquv7DEmgyXH2im4-CfbxmiX3Wo8qJVZR77qDS68V-QLW_Z_P-n_Glx19ZgSr1IOBNJEgbwKoZU0mxBAJxXn4ujsDhE3evPJzhmILdALXFgawMsgWOOlgiwFFnyZWgikjf8KST0Rm10pQ0uuplwWrKBO_qUlfKr41zp70AlWGnvF0S2ovbbYorXE0NCT_rxMEzjCuuIGLR6_dPUKKBz3_Og6amtwyGYObgypdKWcI21xxcNOqnsLBd02F1KLWB8tJjkoIFMZEEh6pqnU4dm7-ZsSfPcL4PxJbJs9NZ4dd1jC8eAQqaLUFEaPPu4OwVXNBRpb53OtOkafdeUO0CL2APzT1hA3E6aFmiJmW5P2-WzE-ariMmfKzXHBBU1WOgi21ZeGxvwNdYK_C4DAWXjKsVfLCQpulQHybGGwwLBmx7Y2AsbM2ohlcj-gyzcdg1lbn9JKGsb1GxgP3yfsNaLBTZSiFvWW47dNzZv4X0E3LiSCMG_7arrONcHTHqfY6a3Uz0cNGXzZYMY-kstCXZqsARpyhQo3smAdv2fdZMNtl2xRIkWijEOeB8Zmhxcuskl0HaAXiSuXWTV5tXf4jvoDXkwwHRyaJUrHVQm1dxeq56R1ClgkwafHBbK251xybdyqZRQi_Ne7DtasNysuqZQDVZ4Yu2z6GMzD1HjIAzDGyXd96urcMr0K7k9zZH0gGX9SZ5ulq-0nUTeyisGmDf6nauECtMqSdO6CDD-WSOT7sWsIw5KU1RvBKh6mlsM6O9xTxko0SrQNYibNAdOgyDGHyzvxxhoMMnbpyopctzCejqglfwB9AZvDPDgDSrNFBhMKIe2BZSgeTgMCcRlR5KGJKoro9iPt0t6z1voDdGYs5RZOpLq_QOrK_gNB5SXONdpvMz1J-tFQ8yArsWTpaU3ngZ-WAIEhDouBKLdr8ic5J4_SfDrpANpVTF1tgyRaovUKfv2u2L30jXTTQqIeycfajhuKeKiwhqKXY6GcfLDl2HhbAJ1Shd-vNMTVlYFmlcr19ei2ohHOahffkI7uAwr37jhEiuDYNq7Utn-DcLSsongBj03pypFcMgVUFtfwLemaNIvW2kmI37Kas9ffUQ5qKQrulc9HQihBE5AN18kbbdS8itZCQsTiFdSPnjgR3SzKdq9ATq1fmbn1-lyTEvXUQhD5MHhaaHUdXXkChmLVwkshGb8WWkiRgdrlQ8NMVi96ClXrZ5QdApC9TW7bi5cQ-oePeTOnvTHYuMoDQCfoft2MzbKZis7sPctBYQAaruZ9vjLyPuI5xfyBDMDNVzg0NsZI9pNQ2o7Q\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df =df.cols.rename(\"lastName\",\"lastName_1\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'df1' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mNameError\u001b[0m: name 'df1' is not defined" + ] + } + ], + "source": [ + "df1.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df1.cols.rename(\"lastName_1\",\"lastName\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"string\": 1, \"categorical\": 0, \"numeric\": 0, \"array\": 0, \"binary\": 0, \"date\": 0, \"null\": 0}, \"total_count_dtypes\": 1, \"dtypes_list\": [\"string\"], \"columns\": {\"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"52.8 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwyaL3PQ-aUKb__dyRg_QkSn8hzaPAd67C7GDcHkOFkD4ztULRWE5z_2XymVo91BzGP3m7cGmKWjYIol1yzJYsdoQ1runuWu4X-JmoOn260XZgD-18olwJwZF4w7WI9OWs444f_3kphv0U6AtMJg6smymt2ldNeMzhR7kskqBDvCk8UfCrswUpAxn_soQyQ-0Tuku8Qi6hXXL36IpNNRzJX4clYTJ7dCJx1LH6CyNX9xvnzUfrOF7EZDS5tSmeTXWy6LhIbgT_l1Vuk9PQJGiQZKGhsl-7tNbJue06yluzh0N6v0bLWmu6qTwdessQ9hEklHIeZVhbeaao8TdGhAwpSv1SAx3mU6pG4_ZUhhbTOdrjjfDE503XfOiahfPW7AeLnpsUjIQv57kfByFV03m2EyZkwXD6rvrKvEcPkBZbisRHI-hNi-lEADaNJ-Pt5MHmTHhDrCcFQYsfMwlU9UD4jT6uBm9UWZCXBpMJX77tHG7vvgABFzimxSskxPKkSLzNgOBSMWxXXSIZS1vulWNIt93WMN_kKlqNdMDciWb6dkXnSTV6NzdL7GJy4WgGhyOHD_boYXMjnGMuRh39jMtpjoKdw82IIBfHTYiF0yoGUHqxpisep7Vxy43SuiPYyIg_En9GTF89Uqk0BIab-fbOmvh4X6laiXahqfleTp8bgG4_URIpq_FOjV3Acq8torZdvHZpWYIA6rQZFzLYP_XJlukNgTDL87OS-AspdQmBoJATKhaOnucPHanSHWQhiGRq1zLMFkZjS_PpiGelXvXmzevFUM0tdShH6Qs4Q3WSFgPhJ3c_cnqo_VSZFp-rmihG_A4a7SlZYV-E1eKqad5hx1GXjgVw_wpPmGTayYEIAeHKabUiDhqRIN6EKUkR-2TYGLC0gqKXpfczrf5MG7R3w9ASwHNFq7fbGRTLwk9P07f7ZwnkopBlCtrd2NJF_nEVvweX53PQwvLJG2CCs5lbCfVXIxa9fT8gIudC-6Imf9SPORBaRaYq07CimBMqs6a-r9CsvgpwLtJaAkRSc9L0WrQbnqvwOMo2Tzl8hJS8cOW9j3BZ7NubNU6RbPKDi6-sm0X0q8D2pppSYGTUynmwweuz_FvfKuDaZ1uOwqC-vxw8cDZ6bTWT3ikjSUgVqmuB4uR1PQX5eMoR0EdIzPMCgZnuyGPc1KL4AmDSSiNrPk_NoKJGx03RdaB_WaA9VbS1GGpQFO1R0mvTpTeULn1DIe6oMHVpoAtsd6Z6tBKx9o3YE2KqA-BKI5H9dC34GuRaWVP2iAvEtvAEot1oSjcOHn4gv4ubHGcslBp4l1ZU17A5NZyuphKeDQRsgbyqOlj4ot4Tbb-OCXyv5KEAhUZkyKY8rGu7oTbq3jInsEMvGkJgW_DudsUeavRQyhbQjh7VNnD3V-BggpGl0W_ONY9k43XYvs0gyBs1e3ZhxWnCBx98wuMJM0N21Ep20Q7JDH63IR2xoAfpOeu-zj8XipPtpjK5HMU8jSUesQ9aRs5hJJDnPUGCOUfq9jZFUuBJSeB3_Ibo2xnoI4XNtAmfOk6A-cfEglxKo0J3q1P3U5c5tB6O27S8IcY8n7rdaxWHifiXhNAZcT_oIQhbCOeDtoVtmx2OA3G109WDCAQGMKbXiMFJjFArUwZmn67fJrBOhSsCNKVaJsJ-w6CH33OgNxYZ8aucP6XR75uYlaMX7JkqhFUNRyxiw36osKI7WyUTpBE4r8AW6OuqDzDA1IkS2tjh619J-0Um1P-jasytcJgN_whbzBrXxV4TNw_3kc-lsFZaD_h4zXK4coiskspuCWiQ95t2dnXb5brT46IuE2gZ9eNm2ojceFIr11zOwtLE8sJnSl1o7CREzrIXxiDPEH5ijdn4tu1bRPKB-UKHRiw-iUg6qHz3d7ZGFVErCTqD-ONjwJcNY0ChdmG8HsBh5yCiOSZIA3iT3w4fzrvI-VPPkwk0Cq27fwrH8P1IGfUp3RahaNjyxpOgyiCpqeW5wfU4glBVYfcjHRwkkpMI1oPtQYJ5P25aLWH9WJ-Xrp8DuFX4iZvI4IkI9GXhPDxfZzcEtGYILlgJs7s6MwcJDTkZsSfaWMXXLwg4ZjXmefSTrxe6d9D6ZfuLnc22ERL0opiWs9telaq1Sg0rVYEK7pug3yBfSVxnY7N4_XEiiBnJHNcrBmYSU_d1CkWTBRbDzZWLwANiADMu2IlKow7i4Si5MIBLblSBcSUdipMiqDxIG3-lokauCiQ_dc9wr_okmagYKs-wU_YKYwjPvmZysRr2D_FJNMwsWitbuqH6YvAPafBIeKtecufxpE-_-fgxY4kejOrSSoDLGsLwnLSSWxekPMtzla0y5RTPHYmZ8k7x44UESjB5OLknym5MPCmk6bMmu1YnKohu3F1_EMS98fbdpYsb9y0ReZ16Jq8SwuiZ44lnycLZo96Y6rx5mKKKG1M9nz5TPKn1ZzvDSEfbqa0fOSR0dZkXomedyxHiGMinshfbF8hLH60jBq_1YUJwqug2h-pd12tWgi2v3kJpwS5R1xIE9xMQ6oDlRasIp6KIzlShFJwCPKiIk-KC0epSuEoLYKRkLWOx1H0VcP2JJF44MaWoM3-EEoN_GyKhqcvRIRMavTk1FW7wZK_k7wPfR-bDyks_qybXdHWkcilXpDauy8roS38baj5ocV4YCkLTfjLFwMnhRGwDCAdZl5iLH5ZwV0fi8sWRdpCUHlwYZAcr30Pxptflt4-SB0GFLzagzmI9TUuVpytUQxubKEhd5GWA3vVQw8FZZM-W9ckdiIHIG3-mJrdGnJZkQLVYDlWSH6vWeH3-OZQgR5aDSshiknT3eo9Nos4sObzoogTpAI8HImYJEoRCEaRRQVvBpZ3h6wyBsaZ2JdnSDo_yLMNpwUutONbVBQ29eAWtKDaqlBRHxH_r9q8BpdDiQfSIuXM8ihTXFo04mT9cUSx-jygxcdqvWEERIjn06R3YlOZlionyIaTTeRv0bFEQzFBE--C2ThTy3PHI3KsOQ5BHoVvl6iorNHTHrFonYtxkz_w46hM8ezJ35e3PPNI3BSadP_zbVqkxleshrqMw6gcZ0DBT1tx3sxyCD-kY170EQnyg0DmN6ecA5KL3CYD8gsWL8sCzpvWUdf-Cu0jkcbHlRlUjXCHmYL4ilY9AKlsmLt1vkvcC5BQdbNVeGM3VZv8mPFfdvPBf7OihBlfTBugk9jmz0cY_zyAwY2JQ3WVKuT2vKclEP5Z73_CmnXwcLUVutz6_4n6gD-IxVCG1_e2-CUKRMGaUZylQVps3Qb-FQQ4KdRHcwkmx1ul0eLELGap1h3-WaSiLeB9eQeR-gtoQY1WMmrZt7A3jfmxT5jzOE70Oe7_RBmJ0vNDpXKH0oTOksurJmrxgWD-T4X2WSUnvvlF4ubjh3SXoVEBs9G1BE_ms90ucbotc035sl-PnAsSJzF5nT42-7CkPFe2fociBcTItoD0WOSYWII5df5uKJeUaJWAfdGE50P5zY5K3VYpczuHEiPOSjMKN_HOI8c02okny5DjuFqv7ShkIaNqmCZe3vy4k23NCAbu2zBzQICi-f-LUm2PQV8mE4hVxn9IA9K-gVqXSVmSNX9NUOwXEfleutr77wRqxdyXhe8ympT0-w57GQtWGLfUKpx8Q2Qnl_Ejao2eI2ErrmsxW4doPWdmXJ3jXF1FLoZAACJLYoDDPvHLFGfedW-A5-y9FSbR3SjcWWD9g1FaRrSkCcyOWkFLkpnZP-7SDzGNl-PHG5edGgGU7vItgPklvsDofRcEh4CdoRwbqm6legJGXaagwBJZbfq74s5ywHqudNi_IQSpMRmqvWts-Tg1AejuQ_ha2723l-zbK4SyLhog4u-j1eQPpGI3HVR4JY3SYbF4eja7zsfbotVmgW6QRO8aXpJQnHjwTz4LwIGAm1UfmbtpaR3FkU5GKD4OjsNWbrTPX0WBNdVBqxOdejkLIf8xQ9Cs8me-M5WB14tZImDtIIXHTz_5j6Ashd0Vhne3r0rbV0inTn8cOgcRU-xGqYnDYRhed85tnFbKK2e_ShZD5bYsiUtfLdjNJSydGj1saFWqZXV6MSluo5N2ac3ZvM9AgDsPWFLBvtGKbkAgXRwKojVgvZWAmSGAUrPwI9Z5utm4ihkHT2jO-GejZ853wzYO35FXn9tKM1amokeimx5Jzg2imYUe78PTYTuKMlDGI0uJyBVRDEw47wN9_TxigAls3NIqHXlqcPyW2-q0bAbZ8nap1dqbQb19nal93RXiZzny2g9wiaUIiNaODofBWLbH0ywe90RC4P_8GrGxZfn8N8FX-IOQzp5UBS1x0OQGjs4jPGvE1nSm6VVYSZkVpYd6hmVW3kF-fqqYDtfwIg==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df1.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 9 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id1
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName2
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 9 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['id***INDEX_TO_STRING', 'billingId***INDEX_TO_STRING']\n" + ] + } + ], + "source": [ + "# df1 = df.cols.string_to_index(columns = [(\"id\", \"id1\"),(\"billingId\",\"billingId_1\")])\n", + "df1 = df.cols.string_to_index(input_cols = [\"id\",\"billingId\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 10 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId***INDEX_TO_STRING
\n", + "
9 (double)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
id***INDEX_TO_STRING
\n", + "
10 (double)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 12.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 15.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 14.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 18.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 16.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 18.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17.0\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 10 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df1.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.copy(\"billingId\", output_cols=\"billingId2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id1', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'firstName2']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id1', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'firstName2']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 5, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"firstName2\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 9, \"rows_count\": 19, \"size\": \"46.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id1\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}, {\"title\": \"firstName2\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\", \"Luis\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\", \"Andr\\u00e9\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\", \"NiELS\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\", \"PAUL\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\", \"Albert\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\", \"Galileo\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\", \"CaRL\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\", \"David\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\", \"Johannes\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\", \"JaMES\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \", \"Isaac\"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\", \"Emmy%%\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \", \"Max!!!\"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\", \"Fred\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\", \"((( Heinrich )))))\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\", \"William\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\", \"Marie\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\", \"Arthur\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\", \"JAMES\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdwyQOwwrveVs2TRKt3eH6n6QncHYHyxJ84aRkCJO8TcRwlbyfR3NOdUnboWK4r9jq5k7gk9DPyUIoTtrQywnfDZDa1yuGcfakxTapgib4px5V5dBeTgImCRiwah43oGwEeYbI8d4q-lv1_l_KTz9428KTs2slmxbjsYd1sJsuQTYZYDLt5_PsFJRCN7WIt80orf9qAp3MgY_lT21eyrjaNhkckWyO3k2bIAfC3CyaiaBp45Cz3bCZW97G1ZPFEp9ObfMhHnJyXlbwPKM0PqlAiqlrEPUIrPecjFjXIEyGlmRyZmXWyGuHvO8W0kRZQ6NCRQRXygE_0uafd4H-pj6SzEjG0PwKBn3NNsHmy7LK4b23w3sTSVgCcMr6gHCkxoRSe3wJjIlQ5mT9P-o5w20LdwFWc8KoteZvq3YneBy5vmJaADk4O9t4E4qzCk_zBO98BBwnsDO5NO2bRwc60XY_2-IMwEdIbP1R0ossMyulNrNtVA6KkKAdOQqVTbC59TOrR49vkB2gTT-63T0M6cxvD5ZCGNzjH_qckYtuQ1zE-T7UAF8e39QpUg37sbY7jk171jCnfM_3rp1XfRmDSa4f-m3d7Kj-KpPuSUYLHAs1HNSvStWW0yBYV7AgVzpPDq9pJdyFRsRBp2deE46ZplLWXMYxp9IWLdOAio848hnZZUIpqbENExksVDDgQl7V1kiy62di4HLoCRdoyHD-ZY0XDfd-wVED3C5lEfJ-f6xxEoRS097777OL8-oRExgH2xrUGeTOQ2BHtHTUCTzZkRlqVI-ceXrRT6kY2Q1fUE7cNFc0T_34LtW0k_FtFySVF4FKGq4iwsnk01ePwGVML2O-vEdX6CO1ClZhCwReIK6BdG551vdjbSg9kpTVEY38ubZ3-fqhOVRLHcXw1kRe8r0QWQZqFZRumA1VAfIZLnsjkB_zueS-kYGu7veVlMfj2tBzHqBPpMureXb3-a7XRkw63fMTPlm9otGn-sT_l2IN1u8t2v78VwQZNPPsgMas0xuwgWjm0RJaoV2ROeHgd610vOIB8hmuiBqKt6GBLufWO63y3VtqiYlBWPlsSTzVY7lra6-gyPrl_zmRIQ4myjcumZT1kqSsDBWu6sXTEq-H2PnCTH23RSZdhCPi7Ocb-xyX02thVDnIqxEN1-N7OcgB0ZKdlYwuJYr_TluSQYrZkMCIGe7kpD2AyAMt3geB0upB9YCaIEYWhBoqced5CNr71Oy24JuN1FYQvsdm4bBxQm9WUxpn_kL0hs-TenSR1EqlDrpg_TBG2DLZi3C3zZlbtPDE3ihSBemwPdFqMrbwHH763VLResPTqQwrmh1_rCemF6NWJeFpVpQRUKw-CkxA-O0UQSnVA4cH0UvYF6z9CXxxY00xOCFRFe1JsSBxMc3TxP1RWVw3j3RTV3G-qydqnpqRNPHkAB0YbDH_79WpEa2PQP6sgIVFDenGX3xk-5cV1sPzOlUBxyi2bGBFLUN4tR6y2LzOWblnWMY4guVVPOC479vh_ptd4xozpgMhmR9ioOU_rN0rNoyVWDyUOHh3K-l4ghihdH1DLwKu_L6NhvKp6PDu9K_Ur-1d6-_Dn4mk9CHrh4ZfKWC7TYL6_iLxnvKUJDGfiHiTmSX2sYVq68S9XCU9GTRd7zA6-jB4qW1y7R31cLPYACFgOjTPmVxxByZYAxPHc2idQGiKQh5_LhvrnOmARMtisnQahc3jGpfkx4z2Cpx9FfoNw-Fd4d28nEwwESeUJBlM-y1GScTkskc_tVCyVIEVbSukKclgf1x1Mn7eBSBH_5jdAG5SmDabw7O7NyqZB1csenGaD7LCAKaLX9hVoR11hdDWX0jD2ZQU9S31YOWvlKPItMA2TcIIfwEhZO-weGL_BFRCZPfWKpt4Q0zP2UKgWLzVKVKiIhGxxcTxJjLERnWddbAXFvtG889JzDnWPVc-25oG5DIVQgygcI_ar6k0YJhrKLHg7ISMW8EIo7QtF9BcqLbUmrsPwQJiQFRErps8pR1KZIQ1YQ0wy3UkAphZP6u9a3WcXxC03GcS5TAGn3qDV-0JL9gStxo7gQ3G6Z6-X3GD8lcSjsTY7W8-Gif89K28d53zf_hOoHYN1k69HCd_irGvN70ff9RpNjvOu-h6nMpxvF2Dgvanb_PndWxynJPOJvw4f6e_PxTyyK4yYdDsTfWdqZR714ofewbD9hIOGDFiFTaOKF0VcXZYShmS_55OOPdXH9GZ_QdJZOCopVjvXdBPAiPPIKpLNUep5SrhaYyGRzUQCMJDb5yn4zD97bkFqxyiIYbyHW9QrzMen4VeynJSKguwVjFdy-jMU7WHaix_d8cy_TDm7eak-Dsta3n2eqIXxewyTGNzly-v9Af975625aYrrSinGeWAakJTzk6gT5OuWIUqZCxiCTSHKYX1VB75Q2lCE6GEGHZcquchM1t0MkKI4cD-4kg0oCe-nA5C0V_iYrK4abhMyqwTEEh6zjoCq5rHhrVKBEvvIwNvhWEQS4VT7BIqjA22iMhDqSDAmkNpIh_6iW-O0FZPGDw1pOGS3st5k50khXB9RaUGLt1LADT5GvXFr0_qELFt8m8KN_sD6jcFATPrGPYuL3kE1kgmnpPevoHbUIf-KcKNhQTRBlinJ64qDFdDWr_k4GoUbKriugEByr-gj8hbKrxcATjNRu0p2Wc79EHEq8HaySGsFtMwATvPbYyAi5ObiyewR_U5CdT0ksfe1JqumZR97ZPkOtM87rORP2c7axltnMOMIWZ0Un2rGraGNX0yenoH_ctsQ7yuz72TljcZ74QHYKcg5QiE-hHNeg964bwjnwrTcdDDa-2H30UZ3XlGzo1bzlf4hfWqp4YJYPGxERAfKW4riLCOiHQdnWJWBM-Z8LpVFDPCBC1FEijM1F4NYReigXu5_sJQUzVsAs3Ri92PUuEES8hF1I3T9HbC4jH-4ko8jk36PKo0pvFFSj4LrNRYrIdS73M5952IkGsyqd3-N3dnaH_fd9QYc1dX7KUYBEd-RHkGpbfIb8_3iwQpudnCrHW9oRNODMh5ARTDCvwuue4GEz4Ubeb8jRJiFSZgjDxQpgWxe03PicIfbojcpMYuJJTLfr6bODSa6XMmVG6g5wAWZ1vjPhynVGt6cvulZ2gEw5LiFeb5aj0DHs12dAkKINsjbMnUhYYBEPN3VMMuZjKAngxWcKuGZ1Bn1UkPwa3kr-A0S3_d0JvnHh-wrnk3jSaryefOdzchpRbXeqdYEGlRnG3c4HHA57Zl_PcQupc7rXmzvbfSUmMe1xHbRRqJmt5Zo38qBUS-wMohsgJ4indF4vahJCFR203jXaeo73NQSmLLpvVW3_brYzHzmjKWUKCC_sBeFFPdXwUhtPXOLZ9d4NV_qoFugSB2g8k-GenYokYxqQmgbIwbXWKH9gTJbDgONjySzZEuh4VujbNQuV0rYrsUp5ELmsOd2dgz8_Z6KeVGWo5kdXgXdAU4KPs20FsdBexe6GhFfqnx-bnMyExC0i1mbuoYqS_PIUTvTNidNzBGUKPzCTO3OxItWq-xfnq72dmiVbq6kGRcOMVTUpy2QHcWfmX4lBfR5iRGhY-O9vkKu5FW6eQEG5Y-5m5yNDzB9IbdEeJ3I0R9qTXs_xeVvHDKtYn_5h-irvjCJXTTBNnz7e2bbdgZzRf_wU3B-A3tQwu4sNCg9tx2_OvGrszB0QcjtK4oeTMDERRXTgZWUtUvpanfFjZQhlSerNPoQLyfx_hIzjm4GBqKUEafb8zAW9cuSZe_5igvU6cA5Ps0DHLlK40lZIq3QtV-tdBeUyjj4B-Ii0n4nJ8AW7kkZeVrrR-GelC75e8dNfmSXAxtzzC_vEI1ylo7K1QT6vzg02tcPq8pFHSXWD7rX7MWMnvHB_-A0MmvmVpAiCdARBkETULkABVWbmTlD9Q8lAI1HrH1eNUuse36jRer0nW8Z63tBrUELum7prAlmdaQPToIqaPUQSEamw8RVNvV6eck_LpaGt0RelmiMP6X0DGtesGx6RMN6LoqA0kmOpS6g2KfT5r4nrr32G_o5Eedc-QYrgCcgZROvs9Su1i2CDJZIM4iClGoUwWLSWhnjfolCXtY5OflYOWigZ1_iduftnuzvvpRuufO79XQC-3FWevckoRArJiN6Qf003tMItze8phBf1JrQ661Q9k1P2l0Yzye2raI3G5Ufq4n-9AtJWKNzuMJj0s_WEk0ZxQmUYEKdZIkc6_wP1iMqzRr88XZpel46Vy0u6D2FuPOerY0WcJYGW1ujLWUsFpHdr1nbpWgtpfRXuTkoHaqcc0m9mZBARIVFRK3EC7kQr7Qcn1LDZYVoHgVujZSHZjOpY1nYupiI1ncXb0rEVpYE8K-NjKhrIMkK7xcz2toPq6t6QWSKLB4mfL6mGh7pjP_wUXUPfyR038ncIV5sdfbkxUabGvnDfnIMYPiyIPxaTnm_g0XhHqOT-o0Fx6simYBoznmUKv2LmNKKhOxIXoBIpu4uz9lrpAy5WyygxOM93gOPnrj3fxfDPRR_P66vqF04FQOHLKibRFN4w3nb3BEHidDZImpS_dfgK9jxvQTQjc=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df = _df.reset()\n", + "df = df.cols.rename(\"id\", \"id1\")\n", + "df = df.cols.copy(\"firstName\", output_cols=\"firstName2\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'id1' with function _cast_to\n", + "INFO:optimus:Using 'column_exp' to process column 'price' with function _cast_to\n", + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id1', 'price']\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id1', 'price']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"float\": 1, \"int\": 1, \"date\": 0, \"categorical\": 0, \"numeric\": 0, \"array\": 0, \"binary\": 0, \"null\": 0}, \"total_count_dtypes\": 2, \"dtypes_list\": [\"float\", \"int\"], \"columns\": {\"id1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1.0, \"max\": 19.0, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190.0, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15.0, \"0.95\": 19.0, \"0.05\": 1.0, \"0.25\": 5.0, \"0.5\": 10.0}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18.0, \"median\": 10.0, \"interquartile_range\": 10.0, \"coef_variation\": 0.56273, \"mad\": 5.0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id1\", \"column_dtype\": \"float\", \"dtypes_stats\": {\"float\": 19}, \"column_type\": null, \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"firstName2\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 9, \"rows_count\": 19, \"size\": \"52.4 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id1\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}, {\"title\": \"firstName2\"}], \"value\": [[1.0, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\", \"Luis\"], [2.0, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\", \"Andr\\u00e9\"], [3.0, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\", \"NiELS\"], [4.0, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\", \"PAUL\"], [5.0, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\", \"Albert\"], [6.0, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\", \"Galileo\"], [7.0, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\", \"CaRL\"], [8.0, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\", \"David\"], [9.0, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\", \"Johannes\"], [10.0, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\", \"JaMES\"], [11.0, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \", \"Isaac\"], [12.0, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\", \"Emmy%%\"], [13.0, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \", \"Max!!!\"], [14.0, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\", \"Fred\"], [15.0, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\", \"((( Heinrich )))))\"], [16.0, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\", \"William\"], [17.0, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\", \"Marie\"], [18.0, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\", \"Arthur\"], [19.0, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\", \"JAMES\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdwsdMQtwlSRCLxDoj2XST8iA2rorSt532yl9wKGfyWW_PXd9LTK_YUsSMNpDAvbnZzthC9cefPQ3z4d9fJs9fvbxRekvw-vdiZgJJHz0waZRKcOrD2o8fpxy0SXvMeS3HUmVJ-d4z4ejf9AanngBFys2ksb36_21B7MxoJ4lpRpeGRZt1UQYCuh4EFhgc0bgIEixI62fSnwcey0Kt0ZNTDmqGcXF-p18d3IxUaZ-_lbxeJEi7xo1LU_PNCM4XrQMW9K6D4intnvTd25hQcze8paIAJZqgPfREzXqNG-I_6_rC0niRwQvokfEaskgb9o_JwrQhvBQUFhDU20wZ1039-SZL_id0IfcED54SePvgXn5f8I1EIKiNOlSs5SZDjKtbXc5YJZ0gLG2NmOw3IT6ULqMRg_YUzMftmA2UdoA4z9k1jaLysMcRyEW4RctGuPJthDmCZAPRqOF2285JduB9A22d8LkBhtilBjV_KZS69mB__hmbmp6st9BKyV6EGULZf7TKIiz5RGPV0V01QIS8qwtZVOwMuKwWJaMuNBlS93lWcrSh5_4R44YAIYGEljAr9i_OYnUj32HYjIsgxD5Zo8NszOUL8QQb_ZIWfRQJDUt4cE5sLZPm5z7Subx4AWaHQKcST3D1DGgDl3I_XNgKWCKELfOMjPzXpXw5cdDDoT2d2PszDNS1wdfTt29ocv3S0n62BvuHnH1ovdb64E-0EKlcR-lW0UvB_SVslYOxa6fb6Rul1Cg6s_wv_4izpMTMIVV3t8yjZI3LCEm38rrYO6y42hC_cLpdqLctB9-nJXAiLSclcBYdPN844gQ-8GMFryo3CyH4Fkgj1vYQx5DxMXMSmWHU2LkhV47AmjhfUOQ_wbxEMsB74ctaAM6GiQVKXf2_55gkmwUDQiRzZ5ZO-vY-vEguCXWCrYB5zYzEi6gm468HCnNmoYmclCIS2CJtLuo6z3EpmTPSTLSgQep21rlaZc1M9CVkz9aFG0JyQ2EV39QTbFgMDLhnxZKxAVwtVZZPXzDcQm6oF4j22tf5OjwPvWiGlJiU2uOn5BLPJ8bx4-h6lD0OpCb2LqzuHTc1Gavff8L7URGuyU7YvUqf_TgnAYadpDwL1yBX41svsKUCnu071r9jLfB2gNsUOEoaolyeJ7aCHUH-tt3z_L-BBfFK5d5xV4jng0ELTMQDlypDZ4P4vOgpu6H_JJa2XyqrISVDOE7GlgWQmrxLd-hZ9Ny-1Jf4YLgFj9W4_pW49dztCrQDsB52a5_WtzEsTz5uBc3T16W6VQSbL0KDHJryXdYSt0z7vTGaDd6f3GCNx2tw7CnRPUrb8Jo1BDox1sq0IUAtnu_DOy4U1hrYhvlIlcK1hTKxeTB6vq_eZs0GLqmjB9C8QcrchzKD-Ix_hDTfVTjXZWsetgQiWhrsHIlS-eYnKEDzPuYpAoqB3mOmN_myS0yEGkLSGD087Qd-c9fmNCyflOQJKvBizJJhuTRqhNoE-gPrVrGsDpMOYwt2gJhF73p0ffTDT2c56AKprkqrAT7l5koGgwAvZ402WIvfcNnnCtmQQMvS83i9bT6KsL0VKFmigV-nbGGAU9ZaByobNglR5aff3RJi4g7lPzQUurqbIChvJKwjYuq36E5AMEVnexw0BIcJeOroV27zvnEiWP3KGlPggcVZapt9jTOuVmb2LKCc0QP-Aa3Af7bNbXKbSCgo3wlXes_F7bE_RPF1rWSlE7ElOBbdt60ZnCyBRahRWqiUpBvFeRHswY8DTH_X5c8vfXvvn4Q_7hmiJJ31_kNriB0RFf4MSwx45oI0W1QyfW-9i_T_lVsBsnEeFv_pnlScLoASnkQadHhGuYPc4kOuBQaZDN-MMITnbRpJbKDBAwxdBFlrUY_AgnSXzQA8rSsA9Clcl-CvLyg8Mc-XWoX9T-xTrEfzHeIgNmJfZZLQmv4TCy4T4Aka5KfWuylBYSPybHyOVhDxkppiGGP9KlTp3kXzBsBEUJkdEszphlD2Zp8YKi_LDFuv3qEjC56JHNv00MvKkwEZjiXm-z0-DrOJ7mggNcrbldZIHKL7i2jxkd4R6tlty5ZYDWmeqhSs7YR6-g3lvgQdRukeAliLCRJxlu0T2ozYt1NfwIs41ZSEhH3Z1B0cPf_nHOBiRVJcmWf0F3dcuMcTG2946O52TgiRRu_6Fyht_B5QTCWGzPNSGdPZRJ5KIKHyn2DzG7TPThW65dAH-EdHxgzbrP7CucwjM7oq3_skSxu1aa0M5mfE8jpgDjYH8seesxv5Na77C1VvEtKwo3sfAqFO3KNzEaydWRvygypl96YoUYcVJzEzOt--0gME7na3Os0j96uT1pqgKd5P1TdGR0NH5gge7bwRx_L7mAlN2Op0zwMYwTOrDnVzv19afXq7t15_wKyIOFVIRIIKwgLA1vrFMe5cNWnNS3GcmjLKU-X6G0oQyV5bHdeiMxPwd3PhVu-ccDFjsVl8ueK-AmCRhJhHFMtly0bAtqNELw4xt-k5xvLxehA9_KNCz-J-tAaXVEPRl7JmyvfVPzOs9n_UBmOkvPr1jnYKEyTYi9wxeoqrv8D0hWDdcbXaPU_RkjhgZcXpmpIBosxl1tJrmsUDE-AkmPmXoWcWMpVQ7WitVzMkDdrlmNdj-W0a8VFM8VPINkwun6UwcuZjBmSYeCGTJz8yYrx3CLhD6MHWh52-R0Oc0kkVN7Qd4aepFlwb-WJarKgMa0NQGCd0dBlqfQiK8ZV4q6L8EeS1HhbyMrYlFvth0Wy_BmtSkmb6lZSY6csHvReJ6tL0fczeH3O6Szg2MFi0OTlN4tOkQ6da-fKaE2jvDYBfAV6uJY9pC6m5mN8dLqaDWMcm0Vyx4-XufY7Bc0wl2O3mnrkSGAWxrnTLZZrCJw8levHStO-1aVEvgOp91po5gSZ6HN7j0TeUe5Uchx2E70pHSUTD2VUw5uoxMWbhfGLN-Q4q1DTOr0sluq_G-MgwtWzs7KIdi_9tK68Vs8aaq4WE9FyinLdIPWMxP3NTp6_nIC5pq7iAr-1eHAv3WsK4HwjgZiagt_huwMekJuoQOc19T3dVcgiRkmDIRmjGs_vGRNEdsfNTRpM_6ZC99BWQafeoPOCebnil0vW1KIXqd6D3mNuJaTR8-V-anDXSEOzJeCekjbRBBjfVdTuoayOqjy00eb4JUiqsAs9ekHigdl-M7q4Eyx6LtMzxdMwyguoYcEhZLiNJJfmzG_LGcq-5idse7txK54DnDzbilhN8Yce5oW5KyoxwRtrEu4QYs1EjrJtvKywFu8IpCUHq6dFXFK4dl6kYNLhaoqIXnwzd-INbeLGLIomgCpgIHHmR_B0LzfMLYNNR98wjMBGPxGGqtHx-Jouv1RSrVxp1hQkSL9Wi4ouYCKMxmbtyw0poUwaIDjC2GglfZhuVELDg0eWhgnZQMxX4d2OAl3yDAKjiimDbk-g-VrZDXkM--jurApnYpUmpk6uao0o2A8LexcWvYftygJvqgO0e9CiLs3zvQoqiRpMntO12EjKzE7oW-d8-0M_07B94JX3PMdp2msiILniPBrrI4OhXQvJ1GcDYjGJBqGJVCw9s2C4iVvsdwaNkbY8yq6bYHTKcyOcUkW19e-ijprudjX5bA6Beccc--pV2WZzDgMoWASw6sNI82ytG-Y5FDZ_iszwHBg4al0B-0K1-4Jwq0kEkYa13PcGA6GDd01fsSj26lkxuUq1k02UOpdd7SmyV5LMXhrdond3ldJAQFyy7de1QT1JUFNDLoePJp-aqqoUdUC21PWG1Mw_NMXiim5ziSCmdlAWj-OKa7nezv1eZdS5EcEt_NY2qvHMPpQzE_izyYyChvl5bhgUJi20oM1xvtrGKuXZiYjab9hjV5eV9fDlT9Y_StFWm-Mh17M9g2OL3r-b4oX8YqDm3l4XhUt9OiJcLIPrL8KHJRMPwIRA7eJ6ojSzibCrThmjRGQLVEq-Jn4-SX8gIeXOeBVVzeOUpJ79f0oEVNLyn39-F3fcOSuLaQvZGN6ETZ9KcJvNWOFkn3sPKvwtj4ARCPZ7L6MMM7H-cfYB6PAsxtA5p12HZANxpy5WWCDKT_3bBh5i4TYoDBe48PxZnIG0v0qwa657vLJ62pilf3Fo0qyM8moy3u-4jOF6KriePzcL_OD4EbihyT-L4xv8HjopE0CK8pjAKVwabFPvm2UAz4yM7n9Ss0owEhVty_VuI-I7FVU0DAvnBdQP_5wNaiBiwKSkqZUMMmRS2Ydy3775K2jzVV6DOUv8I20UG9XkTZgkBBvx_5n4t9WhHkSutZfUWbiegayTclrCbekzYlT8fQaqoS3UmxImaACIGkb3sdQR_z_GeP0V0BBtfZgr3cs_7lO4zUXaTx1E8G5x8aHCYIbuUygMz17SRm_sztcmQ5X4nKGzBulwEx96E5X8fNEoNw1DGtRivu57wD3nznd4UHQHemBNRcCTIeLRL97XtWeGtLZ9C0BHK2lwN5zMAZpKpGcmR6r1kWwtmdwtwZFqMqjUciZaMTQzPd3j6AmeXz0LSXeg7E7NSdBR6IgVF8jFve7qI4w==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df = df.cols.cast(columns=[(\"id1\",\"float\"),(\"price\",\"int\")])\n", + "df.send()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 9 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id1
\n", + "
1 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName2
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 9 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id1', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'firstName2']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id1', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', 'firstName2']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol`,`firstName2` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n", + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 5, \"date\": 1, \"array\": 0, \"numeric\": 0, \"categorical\": 0, \"null\": 0, \"binary\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"firstName2\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName2\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 9, \"rows_count\": 19, \"size\": \"46.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id1\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}, {\"title\": \"firstName2\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\", \"Luis\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\", \"Andr\\u00e9\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\", \"NiELS\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\", \"PAUL\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\", \"Albert\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\", \"Galileo\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\", \"CaRL\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\", \"David\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\", \"Johannes\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\", \"JaMES\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \", \"Isaac\"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\", \"Emmy%%\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \", \"Max!!!\"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\", \"Fred\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\", \"((( Heinrich )))))\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\", \"William\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\", \"Marie\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\", \"Arthur\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\", \"JAMES\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:gAAAAABdwwtJWl8c30rQXtJm9GYmqs__B3LkGPn6CmIlrCCIpVRMClOQQZyB0R3ihcMuqb0ReHdARL42-KhFNegle0LWSczAbvV4Akgje9LnQdK8lt4TTCMmkiO5Phd5n80UZzwI0PWxiTpIlv0XZGphivOnPbofq3YedieRmqoZyu7t1Qy-2sAXHgdvE8O2JQBGYr0eRutJje9-4vDCAku6XOjZYwwVp_9ltRemmmlWRPlkSGWBCspL1RIdrseqkA11Z00bfDBBfhCB8EgfQrqvFST6x6R9_SBMUYsL1sKS7LLXzvTE3Uy68LcIsKuspj8gMaBge60IEyECvMDXPvf3QwEXK1cw8-kjyB5NuqZQJbum8MJ9uSc2fNnF2_IcdxUEXEVmE9ysuqzQf8vejLSf2pLruqdfaT-nfyeoMgzBJKOD6GsjULMUlSjuBk7R9E9PLQvHEoI7GfELc68uTo-Mt6F3Nb0aaXTvvNe4GGHlfPRsf8k0UvZ13gBLxNewzhqMPrcC9qyIiReBGZPU3uqKXYDiCpP_NukvL3cuWpwjAZaNNYfudeb1eoTOLuBchBn7Jx0lL2QlY-sgPpBqZC33eVgttd6YDNCF7v2MXw-6jkXYT7CjUnI9CkALI4UXtH0QWjQsRRgoQD-yCK1LAaIt-7b2u7zrpKjZpndRRuxu6JQhU7EpyVJPIEn0Ntxd6DbVqOpoiQAaFCpNkf5oPZQn14wsBdncAQ4OfEcoVmbcNHO72l7BmzK7uctUIG6LO0K_zYKZafzUNBvkvZ5uhywW88KNq3xhryIh4TraPLWY0VJCUcJRxHjnl8I_rzM3JTjJZ6ZIcsH3kgyGNYgz_E1oQmhw1lkrR8Tl_I7u3efl2DQUSy-ozHJ2MqHjctHovyJNALHB0EdNJIEZm3Dz9dWxNrcPajMrsW2-xXCsaCwWKEfRjEcAiFHxfm1rA-1W7DtoJkEnAuxljmztvbnZ5Z3UjXgeR_Juy-ygrH6zMGQiVdAOXvIJ9N-Q2d-vZP4V8b1YZc8iAewv4Si4KxjF2ZR3wA7SqOR7uEmpWjkkD1uyncxQ8LHE36UMC4a6HFgB0KyOjd68XsOux37f3lemDf-Th4-yzvNCoy-ZuwHzgk7yWgOf1fAiz5cfDLT4c_tWoo5WRtFreTQkev8raWMvW5hsRvXsDUhxbKF7e-jXaT43cRGrnF1BvHgmh-H82j4aGcb2Fj_QRn66LuQPKcZgpiDUMmlOa9AiOd9dgreLXJpoimEMNkFpMMVyCRe1kt2OeQnRvwTOrNrm9Zq-QUhi9pAdbYpy9v7J4wwoF7ShHw8KA7ZX5wCsxRT7KlbcS9Qn8hUhc6ariQL55y_QYBCrzrg-UnNJhA6vPPAoxK_orPMB7G4CO5rnGoXbDIMD0WShmgqwdZv-TAQh7hgnxE0xbjUuLtkWLFcbe2sp9lNjNreIYLwy63eLZ6y0-a9cfqTqgg1mSVgFfLB44ASIpYpzfrRom7rRaHmuiLlnzmdzpVeOhbhOat5g6KhrDk0MwQ6iKW9IEKj5tH48B_hayziogsw2lJj8ueF7sqGW7BBSJspYS0u8tSzYMgquXKvrIVXLWj-_hywwN48Yzulc_2H9PTlHzDFGE68ArvhpoNRpNFECGF0QwytNKp3Fio582jo3qe8dpqKdGjOuevu25gWUEfo04GS10zoamT4tIxtLb3e3F475IABSjhrsozvUwEIq5eAw_8eQ9ggHcR76-OxRl0EXEURrtpPDi-DxvCrEv3JoELX41bf8JsF4aNtUTinlhtHELooSHDmzT6ANR3RgfwWvqIAj-MnUJffl6Bh8qL-LqXLL_RlAv8xXqyrBLfFRiI97ml1nnjVqeu2EVU8MlVNmRPyv31opOj10VmMQogR4DGlRM9oAvof7aR3WGhVuY5HUGmU_t3d1Qhzu5jGPzzqLtm_0eZt5LMNqGC9SOnrXqdOItMYoXR8K4oeD0fL7AUbXYLtjjsGtxUAvy5UD2flA3uu0chPfx4UUC5Dlce9Sb-6HPaRmU8thLMKSMHcb4QxsiqKwM0POfCSAJ7K32ZUMTUj9-tjE9AkSRvTI_vLuFSfjDvnUBA-bXGOq9Oxzu5tq4e5qj132Tu436GUSWf8eGNWZuZvpnFXNDUgFoQUMGDIRDpeVYpPQFzIuzBENBiG5a3tp_k_pWZSX1VlW4U0OrR3G6x3I9v5D-nRoB2cZiJzJuQj1jOc1N1y5CjyPVakhtZDsIk5c0lonGxkIdrhTw8oLG-rThVfYlNuczFlWg6BhrnK8-2bCP341RUj9i69iv4ps7soxxRLoakST1OX4R5adiOcWiyT9p-CHGXgCoIKLXOHYn4ptnQvewPj8gpS_R4kdc8zDNK6CTwFsaR-CpDtLLfVYdPZamRI4wo9waENRYyM2ME3NTGv2nozbw7xOTzujoD4gvHLVlkqR6tXc2FcKkJB8Qkaf10C1--hVmJPztoEECFm6aXNrA7geY2CgZbwhK-P0Wwsxr2epnjJq3DeF00UXgkYt6Wn69cNk9ktypEGFXuazvCZu33fNQ5jOsZIuyEsTsYm3Ac3WXhc_WdDlCbEwO9OYhxOiBe2fyh-coO-9ZRxhmnGsQ6E0KpzqFe-VKkg-WVbm8JVr3fio7P-jD003xxaHWfIGe4D6_U6RUQB7qw8H2j9n2Eib8j2HREvzJi2D_O9-TSM5-tobCDRNuAQlqotUe-zsDmXw3x34gM_H5vJfmYXAaDSPC1qsI2zRYfgO-sfjn96o-rkcwAtK-uIfkslT4uGnKPEIuNe2-ypKzpRLxr1mRTz3KQLmwoPqv_mBXFZFKwtDenqdWHKuqylIX5Udlec5LVdKS5VdS_FIh-oHJZWnvxazTxjSxU2_hK0THZsAL3UOgGNs-LJbNhlJdbhFmXqO7OfUIjHc3BgQzEkbAqbL7Gx1J7U0G0DvLPTVxIPb817mVAMJI8jm5eUIMkpBuOFd7mcC9xj1PwsXJbcP7VIragh7cl1lEAhYTzhhTgUEL8Gvhzkx8F17-uGJKaU7ykXPCu2fmGzEg-on_5BsgBIMIvrsk4fD9Jdf4QB_Fp1q1qgstQjIVOea6GRDOcDOeN0jbAXNfBgO61Ha2jjTj7_qflqDBScKZXUuTiFIGRMbz8yCunWnEb7zMxDtIdgsF5QSo_xIzvJGmv1MgfLYSKLRIKhyQ6Op_nHyuDoCie1Jqel1zO7werqYWeyXXjsG7yiZrBdA9MOwab1R5BEQrkYKk_jwHHSgmW45ejQX2AZwr1x0sWHzAb97MLz6qFJxYSCBTluqfKIs5DT3RFtkXe9WGvIsPvUekDC05hFDEYeTEtz84gaSXh4Ww5Jh-Fr0XnAYJZ02l94mehUw2Vc1DSXomHoRmKote5ou9BoEpiOl4d80TtLdOfLeeA6Uk4toqlJObZ3z-9HIgyTJwMWvFPCCC35EwsqFCxv2NyDKAelmNQD5-QSWO9WlCjNEqxSYHuTF89SEauIU_ALSVa5eh9ZkiTnIfJreTJB--4yHp2CAJ0dRpNhMobYUNhrWq_g2G7AnsE4ANNbeAgE_Z-G9KqzfRdIu1LdetAWLkpMPWc5zoJOL8W-f8CgemvXioAvxRnIm3liZw9il78rTCEQTlJxTDEG2kh5HN_jmODKeWXLdMmEE9JobZLzbQwHYGyg-UdKPI2Y3y_Xmrsq8ro3Xs4GeL07ngw29lGy4AOhUb6tFZUm9WTa1Zm7omjZ21HD1SD3PfCsYWgCnP5MbOEhGKKF-sYNf6Lk7m_gXbYH3fUqBHFhJopEoXlSnkjac_IRKbvo6SaNjRYOUMfjM2oP3Oxf4riHxZLEYltrN-KtxE_N4uqWQ27wJGuQy3K4RCPg9_gd5eGAE0P1Xbi6kknwtuI_OnaVjTVMZv_c-9AdAiJMPbx_9Ev7B9KGPWxsBgjlbtXYrVq7fYfzm9XsLR80OF76dsMRgGYoVfDtEiSRaO9xTZL810vuZAmBq1iwGbvS_6qXl2BOI10cKYRwwwnWtANgzK984z05gyrACucz-4awR1Zw-8MbB_gg4zgEFep0QeZl6csRGRg11jL9SoTisInz3_uumMfIgimrGREodePsCfbv_dk6XPCznkoVDEnqrf8vx3u5db0-diddpk8TGBv7bcYxXIhvAqGte0LXMNEcJU6bC1jcKnq9eLIj6Y8bRGoBdJfOGwN1Vz6YiKQpYdYhep4QGok2EJJKZD8Ci6vTo03CMNFKB4Ac2m3YqfTblDdiQ_kG_6outdhZeP0b5Vs2SLrJCpRRiz4drXAh3Q78pbb0veHsGDeT-ZjZ58Q5hZmQXTRAQ9xGS6eOKuk0vJIF2OOAeJxmw8VleyB63Lb45PnvWCrhVXvdcV32sM_DrQTBwnhRFDtI1Ps_QPCc9e6G20D5a7mmWFgk9Vdgd39uYXm1ySBLY8jLox38Vbdu5NhQ0LvlSB-BQGg_czcw4tCPhnUlB_T7ifDMAqtLUQYtD5Q4BywndjBH1FQPP5MpPdE37gm63pcU0pUqlrjwtPvpKI1rTUlfENBoqrOx4uYcherDHoUSidL8nmxGQ1aRwrs7eZC37LpqrT1fpXI8=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df = _df.reset()\n", + "df = df.cols.rename(\"id\", \"id1\")\n", + "df = df.cols.copy(\"firstName\", output_cols=\"firstName2\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "'missing_columns' must be 'id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', received 'id1'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id1\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"id2\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;31m# df = df.cols.copy(\"firstName\", output_cols=\"firstName2\")\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mrename\u001b[1;34m(old_column, new_column)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 319\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mold_column\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_column\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 320\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mold_column\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_column\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 321\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 322\u001b[0m \u001b[1;31m# TODO: Maybe should be possible to cast and array of integer for example to array of double\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mrename\u001b[1;34m(columns_old_new, func)\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;31m# Check that the 1st element in the tuple is a valid set of columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 281\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 282\u001b[1;33m \u001b[0mvalidate_columns_names\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns_old_new\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 283\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcol_name\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcolumns_old_new\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 284\u001b[0m \u001b[0mold_col_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\columns.py\u001b[0m in \u001b[0;36mvalidate_columns_names\u001b[1;34m(df, col_names, index)\u001b[0m\n\u001b[0;32m 228\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mOrderedSet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 229\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 230\u001b[1;33m \u001b[0mcheck_for_missing_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 231\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\columns.py\u001b[0m in \u001b[0;36mcheck_for_missing_columns\u001b[1;34m(df, col_names)\u001b[0m\n\u001b[0;32m 243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing_columns\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 245\u001b[1;33m \u001b[0mRaiseIt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing_columns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 246\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\raiseit.py\u001b[0m in \u001b[0;36mvalue_error\u001b[1;34m(var, data_values)\u001b[0m\n\u001b[0;32m 76\u001b[0m type=divisor.join(map(\n\u001b[0;32m 77\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;34m\"'\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"'\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 78\u001b[1;33m data_values)), var_type=one_list_to_val(var)))\n\u001b[0m\u001b[0;32m 79\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: 'missing_columns' must be 'id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol', received 'id1'" + ] + } + ], + "source": [ + "df = _df.reset()\n", + "df = df.cols.rename(\"id1\", \"id2\")\n", + "# df = df.cols.copy(\"firstName\", output_cols=\"firstName2\")\n", + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# df.get_meta(\"transformations.actions\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_name': 'foo.csv',\n", + " 'transformations': {'actions': {'rename': {'id': 'id',\n", + " 'firstName': 'firstName',\n", + " 'lastName': 'lastName',\n", + " 'billingId': 'billingId',\n", + " 'product': 'product',\n", + " 'price': 'price',\n", + " 'birth': 'birth',\n", + " 'dummyCol': 'dummyCol'}}}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "_df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "| id| firstName| lastName|billingId| product|price| birth|dummyCol|\n", + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "| 1| Luis| Alvarez$$%!| 123| Cake| 10|1980/07/07| never|\n", + "| 2| André| Ampère| 423| piza| 8|1950/07/08| gonna|\n", + "| 3| NiELS| Böhr//((%%| 551| pizza| 8|1990/07/09| give|\n", + "| 4| PAUL| dirac$| 521| pizza| 8|1954/07/10| you|\n", + "| 5| Albert| Einstein| 634| pizza| 8|1990/07/11| up|\n", + "| 6| Galileo| GALiLEI| 672| arepa| 5|1930/08/12| never|\n", + "| 7| CaRL| Ga%%%uss| 323| taco| 3|1970/07/13| gonna|\n", + "| 8| David| H$$$ilbert| 624| taaaccoo| 3|1950/07/14| let|\n", + "| 9| Johannes| KEPLER| 735| taco| 3|1920/04/22| you|\n", + "| 10| JaMES| M$$ax%%well| 875| taco| 3|1923/03/12| down|\n", + "| 11| Isaac| Newton| 992| pasta| 9|1999/02/15| never |\n", + "| 12| Emmy%%| Nöether$| 234| pasta| 9|1993/12/08| gonna|\n", + "| 13| Max!!!| Planck!!!| 111|hamburguer| 4|1994/01/04| run |\n", + "| 14| Fred| Hoy&&&le| 553| pizzza| 8|1997/06/27| around|\n", + "| 15|((( Heinrich )))))| Hertz| 116| pizza| 8|1956/11/30| and|\n", + "| 16| William| Gilbert###| 886| BEER| 2|1958/03/26| desert|\n", + "| 17| Marie| CURIE| 912| Rice| 1|2000/03/22| you|\n", + "| 18| Arthur| COM%%%pton| 812| 110790| 5|1899/01/01| #|\n", + "| 19| JAMES| Chadwick| 467| null| 10|1921/05/03| #|\n", + "+---+--------------------+--------------------+---------+----------+-----+----------+--------+\n", + "\n" + ] + } + ], + "source": [ + "_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AAA {'rename': {'product': 'productName'}}\n", + "are _actions True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'productName', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'productName' column is returning 'nan'. Is that what you expected?. Seems that 'productName' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'productName' column is returning 'nan'. Is that what you expected?. Seems that 'productName' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'productName' column is returning 'nan'. Is that what you expected?. Seems that 'productName' has 'nan' values\n", + "INFO:optimus:'variance' function in 'productName' column is returning 'nan'. Is that what you expected?. Seems that 'productName' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'productName', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`productName`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`productName`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"productName\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"productName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"54.4 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"productName\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"productName\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"productName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"54.4 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"productName\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwnrDa3Fc_oIvAOKcEqGUZESgrErowl2SjuAIZxs8K8iDVm-_QMgp5A3XahLukkLjuLSsRaCiKrBTYmK39izyounaAUipu_2wiXMNa1XAyFs1ShLLaxxpaY5BCkoRb40ML8gkL4QR2O48l-q6jImXxC0GnTf0dgZNzpNL3v72TY8VixVoKjTjB5yU9lgt85EEsyAZGTs9McLKrmHpwSMAtD6CJBNVdQ3UyG7Fpz1TU-KxE1E7HtFlFgF6_Omd70OnTCTOACofHWJdTzDbG2BO4-GXWguG1tyyLUz26nHRGSxbyvjGsvouGPw29wRU3gYA2PBg5olo0CqZWg3r8xqSik7X7q7K5gezpVI4-FqWWSvZJqpy1ORbxzCUV7A3vIG6t3kT9TIPFW65Y42K_CkNC_Nfimo0CT6rqrZ5pv4MkLIQk0-PISD2AVBwJ_WxyBWXWH6GfmJ8_SK49oaZl7gH-N2xWVgz2tl4Iw27iER6sfW_wmH8gFU4MkqayXYg9k533J1UYhZpX1UeuVOqNqqEuHCAXMShr0ovYQlf020CRuXW0Ljo8o_dmrFVz2j2ddshAsnyHefbyR3zredTrnmZASHugp7W7OKKWq96YUwpa251bUgcjv6_O3OzRH_06jmiRLwQIQA3jcYOqqTbfTqdmiHWUvgniVYaAQnBi1NuL31FoGxvzgRlBg7tLOadPJ-fl9PH-8OlH7MBz3bLzIjwLYfIGzN5ntykVz196PozRW3M1jo7gAABxijxGYIC-40oEsstkBwwAysVjH2Eq5f9xWkcE16qogt47B5-bLttrQpLpMJh6AqfucagWfed4B_3t76KOquLGCmx2zgGN2MrZ_jH97SjGNXC-SRTAomUCw5Tbe1HSZz4fMnBqOCkHPiCbfqMUvMxtJGfZ8rGPcesV41EnxRlshK_FtaSITPpYjXIXJfm1ey5rotGSMe-NNP0-9qIQt_eb9PcAMgLK2wRyurjguDivYYff_REhPUFrOke3QI3ttOAQUvIaVczoAPHX82GU8MYxZLUrqrt9g3nLwZOeIBti5Fo58X3TOmLg2neo5nroSFfOVQP-WZ99dOWOxT6y2xSk_JDFYNL39xIFxKFm1bXJD6CE6EfuizAeUEJf6yU7myTkcpVMGGLNMlQ0o3EaQn3Egfyu3vRW26K3eNZXNuBPHjYSCApvSphRAQLRWnZLQKZguj6LoYp0E4vna3dEhGgk7U2mnln7E4VGnJbM3nhOX00elseZ0ESV0YceIFj7FWZWE1tv8q7iBO9ZgFW20Oc2IKuIzgI45eizRVg0PpdkYSbwfjHLk0U5HWleFZU25ZBtiZTUgScbs-VZy3oxX9KaQ995ttdD_Gy-klGdx_nkgVd8CEFC_I5Ey9Pm2eEbcnYnR9eOKjl-lJtGU8PqW9eRdHufmGfHh7IHhwtXFl85M3IAustir4sMrvnRKl2MMYf7D2pqs2z3X7-CCF8EX6xiqa7B4GwgZVA3RSknBULi6J73wjnHew2NRKDyEh9ZUEZLRkmXPy4S_g4f6D-MIHPKWqMiVmkTr_AVR0Pg28Um5U4KI2TGtxNrkyN7Fl5D2aG6QzuMTtmdO0p4qPPFUbKAa3ioeTmqqe83wIAN8Q0Rbw5qgmupqUwborbRoYbUbvyeAbTRBGYciUyt0tRDLORbPEcx9etca75qzJ3zv8wVprMyVXji3L42rT4Qrqyo5z3wKFHRNmn8eorNK1oMEyvgpTT3dqE-dRDWLQ2xMInSxLj79kcytwTX8DB-nOEkYA4ur_VILllePIgAyw-9HRXwtmnQhU1HBDQKS8sfmdxfX5fZMX-4sYXbfmm7WozEhZF_z_AcSAtFc2srsN8gnP4eHFpGcMZD8K1gISwSy-aypmJXCmV0Fp3YYXBNClKEM_CYNLWSVEmwv-8zgqBtsynBwAkB-V1I4C5VMHVHUHpej17owx62TB_qBG1JTlASFbpGexY5yk98ymSfKTq27pt_kX4NTtitQqczTsKZVYckk_JAzOp2IKa9HBW4hyZkkbYqCdRYqvrDD5j3peQlaEZIHb7vFcic09HmxfsK0b_wu55S2dlT29FniFnIb9w5XhkbpO11bRyzVTDR-L2Z4N-DkNdGH2iP0KzFSwKoyYgnwDhNmvfsLcaNGW_NGWXjQlE7Pv1iLmGd4hrxqoFkLhWFJ5FYvPDJZQL2I26MIKE85Cpg5ufEiZji2QkPvpoiiv345C0rj_b06FD0ph-MOIGOYSy-VqZoGfJJx-NsTGq4fKl9hiRqa5jO5VSLZLG8BgruJPvbWqqrCkv8wU5YfYt5w7wlmmcn4GdmjTRW3RNxX_M177lLD_coZVL4gIRcaLd4MPYVOrZg6WU6ozsRxxVsf2PfrEDwHC_WMGndyejizR1YlgN1hQj2xnXiOLSxJiTLuQjZU8LhJtX1CTWKBt9L2dm7C1hdc2mmJxX86a6jZchj1HrYcfM4-CKCvpGlCcfmw1E49xo5V3qpkCkI2WZcjU_wHjnzPiwRKuKEhWif_1jG5azyIxv6ds1XbNWxDNd58LeU_SPiHYrTHs_eVw94IP2rxCGHfT3SiTxNxXdYE-xRLIRutJ2CJVWJ6C-gyzkBlIZK2-BtY3jGQstblgGXTBGOPsrRguGKq0xpjjLVcNqWTXZ-U3bvHWMpwXX2AKvpI_pMKk3Idjkw4T4a04-IS_XnsdnyoIJgLosYv0re3WuK9FpWV5GPLH6W9-vVKKAJsUHjuexii3QAmoZyvAZ2Gz-WFUkR0hY9YuTmAey5Rx3JEdLH62yk6Icu6Yp4EzLPwcvBtPNxIPX5_uXJHXkbew2yQpQPGdWr0BtpIcHEYu-v0ICHVX4ExB5UzDhOI1zgBFPlZExZ8yjik1VXq9GFIdfnnqOjW3JOOgP7otbEmbtjeNJ3uxtpymYmcyzQyzMhtXF3hMPHwYPNPfKXeYMbQKgrICw4u0ZPaVVopNmnBO8iLUGzpXbvwM6DEBRxjSDGfQMpB_pb7Q1p4pzOr2BexGf9A2MGjaNuIgJGsEWBy2OELTV_GwoA9Qgnc1_QGoiuFmVejr0xHX1VQhBQaho_4AtbxoOdXkwlTsXd8CqzniaZbv7uVRXMpNy_kio96WsQntaokj7oIFUFx82F2Q2zuhRJ3X-XTHYWK0O3y-YVxybkHGkHQF_ZaHM6lOlLQM7NzKRpujhBXAmtHVUXY8_D_sq5hh8irO_1sHKWwPYVhJQXwGui_L4eINPIGmmY1r-VDMeOnGyTaNc7UDlB6Nna1_hsjelJAhOzNRPZsRJlM28_vcZ7BHFtq-nr6R4qYiF9iRz9BEyBmf2pi6FlmIY6UGKBilDlQkYg0VanQbWkeCBewGYNuvDqwhj9tOGZ0H4l-xcC47V6-EHAbCXh7aHKdjGXUOIlYvXgyp0lLjSpSq0v8bOXJ8LVpWVMWXF8dRDcnKhJDOZT2GRhDwNjjI9t-ZIHaCVNSNu2rN8j1AgVnmCTZU2huvz3r4LplvaCut13YmaGSNxm1cqH_OWMbX8ImiR0rSMyAINCpaGVMUr6VVNyfdAlWMtBYEhLoq0uUuKg6YVJXSGH_lrWO61p2Yc9N7E7AXAFsdN8HAiriSTBO857Ja3A2U2NBYIloKSe3RDRp0ejHaYSvSZWmWEAIAWvlqnYquaNHQYzy1MtgcHPHo7afaE3Z53a3asyYKv37H_bxwlDBZFDA5pNH_4x4UiOCtVbHmBnlA5lI0wa2Pts53YC8Q3HVe1DzHHhh0zQBEL5Ztl9sbCDwFmAbDjNOhK0Pb4QeRTN0rgID0mVWXbpGxkI1wDTcf0R7uCwTpjPmYWiIRnrbZeKZxU1FeImXbx2Bu7ehuHq1aW_15UFnq_lCzEvmtJg1RqD0UAQMVvYMI0yGx_2TO-1Cf6LUI1Vyya7tln_5vYaYmPBT4NrFSKAdeTQVT5Q0tiFzQWVgR5cnvCTVyINkLN_29a9s43YBp6wFBEBOQBuxCQ0oPRBFcQmjJYVmrgufJDLcmXPBTyAnRWbyU4pUIjpR8k10QvRniFm_DawgL1baIVnNg1I0fl1QLji2MHtyPnKfAfu2bE_cmTvhMD9kPjaAcdFQr6DV4ivDUi9PPvjUYtjOl3HnYhAIsNgXJAuUt6g4T6vySpYYC1fJdP-sXRwc7CeDjsRD0lpHtzbciAQjnFXQDKnFj13tcrbXHX_tw0sDAZJS2aLJryvnTcnfkMgwkqsGQmoa_Sh3eSLruDyJApKVQjQLDs--HUnu4iyyn6Vn7hvM3Luc3LXa7ZujRhDqLS-psgxpfMfuh7xaL_YQfjA3O8c8H6qpUIGLHpvbbO0FBNsr4h8ghTDzaZMqKbGq6Hr2wYQo-lMvFBPYKjn_H5E4lEWpOasNOvdR3QDNnXqtzAQDxdtASauGpHXKIrcH6iWeoSr13b3qdYr4nXlC0cDfhUtQ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 8 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
productName
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 8 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = _df.reset()\n", + "# df.show()\n", + "df = df.cols.rename(\"product\",\"productName\")\n", + "# print(df.get_meta())\n", + "df.send()\n", + "\n", + "# df.cols.keep(\"id\")\n", + "df.table()\n", + "# df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'transformations': {'columns': ['id',\n", + " 'firstName',\n", + " 'lastName',\n", + " 'billingId',\n", + " 'productName',\n", + " 'price',\n", + " 'birth',\n", + " 'dummyCol'],\n", + " 'actions': {}}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.get_meta()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'transformations': {'columns': ['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol'], 'actions': {}}}\n", + "AAA {}\n", + "are _actions False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:'stddev' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'kurtosis' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'skewness' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:'variance' function in 'product' column is returning 'nan'. Is that what you expected?. Seems that 'product' has 'nan' values\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"55.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"55.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwnr_JBXMQPm9p_KLif6N4n1x057Lh1800IftqwwM00RFDONE4G8-Om-72OIma9CDM4edtHhFx2VDJNqPJyPIjY7S19Iegi_WVIp6Ic8p2U3U_x9LTWvDm7mdLLWVRgUEL30_sxk64BQPHUpcGVx1Q5Qc-hphGVnuvMzoWS89F5NaUhBHqBjTf6CPmuXLBSZmroYZRcbFJHdrwKnGc1E3HMMo2zeGDbqqUQis4qBg9gqoeMtLHtiVjJjxf_CAdE7BdoORbaiSNmcZUFvVF7nVMrSVSU6Ytlw_zo1p8Sj03dQhLROSgQjSwbaOQBtNwYfAwGiNrfffrMaurQRBJvayqjJAU7SeN232A-8-EQvqGPyWbVj3zVfizSI4FSvPUy_eDpxKCLQgUF8SVcVDbASDc9k-s4RxMP86VNCwii2MYpp_F1CZRXUphdZaezYMx5j_j3PFUNcvucCUafYh94TF6KjK3UXheWjWiW9l0_voCNQv9KLdRr2vMPrla4qO9w7IGKbFCjTFSfvuwXt3-xs51KibwmMgubuz5NMbdYJsiOjGoIeD-LKXWE1fFm8NSzgfGOP05yGyVQWMZ6AGaoYsGSe1VLOkxt6FR9G5h0ac_h4YJtr-w4tME3Qh2hqiaT9dRNQDbD2fHIWeX1olw6ew1ZqbVFqv_0ZRiFoe_2c9hJBfdahIiuSSPn2se4PLIkICW2veU2DH1aF9_U_Lc99ckyjsgBuSleQU-pe5uEwdWhtMCOA1Tjoy5Oap0nHX4FlOjMeRYSD1UcVDEyGhXdENCfz9oOvwC-t78wnMtvurxNKdVAVfBcYGHVaimcdCKXl7AU--gCmu8PQU9J65l-Cv5-1hzoXdZYteRS3YLlwTpc8DIbRT7F3xAP93MrJLWjOySzE89Xi0xh7dEZLYg2mGuSApTa2WQl-8xxcR6cfD34pomC0Y8KL1pPRfoZw32u5GSLfgmWLOQ4scFRf6eu7acNLRZJNfK-k9Jx2pwBPDiWzRJ488F9qpW17LbXeHZt-ERg5VM9Obueu6Jzoyy3CMz6X162utCC8lyiZWDwehSr-z2ttWF0bchx_Gk5lA63Vd69k4XyNtrDW14HTkelrclWGK93oprUy6rIL-Q_x5zLBC-BW_brpdJ8hwgbkYMevbV6pC8fSwXwKaxb1FNoaQETeDMTN02v9-xhgbaEpolU7nuHNQ2SpuWpry1s4vLUY02W_GH-5xK5XenxxNIN7V4g3G0qnRW-X6zpB1jYC-cmlGwknQMonX1F35vfRXBQpLg2te9Px_uX0hkJQwJoFP1yvDN46nprPhtgxjLloJ8CTWdsTtCGgNfS-fqmfDxHN1Zenr3GLg-Wk1niJe0JA9OYecHNHrZWhdzJJYNFJJU-rLsmSHP7_RMTD9SsglEgEm4zP1J2-qhXFwRoMAPBNHd1pyXWtvviC9XHgc208fRsP4bhN75fzI3a-GhIkU5EgCqS577jlS92Y3Pw22Z25DoSVD56_N7kg8v0SgBI0mMfKrQcurYudfBAwctriFZ7K2K3JtU3yiEN_fxEb0x6ikHHXyyBGcFsqVSOmmHkKyJbrW6liitiscBmOBtAie0brLDfCBMkTpOTKAWupfwSi6K5LCLzD9bCAUobVqAHJAjtxPKRBVh9Y71CeJBA9n3jvTrGyVsBWPxO1oWkH6oHwj4n5-e1o03uz4-6t0ddOdCznF-z01lHe9oA95NWCa5I6mEamVLmY5cIOtk9BquvydcHLmYT_DUcODXcJOH8R2dvNy81kYSLwyjW5e_Wp6Xxxzb-yBW5h6G-f-WoVvPzHeteD_r5jb1JUd-3mmwoNkpj4mVxhDP6QQrRM1RRle5Dfeumt39EepIxpsQo4pCkn0kp_gQO-t3M5l1k8XSZVzFgPE0Vw_Ywospk7xlJsyvqFH--TnltM1qqfluMu8ll06C-nr_kutjBEYMpjWjSUqDx1mjxP0GiNgbWfdQ44PNmDfZdNkFU-fa6kqZpRSidZFpxqi0Xkd4CeHVOR9DqdDjHT8qNf3fJzw753WQdo_GPROEgJ5JmSPOdMCvg6Lw4tlBR4Ex5GKWQHRPapQMZ90I-xms1skVHZHWIOikx4S0ktRdWRQST_2ko6tR558TX9lvEg9HZ6s0B7ehLs2ufeunAOlJGVuqKPsIXyJaieaOSoPNYrRn3D6vkiOEEPtzyudEaMD08seeEs79nAmDS1fHc2Amun_a8N2dMOBLYRDytFvjpRq-cTXGJZpfR55Beyr2z-ZVbGmtLuws1cm8eO8__IVZLL-vz2InX6-9miUYybitmsMdqSSffmBSDJQHFOeIonv4Cy10BLn-oKpsSPrUf9CA2ARKtgsuxrXlNPtQLxO2riR5juIPhfZt-tp0tv6tZ0msAeEbicnxUZWx7-1kRYt-s858mGNMmIy1QnLhRJL4lJkWaqX6QmeGlQxcPxjwOFQcapUY8Pc1yVeJPrk2m7XaJHL6UIJHrVXhYIsV898AAFGdA4RrfKkMzc9LgqNh7S42RUALqrV8kv21IB8i13TmNgcX9bCg-U8z-HS3Zs557zyuO0HhORK-hvBSC3aXeDPxzhGcndrYUHqv53vFORvWzBfnrpUYQNa0f3l6gonxkuEuKESoLRTpNKAJQFEO9dLSz6wGMzF1HBS0Z3xJlVqL_RbacPAUOdZM-hjl-eMhjRU6ZZbxSXoNwVM5ii5fRp3rK538pguf_bJ4-ttFZ0Bkb_cIFum0uxRiU5EzpzqIkO5f_rJ4mmYihMP9AyFh3LmoMxv6pdr2hyZKqTNCvzVSEy79V5wrBGOeW2hIUinp57mNz1HcP0Jx9fxAiRDVEYn3qvTZqs4bnai8wHIaA5G9e8VzNiCI3_aKvSlGgtLn-QyujpPNgE3mvupNAsNvZ6q3UDjZqYyQPE6xNc6GyoDZFs1AXicfcM0RUEMl376XgizwSmBpb1awYeDfuAtxcbSjf-PwA6DkLpGoY6Wqu2D_HzhM-b0t3_NJ3XKu1wcdBGHNJjw8A5e4TtuUfx8aa_1TGVzBJAO6T5lBJFiJDKgvC0hwayYzgpm6mqG9sFiBQorVRHpY9sJLAPvzqo6C8WfWgohfeWXk1RHwoZ68UEfqS3SiLS1_VqnRshELN2qEngr-_jR0QIO9KaCPjMxiIG1r6jqp99YS0yudiuRSuFVjF4OXrIN1B-Nvlg4yFej7eIPVl8hrKR_ZO4oHluxjHLNJ7w8dhtrd-zrtHXSDGtxkr4i3wccMHbGsQJyX8CitW3th5RFRosVbpPxIPNGXfpovBoKN0ayCVjWuep4UVDA8lMri5EvA3rKxBBuREUr9qe7Thoi_kvgoktzbQnD23Gzn1_6-mZ5gRqugyeL4LGZ-py6KdvuCexFChU9adA1Q9eVNap3BEf4Ogq6qjBQw9mHSDF6kExlRKaZbzCipIyBRkcr-tQFgicHBZZ0HWWmgIVUWdUQBMJU2Ac6SaB1dVDWyWBzwz9LrVKb02d6VeJIzq4anSH1NLEDgQsxwDmzjaD-spNqaEzbjFEqy296U27JkLCoeTujN7vITWGvSELAZ5hyMY5DroTJiNksJsvk8O1G5I1fAfuYheS3FR-cRBa5WIlUR0kkL4PEXjfSJuOzd7ogYwK8HkQUy_Pb0IuF0CX4A7zkCTvZPHfx_TnQTKu5HzH5MPZjk-3HRpgXKZ-agTfyvp1wUf5dHZEAry4MeB_kiNRaze-Mkg5w5HSQQpdvfYjlF3jN1NnUOjDjjYs-O8d5YocvI8UcUmrM_23L4s5Ihf1JBSGjr6ch747KyWicdgXyjwLlj_NhUysnRIa39xO0lPzuJtgs64qqdpvJz4t36iHO801PfBDxlqMtKTKieGs0h0XoqRjinybGifjI9CnChQJpL8e2W8cXLuuOucbV3_qVMuDcGqd4skDiqB6eiuc5zioyWeVgjb9d4wtV0KrgE0T1VG7sv7F8-5hGAAf7FDViXkbYK07qypQ0b6ZwUVYm7hXekFOuF4L0_jqFKMKI0Xjl1LPAgo7MH7MFaAjq3k_KSUnERpFGDS7yJ6wcEMq78PVb01fkSftRMWOS-ydXZZMQitxl2aFoU3arrREQkF3OlhOQmtlt1WImswEgPtjELm8hNkxIrQ6-hsoP-krwbriL5ZHBVdvgDiKIYbRxjF_-W61E5C3CdRTUHkZX7TsuAqNUwCFuIbnfDAQob30aHgetwYBDIFaVpum8a8kLRR-uqwoeIYU8dd1TFROHHuxVo-HQCyN2I8uY6dlWNHSNVmNBC63MSvFOnkGhQ3nPjLXqixq2D9Vn2lKZDAB1eV6LmJg-mlsFmILNFjC3OnU3PwVDLIhBMTDYI42IYNFyOhX_5HmOUXHlfYXMKOTfTelJimbNCHhE0llWVybTbzjyZPnN1HVcJ-t-ij0nROzNNT-F9ojsu2Cbc8P-3RjcMg==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 8 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 19 rows / 8 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = _df.reset()\n", + "print(df.get_meta())\n", + "df.send()\n", + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AAA {}\n", + "are _actions False\n", + "{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"55.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"null\": 0, \"array\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"55.3 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwnsHiI9jfaVOOo9nj8_rAVimH7bGVA8gZiikeuyPSnYOeE_EojVWFto-9Vpu4aME-1wrNKxBgm4RyGG-1jM9-nSm0Pwg1F8O8DH_R13QPCo10vQY3XWPfiApWH5iVMAiHfTuUZ9-j76Lb5KY240-yQ0dYX8JeA63b9ELeznl5wSexEjXKuKyvPtv_XlVyC2YbB6CycoKF_IQHV3e6w_ntgN7jmy-RQDmbkbxLAyffqluW3sf6yIhuex2owuHFgbS57D1ilRm3U1Jz1RheA5UZBNUxUbyR-gEuGFmwxUWOfaQCll0a7os6c_FYA5XgajZdVaTEoODSlnPBbtkEYMwE7JIVZ90re4dE662SlT5S1OQYvekBdF-arobL2q2Zyssg1odWmy-dxfKGSDdUU1wFGIXjMWNc4xEZScvc4A_jk6dXwtOEZJnapPMl65tkLhPdlT3gMUXjOpkpBbcNZm93_YGKY3B9qhqRSulOUMgOrTvWap9O9T2D7iPRFlGsqngNgHw1n4ioQozI39k6DDxXfiPgwLCrdjJzIQxQJklSjPwM_7X6w09uhyHLOHMh28odYjlCiWFj3GSl7PPDaqkq9Vj7Bw4prPNpA_3VxQ6_jsN58T-6dZ3CaQRQTMAD_fQFosEkk4OKsKowKtuB-iXVoI9UODAR72F9uoWJUzwY64A-yJvM71Ch-s1iXN2FZmMD7Q89iGnM8k54ts7yJzCKJ12L4MNnbc2AJrC69KXo33W7wygQjBG_l0ofsEfrPQsI8FQYIUrQiUCtX-9bQzwzzNbR2WA7ZJTBcvfW1QOui0-2BhElZ9BXTfXZLl0ADjiwcN-gHiqEZ0E0mCHsWYBFIF04yL97aNaNt0XczekHbsi8yHYwFAFOJvT5omNMJ2C60MQh9URVuoAmAlhJGj6TOfemmIpz-wwRpj3oCh-8014fIlGk_R7mTm4OQTZd2R5VXGOjS6TQDH5rPnzOV3bhpVwMfMSTDBgiyn3QAZIDsR-IC8XM1CM8bYE6nqBuHz8_8i3_m87e2sLnb3it4wqFsD_9MqTRXtaZCIJ96hTGUShZcwjb-SQvRMhkSHNDJf5H3V393681pUNPmuOuh3oR-tLIF2njI8vQsaH-xN1j7f5scSeFOSFtIYq-QrLNoMYqVa6cOSm2A8fQNpLnuFw92D8Wam3BIbwPmHggFzAzQnWxyZhHOIoELJCj4CR7OP7tnMHAVSVqST_qqUq7RDnzkfPtj8VoMthGxr9z4MsYjSu9eOxO19-5ccRQnI0mKkwi-f9ZQlHLI3i1yVu6tXf3Zq1jmqFjDQikh46vV4GMzGKNYOeR4EyCRNWmgB6T_5g66-0B2daQSB0i_PiGiYZfMQ_rI_cWCGbGw4yVBo2Lyxi2cuMrrxtFeXtG6pnTGTwFTb9v2AGN0gw4X-b3xiLhNJwWxStCAicpTULp9Kzj49jh59ffBlD_NxcSQKxtSVCXs0AoavQztr6DGJrhydFpOzmeXHZhc4SoY5oDon2Nbl0b_JSFS1lqwt7pNArLjjoDKUZ6JIo8ixtXrHKLM0DWrdXM2mddF-9WifOlzcsuxni-9BBy3JtLSQPSYfaTgGL1wDLfsbWVvZTXd1khMzusIrjQajp6O_or_7BRknnW3r-ZhXzZla4vNowoM4UTgBdGJrSyQmbMo38tqbmIvkhSHIH11ATIgWROntsDJzk6QMWsomTSw2tApUu3kw-rBlJGJ7YCdZRAdL0K6rbJkbkcF2CWCNFRyMSZU6b2Me8gnZ77svEc4wTnV440w3-IICa9Tjbnut-CHmYoO5DCUbrzCwdyT04SGZ4zHUKIfDYciZyFaA-hM2Apv7JSmBdqYutaqaKxggeVVbc4Tis-IMy8NWG_QY1SwRLKBUDuRJIYRJhOAdFuqAb46UPz4WTnNJv5yy7_0I4aXNhYemP7mxGH9Iduy7RMyraJqhGdz5i1M3v2D2qe6X3zMkxYTvWrQvsVB4VWppQFPabKkPv6Jg-QPqEq3XOzPEdi22EWqxCJFIi8sTjHqHdqxvWHlQl1EwK2ulRFbnK3XUmaKK0VdY-yKsmLGCuNvhQP-jmjGdAD3Wd0aZ4IE0PX34Af9eEH3P8zWtjeq5vMeiqh-ShHxCXWkkEuXpd89xd5OZ2rB_dVmXg41WUAFFqO0VscqwxaKaBih9sCOlP5sMiB-YOFE23fe6rIjgw1YIt9JYfK2t4F-TmlN9fCRzI9tDSZY1-bzNruF3eZuthmzG0lu2H3JsZ0gWnXXghxwxYPfTVPn7SdcPec8CTNNK6LwI0Aoa1klwXk7TuWIOZi-IvS9SgVBzNVIOZSe3LISWIJKpkGOIBYfCeRUgsK1bBgS6nWHTRrpykkLZloJYRFoDHXHK0fLb0TQkY7f-uAIN-7WpjACEUVYK_oOdOrxRfJ_Y7qbvPIzW2LlEu1l-DO8YLHsNU6j1vL268a6fmYc-_1qVY1s130d6xlWj_XJTR3xWXXBdrON3XwEhkFq65uXMXyaQB1sbPHd668yqzDnRvemyby00PS4OzdKfZxv-HicxGEOB01HqQuHkjfI13lGgWvC-fQDFf-tyTx4FLMHOGBFw3P_JBdqDKny-Ouf00F77L-YdXGmJZLM00Vjah7H0cwu1JfX3tpcA5ygeLdaUdCppCQKBTmehSHdGExPdfzTeXH2caDhyQaPJnjMzJ3IDDJuDAp1hdLnVdlnInc6snD_ywR2BtcXBQSSWL4vzwX95brXrV6KIRbYuUTA8XZNfgg5Ev_Rq7mcAlKM427YKTLrzzwNwVZ6xHQiLLneOXWOoaN1I7v0ZSf4plhaRn67ojMydpnSe5Kp1MhKHvuuYxbxb559bPH5JhLh12KaIRT3_HvqZIg5EBxzI2U5t_YZu0dt5MOW_9w2ZshGEOpCJX-SP0qVm5yW0DHp6Mkrrl0uZnIPyUU_XOMmyxKGkJnaSVPgdlMDLmSw2fWBD3mfEmmXcdUEu5ws8Bs_muP1g-0IFwDG87CWeCZdDzAne4mVktY-xKnky3KTcEdaNHKiyLDwRC7_4dvn1syuxasZZVqvF9d4Zjk8pfcuLa28_ouwZce6zjpTFDN1AnR1nvtZkQfKnMNb_4e3uIXmmlPp4sAfTh54RMm7krPbGCmL7Inn8NkImHOZn3mCuzqVyK1pQ3fz_OWR5BqcaijmlR2VUUFUZlXpmHWS41KrLZ6eqFCP8ctBuQkE010jc4fQePTm09s1hJk_C0saLBX0MxmT7koud8K8OYzH-gBI2pW38f3OCrNpua1jhhgwWwaZeLoUTDXqqgsxCLfyLpsrbIigBfAePOxI1bervFDLuqb_nM9TKvqhq-CGFdAVZyHOcRWwEgqu-AvvEm2nTtei9q6RQGgE44q6570DCLUepFNm64qMTixvfpQwHF0SJB-55hN3KzGF6spiKUd32TZUFu3G1hZUKNkd9Y5euulGQWC7it67D2xOzLzN9HRTrctaI4nDuEsPFb3yiRofkdafbRonvdLPmFqvMnGqXqPEUCJWJtBaG3ptUb-pfNQQBac-OAoruY346dC5huhqhmGTFR0u8Bjz4PGOfLe6NalpyRo0rKzmiY7hbVpKs27P8O7Yljt2gLyMr2R1IJ0du7J37GC85Ch-f77beZzNzonykh4ZA5VmanzIPe-x0xMPVcA2AUgQZsjeqkqqzyHTRvsXV42g2LGsPsnKMWLJyrgAMJeOG36REggitGpWes4ngtE2S3eFZqby-IxPU_j5ymOkjuKN2iOyD-tEZKoEyZZM-RjHqbSRCVK-CrqUr2W2rpoF6MSeNIa7T0fbgPD71GJ44CXfjX6x8zLFiYebysj3keRg6sJ-yW0iYchKsSNTNOzMWPJxnTDRBXTzPLDJ4TPr-55AgF8XJXDZ-lNupNz8-zYBAwr-DbPZHdmNU-jw8tDMd6q4dvdBa3VFGiNf-TpEJGDzilGlU5R-NHhhUmQPfU1iyFc2kI3j_vpyneLwTPVQffWhAB_M9tMMErSNOrv2QlD7SJ2bM0aRVvh6tpUAdw-3MY8nhrxsW_Qk61Y9Y9OZad9qI6x0aNCWe1DpnlZG9zQd2zV7h1rlTsH4i8a2vSTGpElGpZlzC5tjcg7AaRoydCLp-6LSfjrcNVp4dcLQIJmWDhkUGtMvyT5Da90Sjjt4s9VsgWMyY1DAajHj4qxrwv4JGS5uYoX_JtN2zhF3CpriZ92OT8pccNc8ktaccCd4AFrZP4vQeILks_SyrrPoXlt0_6zVh8TjRWZSYFWvaEjBZwbRFdDhg7ReUN9Cwfv2Cxc76YzfyE6n9LnDCrrHw9LBi6UTltHdq7WdC3yHcAz4v6qMk02EOgEGsbVMW8WgKDmizxvADomm8i5Gl6OCbKrGYg_e_ugMl2BHRO-yMpwTZAJyLAJ6UTtUWsIHvyZEjxcqwFdTZ4_7IO59ArGA==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Downloading foo.csv from https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\n", + "INFO:optimus:Downloaded 967 bytes\n", + "INFO:optimus:Creating DataFrame for foo.csv. Please wait...\n" + ] + } + ], + "source": [ + "_df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header='true', infer_schema='true', charset=\"UTF-8\").limit(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df = _df" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.keep(\"id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+\n", + "| id|\n", + "+---+\n", + "| 1|\n", + "| 2|\n", + "| 3|\n", + "| 4|\n", + "| 5|\n", + "| 6|\n", + "| 7|\n", + "| 8|\n", + "| 9|\n", + "| 10|\n", + "+---+\n", + "\n" + ] + } + ], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# df = df.cols.drop([\"billingId\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "'missing_columns' must be 'first_name', received 'id'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"gloat\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mcast\u001b[1;34m(input_cols, dtype, output_cols, columns)\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[1;31m# Parse params\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 342\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 343\u001b[1;33m \u001b[0minput_cols\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 344\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_list\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mis_one_element\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 345\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\columns.py\u001b[0m in \u001b[0;36mparse_columns\u001b[1;34m(df, cols_args, get_args, is_regex, filter_by_column_dtypes, accepts_missing_cols, invert)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;31m# Check for missing columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 141\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0maccepts_missing_cols\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[0mcheck_for_missing_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;31m# Filter by column data type\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\columns.py\u001b[0m in \u001b[0;36mcheck_for_missing_columns\u001b[1;34m(df, col_names)\u001b[0m\n\u001b[0;32m 243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing_columns\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 245\u001b[1;33m \u001b[0mRaiseIt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing_columns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 246\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\raiseit.py\u001b[0m in \u001b[0;36mvalue_error\u001b[1;34m(var, data_values)\u001b[0m\n\u001b[0;32m 76\u001b[0m type=divisor.join(map(\n\u001b[0;32m 77\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;34m\"'\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"'\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 78\u001b[1;33m data_values)), var_type=one_list_to_val(var)))\n\u001b[0m\u001b[0;32m 79\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: 'missing_columns' must be 'first_name', received 'id'" + ] + } + ], + "source": [ + "df.cols.cast([(\"id\",\"gloat\")])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.rename(\"firstName\",\"first_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.cols.keep(\"first_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"binary\": 0, \"array\": 0, \"null\": 0, \"categorical\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id_1\": {\"stats\": {\"count_uniques\": 19, \"min\": 1, \"max\": 19, \"stddev\": 5.62731, \"kurtosis\": -1.20667, \"mean\": 10.0, \"skewness\": 0.0, \"sum\": 190, \"variance\": 31.66667, \"zeros\": 0, \"percentile\": {\"0.75\": 15, \"0.95\": 19, \"0.05\": 1, \"0.25\": 5, \"0.5\": 10}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}, {\"count\": 1.0, \"lower\": 10.0, \"upper\": 11.0}, {\"count\": 1.0, \"lower\": 11.0, \"upper\": 12.0}, {\"count\": 1.0, \"lower\": 12.0, \"upper\": 13.0}, {\"count\": 1.0, \"lower\": 13.0, \"upper\": 14.0}, {\"count\": 1.0, \"lower\": 14.0, \"upper\": 15.0}, {\"count\": 1.0, \"lower\": 15.0, \"upper\": 16.0}, {\"count\": 1.0, \"lower\": 16.0, \"upper\": 17.0}, {\"count\": 1.0, \"lower\": 17.0, \"upper\": 18.0}, {\"count\": 1.0, \"lower\": 18.0, \"upper\": 19.0}], \"range\": 18, \"median\": 10, \"interquartile_range\": 10, \"coef_variation\": 0.56273, \"mad\": 5, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id_1\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"first_name\": {\"stats\": {\"count_uniques\": 18, \"min\": \"((( Heinrich )))))\", \"max\": \"William\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"David\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Isaac\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Emmy%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Max!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Fred\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"((( Heinrich )))))\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"William\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Marie\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Arthur\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"JAMES\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"first_name\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 19, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Newton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"N\\u00f6ether$\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Planck!!!\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hoy&&&le\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Hertz\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Gilbert###\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"CURIE\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"COM%%%pton\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Chadwick\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 19, \"min\": 111, \"max\": 992, \"stddev\": 280.19735, \"kurtosis\": -1.04115, \"mean\": 556.0, \"skewness\": -0.2137, \"sum\": 10564, \"variance\": 78510.55556, \"zeros\": 0, \"percentile\": {\"0.75\": 812, \"0.95\": 992, \"0.05\": 111, \"0.25\": 323, \"0.5\": 553}, \"count_na\": 0, \"hist\": [{\"count\": 3.0, \"lower\": 111.0, \"upper\": 159.94}, {\"count\": 0.0, \"lower\": 159.94, \"upper\": 208.89}, {\"count\": 1.0, \"lower\": 208.89, \"upper\": 257.83}, {\"count\": 0.0, \"lower\": 257.83, \"upper\": 306.78}, {\"count\": 1.0, \"lower\": 306.78, \"upper\": 355.72}, {\"count\": 0.0, \"lower\": 355.72, \"upper\": 404.67}, {\"count\": 1.0, \"lower\": 404.67, \"upper\": 453.61}, {\"count\": 1.0, \"lower\": 453.61, \"upper\": 502.56}, {\"count\": 2.0, \"lower\": 502.56, \"upper\": 551.5}, {\"count\": 1.0, \"lower\": 551.5, \"upper\": 600.44}, {\"count\": 2.0, \"lower\": 600.44, \"upper\": 649.39}, {\"count\": 1.0, \"lower\": 649.39, \"upper\": 698.33}, {\"count\": 1.0, \"lower\": 698.33, \"upper\": 747.28}, {\"count\": 0.0, \"lower\": 747.28, \"upper\": 796.22}, {\"count\": 1.0, \"lower\": 796.22, \"upper\": 845.17}, {\"count\": 2.0, \"lower\": 845.17, \"upper\": 894.11}, {\"count\": 1.0, \"lower\": 894.11, \"upper\": 943.06}, {\"count\": 0.0, \"lower\": 943.06, \"upper\": 992.0}], \"range\": 881, \"median\": 553, \"interquartile_range\": 489, \"coef_variation\": 0.50395, \"mad\": 230, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 13, \"min\": \"110790\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": 110790.0, \"skewness\": null, \"sum\": 110790.0, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"pizza\", \"count\": 4, \"percentage\": 21.05}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"pasta\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"hamburguer\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"pizzza\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"BEER\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"Rice\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"110790\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"null\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 17, \"int\": 1, \"null\": 1, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 8, \"min\": 1, \"max\": 10, \"stddev\": 2.95285, \"kurtosis\": -1.4482, \"mean\": 6.05263, \"skewness\": -0.22564, \"sum\": 115, \"variance\": 8.7193, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 8}, \"count_na\": 0, \"hist\": [{\"count\": 2.0, \"lower\": 1.0, \"upper\": 2.29}, {\"count\": 4.0, \"lower\": 2.29, \"upper\": 3.57}, {\"count\": 1.0, \"lower\": 3.57, \"upper\": 4.86}, {\"count\": 2.0, \"lower\": 4.86, \"upper\": 6.14}, {\"count\": 0.0, \"lower\": 6.14, \"upper\": 7.43}, {\"count\": 6.0, \"lower\": 7.43, \"upper\": 8.71}, {\"count\": 2.0, \"lower\": 8.71, \"upper\": 10.0}], \"range\": 9, \"median\": 8, \"interquartile_range\": 5, \"coef_variation\": 0.48786, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 42.11}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 19}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 18, \"min\": \"1899/01/01\", \"max\": \"2000/03/22\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 94.74}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1999/02/15\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1993/12/08\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1994/01/04\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1997/06/27\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1956/11/30\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1958/03/26\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"2000/03/22\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1899/01/01\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"1921/05/03\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 19, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 13, \"min\": \"#\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 68.42}, \"frequency\": [{\"value\": \"gonna\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"you\", \"count\": 3, \"percentage\": 15.79}, {\"value\": \"never\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"#\", \"count\": 2, \"percentage\": 10.53}, {\"value\": \"give\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"up\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"let\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"down\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"never \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"run \", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"around\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"and\", \"count\": 1, \"percentage\": 5.26}, {\"value\": \"desert\", \"count\": 1, \"percentage\": 5.26}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 19, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": \"foo.csv\", \"summary\": {\"cols_count\": 8, \"rows_count\": 19, \"size\": \"46.7 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id_1\"}, {\"title\": \"first_name\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"], [11, \"Isaac\", \"Newton\", 992, \"pasta\", 9, \"1999/02/15\", \"never \"], [12, \"Emmy%%\", \"N\\u00f6ether$\", 234, \"pasta\", 9, \"1993/12/08\", \"gonna\"], [13, \"Max!!!\", \"Planck!!!\", 111, \"hamburguer\", 4, \"1994/01/04\", \"run \"], [14, \"Fred\", \"Hoy&&&le\", 553, \"pizzza\", 8, \"1997/06/27\", \"around\"], [15, \"((( Heinrich )))))\", \"Hertz\", 116, \"pizza\", 8, \"1956/11/30\", \"and\"], [16, \"William\", \"Gilbert###\", 886, \"BEER\", 2, \"1958/03/26\", \"desert\"], [17, \"Marie\", \"CURIE\", 912, \"Rice\", 1, \"2000/03/22\", \"you\"], [18, \"Arthur\", \"COM%%%pton\", 812, \"110790\", 5, \"1899/01/01\", \"#\"], [19, \"JAMES\", \"Chadwick\", 467, \"null\", 10, \"1921/05/03\", \"#\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdw0TzYz9b2k_0Gzq6Z82hEM-4DltPbyMbUz_Yv2Vw8qqrSWQlNsbvYbD4Z61HiFMmlhRJaZNsh5VrxbqXdz3GJ60PHC8ieYgwhdwCHVEx8uSrMq7rAa0TDBuibEteGuuP0sbYOvdVV1Qc5IBU4zesHsNKzaPdE4Xv4pmKV-bHR_kW_HSAyAvgFkEwaY6-XS-aN5fTJZ-sEjgU6K2Ob3EjAUZL2Ok39T8m-GuPSk_xuqbuQD2gRNJCccYqzJhJk4su3EUQo0fm4nr0zbP8Tw2M6m1EXWtWqmqTWrV-mUzKyr3GSUwp0y8Nn2ApjHELQgn_ZTm-GidE0ztxLGYEeeCWdSJlQ_vfgH96I40cZA3aCbwz0fdyVFz0mc-_alnInMNnHUUhnWvOvVmSHsRycui31bHCEhrQGmbhgp5bQ0QNagX3poUR_DA_0JDyBKknciJcgPsYRugMgHCQb8fkO2Jl_uNfb-IVEf9krDCusHTJbuKwoUsudfYWZodEGC4xx59slK9rizpeHYPtQZVEz9bF5UtrIp_YztqlK5GjFuFMc-Bp73pwGRzsb5ew7SOMjyWNn7SjfCVTShrBnFCayxk2zRPNFPdPO3pR69e6_m7f6rhRFDl8eCl-HrimzhIzoCDevfZC9Et5Fxwe6JBflV9YF6fpdnbcZ8a1ALHgnBsSBxsk7ous-J863PbE4oX3f4U-MIN_loj12EpiPUAEK9w4-MHMOTpoSRQ0REdzc01ZfUtyJBowzYx4-nxbT30lOPvWzBYz5ZOoFCqcd9S6H5aGNo0tlS-jPhowbW6NDZ6akXtXIP17kpSLYqp7Zq7YW08ALzPqVXGo_-S18rlsZbV28wWnVnwkl67wsFCj3ucYjbaMzzCAZkdDZWB0Rhdq6euzjlYOzqcG15SEm7wcFoEbgnvxJ7vi5NiZR418tOMCk-GyCudrKUtjV8hzGWsDB9jPrAKpm74ryztyQu02DLKUu9lrSGuDHTLk5D_oe8D2ikXTeKyU24Krs2RTutq7DK_NhjqbDbe9ZbDeMFPnjKZieAtD-2_ZNlNl_Chp7nb5wJq23q1q5aH-G2z2KI_4dA4Rrra-e8gt0kLtsy-bpFAeTKIQONLS2xge5xDk6MjXJBeXE8yBrTwlZFPYaMab_ZzVdTs8nYolsDerlT2kEqldISkMUp2iv8yooFWSj90zBaVDrNOSY2awAbxQOui-7ZcSdaNcbdnV1hbN3XC_1776Cx-DRsUkSr6TcjvVV9csdg9qV3Qt8GStA8gygBeYkZ_-nYyuXrG3V-TGVeY3JPblvF22scjW1lz0zdkQLzrzAvgyFSP201evGVEaceEd7KkLF3Ed3XxW6Box879E2TAnLbq8AiOnt5QOVFEsiCA3DmhEBzP_AzycvVVrpR8zOdIB8IPC8vQc7CNz3p6TyEDlx2hdj6V37BN5JNMos5hRoFK2q60y53bPTSLElWNrV4gGaljro47-pjkNx6t_CX_2M0xhSThzAOtFyTe1h21G3oEd_dJsGBK4mq6UcC9MswAHO_DWk4sRKQuhzNUG6H0-H1Kz6FOoqWWUPpB50XXE4a700D1By9F2yH5p3QH_Ep7lez-2ghYziLKN_XQF0b-2jzDwOUcx-Hln2t1Kl6hAK51tamrVb18zWvQ6nu0z9E_uIYSJAOLzNJcwIYcbCRWF9UQLmP5fltiXatVkOo7naq8z7F4JfHR9DcgF_InMto2thPFJI18VroVNaSieDnQ9s_tqLFRxOIuelfxDZbF9L_5mGL4UV4bEvzk7b36IdLVrLqcqZhY-vZeMig49XGOpfzhCZnuLzXuFgFQTvfeOJOmb774UgeRnqtOKtpg9XqrvELj4XznmPmm1cfsC1nv9ctFLbqzVKdfDaZG1Rd6F5g8C4cbBRSsKMNYVuq9VJvTQguKJ2fySeV_1l2AnQ6GZoidy9dCuST1jyLIbEFoR4yVHSpE8DvXvWtBX-lfX2Y-IE4_KCISf245WZ0ygEbS3sC0RMjEBXcetU0SgfPMhHH2kaEjJ_MlqoNGdzr2Gy5zyHq5S9zHb7AsO_64Wu-qLFmRDjkNH8Nu2eubFWw5h4M4eBuTUJ8WPcr9xol9zyop476R-rBg2JOHDF_L83xSvPp_SJNZjyH9gWmqJCwqO01anV9nDaIylUQx4eYrwJaZgfd8KKQ_Fmr4wZqa8MNc2OAll40pve2Uvk01dfIdlM44Ox9aveft5UPDODE7QhdHsiRP-YOvj-1XLhl-d6fppy4r6C3jAO2fRI8s70VbHPXaMZ__ohG8IzVuX_AUYPjkbIZ5wRM27-9_2grV4_9jBb5nItiIU4thb2mc8J8843hwBn6yu9UO2ATn4BZjFY4R_3BeeWGvuuB3uGsI3ypsJQ8Xrj3qNTS7ofLsBzwsURrJrq55dtaa43N-Nc20zSfqd2jjlgP9qBrivEY38MqQeAOYeFhQv4MfykOwaHXvpx4_ZNU36WGR8o9apE6mE5nBWbi0po1IayFIUleAxE36b7Jwe_dCO_IYj0qA9TwSJkdlw41WIfjcJ1zEiG331Ougm6b_wWyHN_tlH5EtimVyu9b9NEAt6MgH0LRpIrwEL_KrKryfRLutiIsq_kd2xM-Hy9Ga4Om4QL_PnvwX1vIl6evQwFDfweh1bpdAdPAHuF_kQf0wemHvjLIYtqmyX-v8hbL5TxHNq8URRvzX5pOQYytYELnt85_IpEqPZVgBPxsgZsekMWVNUn_fCjmeBJXmUX9Ti9hDFQgq4rndjFS-JElNtjV5Ie94ClPK7GZi4gU-Zd9QRDlsk7P8skKSA1icdniqt1pi3j-z-Essi7W17MHyIZgK1Im_FbgUHIgbQ457TCcNtK3S5tahT6C4FDrEo2N4ywJpjmc7DKY3TF8ki4UYaaIyxHspfksRCMoTkBEAhrTjxtjkc2nctA0XBr6v8t5A7VzIFnyA8k5DEfaE0udgcqpD67KrRCr5oXGEyoC3A3N5x6O8CT4K8GXeG2WJWRvYWIB5ZyCNCsDETOk0sWN0-mWnGRpFN1zj_Jn-gX5Fq9OX3fcz3NFWME7simB4dfUWsLMWG0JyCNPHLyppQLBAMED1PdqwaXIBo9EcMCZlE0ljkh3M1i9w96qwnEBOfDcb6_iYtKy33uNYTDLWxvFrnttFVUsGmxdxpRksXKRLCzfxvcrYQ8hUE-h8aKzo12MoyJlwduhSereDg3OmEcCMyky-PZZCI9Ss99yMlQuqECrTBPUu9CYJTrqQoMYWHi6xY22R1wWjKNvBrO9NNnn76-W62MMBtZE6lx7eEn--l15GMHLVcCCUqL0szjO3fpCOSV5VZwcgCBGPhvb947UMlMvuJ_8_dsMMVIf9yOI4ap8fWEyIgnqIsEXX2H6PQdg-mh6ew_0BxGbYfofmZChLNNMSEHMWsCcX6d29uyZ0UmrvtTXiGF5wNNDSy-186-kZEyjzWNxlbmDwvxUqZxHoVjDGHPYVQDGXbk_aQetFAzkvtwOJjoM0Za3mt6abtLQMzG1jyGFB2pBMtBmqxuSpPqubf6ye8sYCZOH5d_n7jA2BI1CNGAQvFDTr0XE8RExZ1n0pcJjsgmlhywVpf1vbowq7_PcmxlIiGzx2w1-tJD8oKen3ToyhScFagp9hgzoA7KioaUfVgYc7y-dHIWzpApBDzJW8atDnUnGbgjMXwRE7RHG0FoOyBL2bwzflMW94w3EHuPQMzFHfNZy4NgAzqRyMLUI0aD9e8z0RK8Qpw17u5BrGYefYXdNXHv_X6c2fHM9VfXfrynuFpPTju9XhzZgWbbkY8jZTNZ1TkKGiE2aGvS30-SOJgJGmlAm4VIcT6tmOnZNIuQI50NxGKq3ByUWdidlLqoEYCHKlFvr1MpPVRIOcMvjmttO55ZD0mWU7xjqbBAfVr4nblxR8ZRGpLSu0KbK1lJYbY0gOEnlBoxNDw2DJpPQea-x6oEIYP6eZc55uTQW1uNbvo6auwLcHmWWkKgI7zxaAqOEWVwlhwIjKqGuuqbKFXxyZZ-IHlH8wTHMXsw9O0shdaMebmxBNrDIQ87AIGPrK9EmevE1yoLeJaA9O7-rXAA_yUeMs48QI9RH3fXujZRpgNtYtwADhHJLX8BHrgvuOQqcT1sHjVSISKSUcQ4uuhZgh7cgnOJgR3O-YVkhuELCQy5dDbwHz4oShprzIfdrdvSg1Ilx43O7h8l9uWMS2iw9r7NbhKJcPNauijdxc2oOJSg08Yw_CRshy3GI2s3bBSWhwJt-Dcs2ySQ8IRoz9ii0LuoBykd17riJc7tckRKFdDpcPOtMBjfivtOGHka8fXw1CVpev9HaQ3YjURBeJbwI1ZMHSqQxJr8_K6Lr_mhSnoWKwAbDXeOp_MFvRiyjdQdwJl4HtM8Ujl6X3ATwc85v45NDQDTCur-JgzlulVoZRryBidM72PglqsJN4h93Mr5JOx0HevxIM=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "a = df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 10 rows / 7 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
4 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
6 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 10 rows / 7 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Batch Histogram 0. Processing columns['id', 'firstName', 'lastName', 'billingId', 'product', 'price', 'birth', 'dummyCol']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`firstName`,`lastName`,`product`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"categorical\": 0, \"binary\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 10, \"min\": 1, \"max\": 10, \"stddev\": 3.02765, \"kurtosis\": -1.22424, \"mean\": 5.5, \"skewness\": 0.0, \"sum\": 55, \"variance\": 9.16667, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 5}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}], \"range\": 9, \"median\": 5, \"interquartile_range\": 5, \"coef_variation\": 0.55048, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 10, \"min\": \"Albert\", \"max\": \"PAUL\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"David\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 10, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 10, \"min\": 123, \"max\": 875, \"stddev\": 215.52491, \"kurtosis\": -0.22885, \"mean\": 548.1, \"skewness\": -0.53543, \"sum\": 5481, \"variance\": 46450.98889, \"zeros\": 0, \"percentile\": {\"0.75\": 672, \"0.95\": 875, \"0.05\": 123, \"0.25\": 423, \"0.5\": 551}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 123.0, \"upper\": 206.56}, {\"count\": 0.0, \"lower\": 206.56, \"upper\": 290.11}, {\"count\": 1.0, \"lower\": 290.11, \"upper\": 373.67}, {\"count\": 1.0, \"lower\": 373.67, \"upper\": 457.22}, {\"count\": 1.0, \"lower\": 457.22, \"upper\": 540.78}, {\"count\": 2.0, \"lower\": 540.78, \"upper\": 624.33}, {\"count\": 2.0, \"lower\": 624.33, \"upper\": 707.89}, {\"count\": 1.0, \"lower\": 707.89, \"upper\": 791.44}, {\"count\": 0.0, \"lower\": 791.44, \"upper\": 875.0}], \"range\": 752, \"median\": 551, \"interquartile_range\": 249, \"coef_variation\": 0.39322, \"mad\": 121, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 6, \"min\": \"Cake\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 60.0}, \"frequency\": [{\"value\": \"pizza\", \"count\": 3, \"percentage\": 30.0}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 30.0}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 4, \"min\": 3, \"max\": 10, \"stddev\": 2.76687, \"kurtosis\": -1.64355, \"mean\": 5.9, \"skewness\": 0.04247, \"sum\": 59, \"variance\": 7.65556, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 3, \"0.25\": 3, \"0.5\": 5}, \"count_na\": 0, \"hist\": [{\"count\": 5.0, \"lower\": 3.0, \"upper\": 5.33}, {\"count\": 0.0, \"lower\": 5.33, \"upper\": 7.67}, {\"count\": 4.0, \"lower\": 7.67, \"upper\": 10.0}], \"range\": 7, \"median\": 5, \"interquartile_range\": 5, \"coef_variation\": 0.46896, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 40.0}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 10, \"min\": \"1920/04/22\", \"max\": \"1990/07/11\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 10, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 7, \"min\": \"down\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 70.0}, \"frequency\": [{\"value\": \"never\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"gonna\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"you\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"give\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"up\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"let\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"down\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 10, \"size\": \"48.5 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"]]}}" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:{\"count_types\": {\"int\": 3, \"string\": 4, \"date\": 1, \"numeric\": 0, \"categorical\": 0, \"binary\": 0, \"array\": 0, \"null\": 0}, \"total_count_dtypes\": 3, \"dtypes_list\": [\"int\", \"string\", \"date\"], \"columns\": {\"id\": {\"stats\": {\"count_uniques\": 10, \"min\": 1, \"max\": 10, \"stddev\": 3.02765, \"kurtosis\": -1.22424, \"mean\": 5.5, \"skewness\": 0.0, \"sum\": 55, \"variance\": 9.16667, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 1, \"0.25\": 3, \"0.5\": 5}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 1.0, \"upper\": 2.0}, {\"count\": 1.0, \"lower\": 2.0, \"upper\": 3.0}, {\"count\": 1.0, \"lower\": 3.0, \"upper\": 4.0}, {\"count\": 1.0, \"lower\": 4.0, \"upper\": 5.0}, {\"count\": 1.0, \"lower\": 5.0, \"upper\": 6.0}, {\"count\": 1.0, \"lower\": 6.0, \"upper\": 7.0}, {\"count\": 1.0, \"lower\": 7.0, \"upper\": 8.0}, {\"count\": 1.0, \"lower\": 8.0, \"upper\": 9.0}, {\"count\": 1.0, \"lower\": 9.0, \"upper\": 10.0}], \"range\": 9, \"median\": 5, \"interquartile_range\": 5, \"coef_variation\": 0.55048, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"firstName\": {\"stats\": {\"count_uniques\": 10, \"min\": \"Albert\", \"max\": \"PAUL\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Luis\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Andr\\u00e9\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"NiELS\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"PAUL\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Albert\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Galileo\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"CaRL\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"David\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Johannes\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"JaMES\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"firstName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"lastName\": {\"stats\": {\"count_uniques\": 10, \"min\": \" GALiLEI\", \"max\": \"dirac$\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"Alvarez$$%!\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Amp\\u00e8re\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"B\\u00f6hr//((%%\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"dirac$\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Einstein\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \" GALiLEI\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"Ga%%%uss\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"H$$$ilbert\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"KEPLER\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"M$$ax%%well\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"lastName\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"billingId\": {\"stats\": {\"count_uniques\": 10, \"min\": 123, \"max\": 875, \"stddev\": 215.52491, \"kurtosis\": -0.22885, \"mean\": 548.1, \"skewness\": -0.53543, \"sum\": 5481, \"variance\": 46450.98889, \"zeros\": 0, \"percentile\": {\"0.75\": 672, \"0.95\": 875, \"0.05\": 123, \"0.25\": 423, \"0.5\": 551}, \"count_na\": 0, \"hist\": [{\"count\": 1.0, \"lower\": 123.0, \"upper\": 206.56}, {\"count\": 0.0, \"lower\": 206.56, \"upper\": 290.11}, {\"count\": 1.0, \"lower\": 290.11, \"upper\": 373.67}, {\"count\": 1.0, \"lower\": 373.67, \"upper\": 457.22}, {\"count\": 1.0, \"lower\": 457.22, \"upper\": 540.78}, {\"count\": 2.0, \"lower\": 540.78, \"upper\": 624.33}, {\"count\": 2.0, \"lower\": 624.33, \"upper\": 707.89}, {\"count\": 1.0, \"lower\": 707.89, \"upper\": 791.44}, {\"count\": 0.0, \"lower\": 791.44, \"upper\": 875.0}], \"range\": 752, \"median\": 551, \"interquartile_range\": 249, \"coef_variation\": 0.39322, \"mad\": 121, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"name\": \"billingId\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"product\": {\"stats\": {\"count_uniques\": 6, \"min\": \"Cake\", \"max\": \"taco\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 60.0}, \"frequency\": [{\"value\": \"pizza\", \"count\": 3, \"percentage\": 30.0}, {\"value\": \"taco\", \"count\": 3, \"percentage\": 30.0}, {\"value\": \"Cake\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"piza\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"arepa\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"taaaccoo\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"product\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}, \"price\": {\"stats\": {\"count_uniques\": 4, \"min\": 3, \"max\": 10, \"stddev\": 2.76687, \"kurtosis\": -1.64355, \"mean\": 5.9, \"skewness\": 0.04247, \"sum\": 59, \"variance\": 7.65556, \"zeros\": 0, \"percentile\": {\"0.75\": 8, \"0.95\": 10, \"0.05\": 3, \"0.25\": 3, \"0.5\": 5}, \"count_na\": 0, \"hist\": [{\"count\": 5.0, \"lower\": 3.0, \"upper\": 5.33}, {\"count\": 0.0, \"lower\": 5.33, \"upper\": 7.67}, {\"count\": 4.0, \"lower\": 7.67, \"upper\": 10.0}], \"range\": 7, \"median\": 5, \"interquartile_range\": 5, \"coef_variation\": 0.46896, \"mad\": 2, \"p_count_na\": 0.0, \"p_count_uniques\": 40.0}, \"name\": \"price\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"int\": 10}, \"column_type\": \"numeric\", \"id\": null}, \"birth\": {\"stats\": {\"count_uniques\": 10, \"min\": \"1920/04/22\", \"max\": \"1990/07/11\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 100.0}, \"frequency\": [{\"value\": \"1980/07/07\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1950/07/08\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1990/07/09\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1954/07/10\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1990/07/11\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1930/08/12\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1970/07/13\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1950/07/14\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1920/04/22\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"1923/03/12\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"birth\", \"column_dtype\": \"date\", \"dtypes_stats\": {\"date\": 10, \"int\": 0, \"decimal\": 0, \"string\": 0, \"boolean\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"date\", \"id\": null}, \"dummyCol\": {\"stats\": {\"count_uniques\": 7, \"min\": \"down\", \"max\": \"you\", \"stddev\": null, \"kurtosis\": null, \"mean\": null, \"skewness\": null, \"sum\": null, \"variance\": null, \"zeros\": 0, \"count_na\": 0, \"p_count_na\": 0.0, \"p_count_uniques\": 70.0}, \"frequency\": [{\"value\": \"never\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"gonna\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"you\", \"count\": 2, \"percentage\": 20.0}, {\"value\": \"give\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"up\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"let\", \"count\": 1, \"percentage\": 10.0}, {\"value\": \"down\", \"count\": 1, \"percentage\": 10.0}], \"name\": \"dummyCol\", \"column_dtype\": \"string\", \"dtypes_stats\": {\"string\": 10, \"int\": 0, \"decimal\": 0, \"boolean\": 0, \"date\": 0, \"array\": 0, \"object\": 0, \"gender\": 0, \"ip\": 0, \"url\": 0, \"email\": 0, \"credit_card_number\": 0, \"zip_code\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"categorical\", \"id\": null}}, \"name\": null, \"file_name\": null, \"summary\": {\"cols_count\": 8, \"rows_count\": 10, \"size\": \"48.5 MB\", \"sample_size\": 10000, \"missing_count\": 0, \"p_missing\": 0.0}, \"sample\": {\"columns\": [{\"title\": \"id\"}, {\"title\": \"firstName\"}, {\"title\": \"lastName\"}, {\"title\": \"billingId\"}, {\"title\": \"product\"}, {\"title\": \"price\"}, {\"title\": \"birth\"}, {\"title\": \"dummyCol\"}], \"value\": [[1, \"Luis\", \"Alvarez$$%!\", 123, \"Cake\", 10, \"1980/07/07\", \"never\"], [2, \"Andr\\u00e9\", \"Amp\\u00e8re\", 423, \"piza\", 8, \"1950/07/08\", \"gonna\"], [3, \"NiELS\", \"B\\u00f6hr//((%%\", 551, \"pizza\", 8, \"1990/07/09\", \"give\"], [4, \"PAUL\", \"dirac$\", 521, \"pizza\", 8, \"1954/07/10\", \"you\"], [5, \"Albert\", \"Einstein\", 634, \"pizza\", 8, \"1990/07/11\", \"up\"], [6, \"Galileo\", \" GALiLEI\", 672, \"arepa\", 5, \"1930/08/12\", \"never\"], [7, \"CaRL\", \"Ga%%%uss\", 323, \"taco\", 3, \"1970/07/13\", \"gonna\"], [8, \"David\", \"H$$$ilbert\", 624, \"taaaccoo\", 3, \"1950/07/14\", \"let\"], [9, \"Johannes\", \"KEPLER\", 735, \"taco\", 3, \"1920/04/22\", \"you\"], [10, \"JaMES\", \"M$$ax%%well\", 875, \"taco\", 3, \"1923/03/12\", \"down\"]]}}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:optimus:gAAAAABdwZejGbkHKC6U-na6gkSV2sDHx_mbsdruoPDPeylUTylwS8pvVkDp7FXxtMqgx8b9GukL_NP04cFIhAldgd-VtqtyOeDHFQnw6RsTeT4uDrkhRRYyPeQymMjA8Ai1DqFWTXYHLWnT8kHTBaxZjg7V1QexPFsNqiGv-1Mb_pJfn6I2slBBcR5LYnzuiRegJzeJj7-jNp7_-tRyfIxpYAdi-Pxdalzf1Fgyu4oG57h-3upbo2sIg1VQnP7DC-p5lUgvax3dltP3RhGDA81LL3jJbkBNfGfZNYfrVntT78AK1fmbY2v5Ik5Y6ZuVhXZCtG2ygB2BAC34vhCQ730q3ITgDJa6MCBdjN5riGh2sH8HGEEW5wPhkGM9DE6_dWkzIeabcYU0sW4STt1RzetLzd2SEM95AkyO5D_BADeLZ7lOaNbm0dWyNRCCAneGVyoA6HmVsuIt_BHc1iIW326sknv2Moej1sULUCeCBp0whq_zJ3TYKkOD-ReH_wFs1AAq73plTqyNA2iD8ZNx7qalMvyijlVro-YKuaOZCrKHRKumL_eMqgM7ky5MlHNSiK6VGIMBOPj85FEbXPfK9CICUEi01l-8cKtJLAkrWHLI1IxXP7WMAd3Nc3MeVYu-pSr8-wABfTlZ-m6kx_gfq7hM6WB-KqZyAqgmOEIa0Jl00OKvhX7XUqiUKyer5Or-FY3UT8rgfekbt68Ddbkq0hC3KhNcoka_iH7c6MqpKLW61-i_8EyVnuKIlLxmInL0XceN6bcb5IDC2pWybq9wwQ_98AxVxVHVh29Onyl6FeI2NRrPPtkPI-0Mu7tFeu0XqNy7uKLQ-O3nmJL-s-VkNkyvja6hKTHstGZDqUpktpNYAV_IG5bpo1ZW7pJhnEOaXTYFbrcWuhMxOSrcmhOXVFKTztYKXsajNcejvxWfEsULdH_F2-8Gse1ZIO8z4NXsxoyXzyx4ytAn7VOn-jLYeIzJPO6PG8Ei-k8wiKLUVr0F_bAIIwRAs6RJhUb1PKmG30ydYPtxCZk6Ney8IEuTZnWhan0cdRU6UgBkeaFuz7jIwmthdrGC-3mHyVpLaWdwimYN8niUrJeSpQzCUr-daLxeA5P7H_0SS9rrHLEVykzvxw2psL7WN7HYj3q4Duf_y01XY7pjJHaE43pUyaclVmKwvbHTZlsuc5gz9V0vcLD_hwizs8pBR3z6BuqqaDXcSiuKPxEbJSO-UX8TNz9qR5eflkpyG-vZDkeThFIU9Qqip3NePj7KN5Fx_rBblrhouui3NkKEeshGdwYraFLi3VoohOLxgPo88fFbO4YTnCJ4ZQG02A9D6OF4OCmm2PCyKE2VwDy5BwA0HUakhsGF3OunxRiASRKrVfWhgFAYTpxPha2qvEjjMpVZocJA4xN0FS-mfTAuKoJdT7yksIjH-13D97GiKtDhx9Bh1HvRxqv6cBlqJs-JHKr0NLXiUAK2rK-G_GzUR0LhAaQpq9yeQuUM49rngv8y7Qhn911_EL9BbECZ3WTc_PuM6YJmZ5xq-dG3vf884C3yrX9ngR3KT0kT5vhgS4Qnjh1TWodUF3EdLs_fASbOmFJ5uI5eCDgTzgAMYpc90HByHbIcUiT_xfMN-n6UqSg2dHoPWDfhEKDHHu3j9DR2Q47s4NGnGKNQdaRt_RNQa3Z6OvzPu9iAaPm5NQx3vi1OM0A1gF99YQgDBlqXopnTOT5ETb8Sfi905FfzHQ-BpOPjJC1uCYIhpw8h5mMbcyiHiv_GD_xaQRrdEi4IikFw_QdMH-5kVdW34xM5mQ1GTXlAMQ-lle1GOjv1w58lQyR3KxySyJqjGXgcW89ZNmDH_rzDi779vx2z_2WrfMZ-H8aBWuJo4Q6F8UVDkzmWg9TSKkTWC7x3Gxji0pvnBLq4Tutw7Rvr6fdbZkp0j4fvrvt_jGAQp7Z5afjExau7bEAG9GT86JVjNFWaWqrDj55jETBcoxJvaiyequ4N2hf09vdtZxtMoTsJXUYzsz10iTo-VLzzgqEiFqyGFHo8xSmPXUBClLTdyCRy3l6ybOvXHa53k_ilpXCa4Pa82LLKSoXV34WW_BwneCME46ebQ8ZkXCH-f7Ij-ved2eR8rI1s5ml7BWwpcgxi53modIE-r7_It9AfEcTAVno1G9erjrbs4LeZ4canfR45Hgu0wZ39hIUoLs1uJaaR0kaNxTQGsT7iriynWYTIRuQcCbchggoSG6uyhgWytlurJvS1FvBG3zDG_cJQxXX4wzCj7-iKPK8bkFZnL1E12tvrEmMD6oGxrzJsTW1ZQXkff9u-D7aYH2Tf7A-SRNuS02XtmprxPTvKXsE4gEIaZVqM0Bdz5_tn9H9CnrFo6zuAtGAOjItt8SNb6CgZVyIavFko3rbZQKqYaWAGrPEpY7K1ZnQ9abgN3AbEAPVqCHmUbPSLer75J3BUN-KmCJFnyp2OjbIwM-0bQJOcJ8qkjg5uiRcngXcGh2ShMuMwkwQKDDC8T1XpQICEjNxuFTqbPmbiuVnPyT0Gd2XhmPcbBy5W8wAHJ854v7hC-Jpiv-KZBQjljIFncZ1v2yfJp1BZF41zM4L3Paiuoi-EnXY3TtSJbYxrSglrRmp3OlOHNGIJqI4jJvHgvSppAtmMn9pIwOgWRdd_Jck7dPEr60lev9rmNT5zFqin66MrlZ_5lskjEDGPNpJlGixTu_IEL9Wd33dlZgG4H0HTq4pO-AFnRuqJCzXVpNujw6EMmCW-a0vlL5Jqlcy95ITUskuTL7Mf9J402S6LXa5r-ni6QUX2wdx0zvljJ8fmS2GTlh89qQlI5uMaJ4nRYCR6Rk_sb0D0dGCudLUXB2B-yH2aLkILGhfYp1zzKh_OJDIJ5NLrOguNq6OF9I8CCzXKjV4ngP8c0mknu3K2Blys0t5ey4f1ARf9o-d5tI9kgPOaMq619yncIW7U-68Epg5zv7IVnFrNIBneAJcwUSnooIToo2u2bb_hXx_2osJ_P1dW7Xk0k3QPnaitBOd61OCu52scYJySLUmpWnIVsTdQCmAG3dJVrUZWctj64piHW59zxIFmdC8h74n3A0jUCm0f_Y1WqaUeoXZTWhs_ClxuxhRFFd1cDp-CuzRKIFTe8fHTvfiaehTUBoVeT4FLrDbqiAh2KTMcFpwKQ-GYdxS4KQwJ_WjnJha_EAfd--3zt34=\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Send!\n" + ] + } + ], + "source": [ + "df.send()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'max': 10, 'min': 1}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'id' with function _bucketizer\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 10 of 10 rows / 9 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
buckets
\n", + "
9 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Galileo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 672\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " arepa\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1930/08/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " CaRL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ga%%%uss\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 323\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1970/07/13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " David\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " H$$$ilbert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 624\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taaaccoo\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " let\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Johannes\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " KEPLER\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 735\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1920/04/22\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " JaMES\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " M$$ax%%well\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 875\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " taco\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1923/03/12\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " down\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 10 of 10 rows / 9 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.cols.bucketizer(\"id\",2,\"buckets\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DataFrame[id: int, firstName: string, lastName: string, billingId: int, product: string, price: int, birth: string, dummyCol: string]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.limit(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 5 of 5 rows / 8 columns
\n", + "
1 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
id
\n", + "
1 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
firstName
\n", + "
2 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lastName
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
billingId
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
product
\n", + "
5 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
price
\n", + "
6 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
birth
\n", + "
7 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
dummyCol
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Luis\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Alvarez$$%!\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 123\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Cake\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/07/07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " never\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " André\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Ampère\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 423\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " piza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1950/07/08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " gonna\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " NiELS\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Böhr//((%%\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 551\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " give\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " PAUL\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " dirac$\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 521\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1954/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " you\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Albert\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Einstein\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 634\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pizza\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1990/07/11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " up\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 5 of 5 rows / 8 columns
\n", + "
1 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wall time: 9.97 s\n" + ] + } + ], + "source": [ + "%%time\n", + "df=op.load.csv(\"data/crime.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df1, df2 = df.random_split()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variable Type Data/Info\n", + "---------------------------------\n", + "Optimus type \n", + "df DataFrame DataFrame[INCIDENT_NUMBER<...>double, Location: string]\n", + "df1 DataFrame DataFrame[INCIDENT_NUMBER<...>double, Location: string]\n", + "df2 DataFrame DataFrame[INCIDENT_NUMBER<...>double, Location: string]\n", + "op Optimus ct at 0x000002D045142668>\n", + "sys module \n" + ] + } + ], + "source": [ + "whos" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Processing Stats For columns...\n", + "INFO:optimus:Batch Stats 0. Processing columns['INCIDENT_NUMBER', 'OFFENSE_CODE', 'OFFENSE_CODE_GROUP', 'OFFENSE_DESCRIPTION', 'DISTRICT', 'REPORTING_AREA', 'SHOOTING', 'OCCURRED_ON_DATE', 'YEAR', 'MONTH', 'DAY_OF_WEEK', 'HOUR', 'UCR_PART', 'STREET', 'Lat', 'Long', 'Location']\n", + "INFO:optimus:Batch Histogram 0. Processing columns['INCIDENT_NUMBER', 'OFFENSE_CODE', 'OFFENSE_CODE_GROUP', 'OFFENSE_DESCRIPTION', 'DISTRICT', 'REPORTING_AREA', 'SHOOTING', 'OCCURRED_ON_DATE', 'YEAR', 'MONTH', 'DAY_OF_WEEK', 'HOUR', 'UCR_PART', 'STREET', 'Lat', 'Long', 'Location']\n", + "INFO:optimus:Processing Frequency ...\n", + "INFO:optimus:`INCIDENT_NUMBER`,`OFFENSE_CODE_GROUP`,`OFFENSE_DESCRIPTION`,`DISTRICT`,`REPORTING_AREA`,`SHOOTING`,`OCCURRED_ON_DATE`,`DAY_OF_WEEK`,`UCR_PART`,`STREET`,`Location` column(s) was not processed because is/are not byte,short,big,int,double,float\n", + "INFO:optimus:`INCIDENT_NUMBER`,`OFFENSE_CODE_GROUP`,`OFFENSE_DESCRIPTION`,`DISTRICT`,`REPORTING_AREA`,`SHOOTING`,`OCCURRED_ON_DATE`,`DAY_OF_WEEK`,`UCR_PART`,`STREET`,`Location` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Outputting 0 columns after filtering. Is this expected?\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Overview

\n", + "
\n", + "
\n", + "
\n", + "

Dataset info

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
Number of columns17
Number of rows319073
Total Missing (%)370778
Total size in memory100.8 MB
\n", + "
\n", + "
\n", + "

Column types

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
Categorical0
Numeric0
Date1
Array0
Not available1
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

INCIDENT_NUMBER

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 280876
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
I162030584130.0%
I152080623110.0%
I172096394100.0%
I172013170100.0%
I182065208100.0%
I17202252490.0%
I16200187190.0%
I17205442990.0%
I16207132790.0%
I16209817090.0%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

OFFENSE_CODE

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 228
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " 319073\n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum111
Maximum3831
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum111
Maximum3831
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

OFFENSE_CODE_GROUP

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 66
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
Motor Vehicle Accident Response3713211.64%
Larceny259358.13%
Medical Assistance235407.38%
Investigate Person187505.88%
Other180755.66%
Drug Violation165485.19%
Simple Assault158264.96%
Vandalism154154.83%
Verbal Disputes130994.11%
Towed112873.54%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

OFFENSE_DESCRIPTION

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 231
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
SICK/INJURED/MEDICAL - PERSON187835.89%
INVESTIGATE PERSON187545.88%
M/V - LEAVING SCENE - PROPERTY DAMAGE163235.12%
VANDALISM151544.75%
ASSAULT SIMPLE - BATTERY147914.64%
VERBAL DISPUTE130994.11%
TOWED MOTOR VEHICLE112873.54%
INVESTIGATE PROPERTY111243.49%
LARCENY THEFT FROM BUILDING90692.84%
THREATS TO DO BODILY HARM90422.83%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

DISTRICT

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 12
Unique (%)
Missing1765
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 317308\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 1765\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
B24994515.65%
C114253013.33%
D44191513.14%
A13571711.19%
B33544211.11%
C6234607.35%
D14201276.31%
E13175365.5%
E18173485.44%
A7135444.24%
\"Missing\"1765%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

REPORTING_AREA

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 925
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
202506.35%
11123720.74%
18620160.63%
32918780.59%
11718320.57%
14317750.56%
62417510.55%
2617470.55%
46516500.52%
13014230.45%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

SHOOTING

\n", + " null\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 1
Unique (%)
Missing318054
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 1019\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 318054\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
None31805499.68%
Y10190.32%
\"Missing\"318054%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

OCCURRED_ON_DATE

\n", + " date\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 243244
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " 319073\n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
2017-06-01 00:00:00290.01%
2016-08-01 00:00:00270.01%
2015-07-01 00:00:00270.01%
2015-06-18 05:00:00220.01%
2017-08-01 00:00:00220.01%
2017-01-01 00:00:00210.01%
2015-12-07 11:38:00200.01%
2017-05-01 00:00:00200.01%
2016-04-01 00:00:00200.01%
2017-04-01 00:00:00190.01%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

YEAR

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 4
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " 319073\n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum2015
Maximum2018
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum2015
Maximum2018
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

MONTH

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 12
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " 319073\n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum1
Maximum12
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum1
Maximum12
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

DAY_OF_WEEK

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 7
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
Friday4849515.2%
Wednesday4672914.65%
Thursday4665614.62%
Tuesday4638314.54%
Monday4567914.32%
Saturday4481814.05%
Sunday4031312.63%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

HOUR

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 25
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " 319073\n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum0
Maximum23
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum0
Maximum23
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

UCR_PART

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 4
Unique (%)
Missing90
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 318983\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 90\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
Part Three15855349.69%
Part Two9756930.58%
Part One6162919.32%
Other12320.39%
None900.03%
\"Missing\"90%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

STREET

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 4873
Unique (%)
Missing10871
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 308202\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 10871\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
WASHINGTON ST141944.45%
None108713.41%
BLUE HILL AVE77942.44%
BOYLSTON ST72212.26%
DORCHESTER AVE51491.61%
TREMONT ST47961.5%
MASSACHUSETTS AVE47081.48%
HARRISON AVE46091.44%
CENTRE ST43831.37%
COMMONWEALTH AVE41341.3%
\"Missing\"10871%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

Lat

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 17545
Unique (%)
Missing19999
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " 299074\n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 19999\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum-1.0
Maximum42.39504
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum-1.0
Maximum42.39504
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

Long

\n", + " numeric\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 18225
Unique (%)
Missing19999
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " \n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " 299074\n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 19999\n", + "
\n", + " \n", + "
\n", + "

\n", + " Basic Stats\n", + "

\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
Mean
Minimum-71.17867
Maximum-1.0
Zeros(%)
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "

Quantile statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Minimum-71.17867
Maximum-1.0
Range
Interquartile range
\n", + "
\n", + "
\n", + "

Descriptive statistics

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Standard deviation
Coef of variation
Kurtosis
Mean
MAD
Skewness
Sum
Variance
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + " \n", + "\n", + "
\n", + "
\n", + "

Location

\n", + " categorical\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unique 19930
Unique (%)
Missing0
Missing (%)
\n", + "
\n", + "

\n", + " Datatypes\n", + "

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "
\n", + " String\n", + " \n", + " 319073\n", + "
\n", + " Integer\n", + " \n", + " \n", + "
\n", + " Decimal\n", + " \n", + " \n", + "
\n", + " Bool\n", + " \n", + " \n", + "
\n", + " Date\n", + " \n", + " \n", + "
\n", + " Missing\n", + " \n", + " 0\n", + "
\n", + " Null\n", + " \n", + " 0\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "

Frequency

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
ValueCountFrecuency (%)
(0.00000000, 0.00000000)199996.27%
(42.34862382, -71.08277637)12430.39%
(42.36183857, -71.05976489)12080.38%
(42.28482577, -71.09137369)11210.35%
(42.32866284, -71.08563401)10420.33%
(42.25621592, -71.12401947)8980.28%
(42.29755533, -71.05970910)7830.25%
(42.34128751, -71.05467933)7730.24%
(-1.00000000, -1.00000000)7450.23%
(42.33152148, -71.07085307)7350.23%
\"Missing\"0%
\n", + " \n", + "
\n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:run() executed in 53.66 sec\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wall time: 53.7 s\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "op.profiler.run(df, force_process= True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, datetime\n", + "from pyspark.sql.types import *\n", + "\n", + "source_df = op.create.df(\n", + " [\n", + " (\"names\", \"str\", True),\n", + " (\"height(ft)\", \"int\", True),\n", + " (\"function\", \"str\", True),\n", + " (\"rank\", \"int\", True),\n", + " (\"age\", \"int\", True),\n", + " (\"weight(t)\", \"float\", True),\n", + " (\"japanese name\", ArrayType(StringType()), True),\n", + " (\"last position seen\", \"str\", True),\n", + " (\"date arrival\", \"str\", True),\n", + " (\"last date seen\", \"str\", True),\n", + " (\"attributes\", ArrayType(FloatType()), True),\n", + " (\"DateType\", DateType()),\n", + " (\"Timestamp\", TimestampType()),\n", + " (\"Cybertronian\", \"bool\", True),\n", + " (\"function(binary)\", \"binary\", False),\n", + " (\"NullType\", \"null\", True),\n", + "\n", + " ],\n", + " [\n", + " (\"Optim'us\", 28, \"Leader\", 10, 5000000, 4.30, [\"Inochi\", \"Convoy\"], \"19.442735,-99.201111\", \"1980/04/10\",\n", + " \"2016/09/10\", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray(\"Leader\", \"utf-8\"),\n", + " None),\n", + " (\"bumbl#ebéé \", 17, \"Espionage\", 7, 5000000, 2.0, [\"Bumble\", \"Goldback\"], \"10.642707,-71.612534\", \"1980/04/10\",\n", + " \"2015/08/10\", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray(\"Espionage\", \"utf-8\"),\n", + " None),\n", + " (\"ironhide&\", 26, \"Security\", 7, 5000000, 4.0, [\"Roadbuster\"], \"37.789563,-122.400356\", \"1980/04/10\",\n", + " \"2014/07/10\", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray(\"Security\", \"utf-8\"),\n", + " None),\n", + " (\"Jazz\", 13, \"First Lieutenant\", 8, 5000000, 1.80, [\"Meister\"], \"33.670666,-117.841553\", \"1980/04/10\",\n", + " \"2013/06/10\", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,\n", + " bytearray(\"First Lieutenant\", \"utf-8\"), None),\n", + " (\"Megatron\", None, \"None\", 10, 5000000, 5.70, [\"Megatron\"], None, \"1980/04/10\", \"2012/05/10\", [None, 5700.0],\n", + " date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray(\"None\", \"utf-8\"), None),\n", + " (\"\", 300, \"Battle Station\", 8, 5000000, None, [\"Metroflex\"], None, \"1980/04/10\", \"2011/04/10\",\n", + " [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"Battle Station\", \"utf-8\"), None),\n", + " (None, 2, \"3\", 4, 5, 6.0, [\"7\"], 8, \"1980/04/10\", \"2011/04/10\",\n", + " [11.0], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"15\", \"utf-8\"), None)\n", + " ], infer_schema=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DataFrame[names: string, height(ft): int, function: string, rank: int, age: int, weight(t): float, japanese name: array, last position seen: string, date arrival: string, last date seen: string, attributes: array, DateType: date, Timestamp: timestamp, Cybertronian: boolean, function(binary): binary, NullType: null] function function Leader pasta\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " pasta\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.replace(\"function\", search=\"Leader\", replace_by=\"pasta\", search_by=\"words\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
200 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age_5
\n", + "
17 (bigint)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age_5000000
\n", + "
18 (bigint)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
200 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.values_to_cols(\"age\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rank***INDEX_TO_STRING\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank***INDEX_TO_STRING
\n", + "
17 (double)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3.0\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.string_to_index(\"rank\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age***INDEX_TO_STRING
\n", + "
17 (double)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.0\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "IllegalArgumentException", + "evalue": "'requirement failed: Output column names_heig******ONE_HOT_ENCODER already exists.'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m format(target_id, \".\", name), value)\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling o1336.transform.\n: java.lang.IllegalArgumentException: requirement failed: Output column names_heig******ONE_HOT_ENCODER already exists.\r\n\tat scala.Predef$.require(Predef.scala:224)\r\n\tat org.apache.spark.ml.feature.OneHotEncoder.transformSchema(OneHotEncoder.scala:89)\r\n\tat org.apache.spark.ml.feature.OneHotEncoder.transform(OneHotEncoder.scala:105)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mIllegalArgumentException\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msource_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues_to_cols\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"age\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mvalues_to_cols\u001b[1;34m(input_cols, output_cols)\u001b[0m\n\u001b[0;32m 2105\u001b[0m \u001b[0mdf_indexed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mstring_to_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2106\u001b[0m \u001b[0mdf_indexed\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2107\u001b[1;33m \u001b[0mdf_encoded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mone_hot_encoder\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_indexed\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2108\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2109\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf_encoded\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\ml\\feature.py\u001b[0m in \u001b[0;36mone_hot_encoder\u001b[1;34m(df, input_cols, output_col, **kargs)\u001b[0m\n\u001b[0;32m 94\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 95\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPipeline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstages\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mencode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 96\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 97\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\ml\\base.py\u001b[0m in \u001b[0;36mtransform\u001b[1;34m(self, dataset, params)\u001b[0m\n\u001b[0;32m 171\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 172\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 173\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 174\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Params must be a param map but got %s.\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\ml\\pipeline.py\u001b[0m in \u001b[0;36m_transform\u001b[1;34m(self, dataset)\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 261\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstages\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 262\u001b[1;33m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 263\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 264\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\ml\\base.py\u001b[0m in \u001b[0;36mtransform\u001b[1;34m(self, dataset, params)\u001b[0m\n\u001b[0;32m 171\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 172\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 173\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 174\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Params must be a param map but got %s.\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\ml\\wrapper.py\u001b[0m in \u001b[0;36m_transform\u001b[1;34m(self, dataset)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_transfer_params_to_java\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_java_obj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msql_ctx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1259\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 77\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mQueryExecutionException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 78\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'java.lang.IllegalArgumentException: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 79\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mIllegalArgumentException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 80\u001b[0m \u001b[1;32mraise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 81\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdeco\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mIllegalArgumentException\u001b[0m: 'requirement failed: Output column names_heig******ONE_HOT_ENCODER already exists.'" + ] + } + ], + "source": [ + "source_df.cols.values_to_cols(\"age\")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "splits = source_df.randomSplit([0.2,0.8],1)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 3 of 3 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 3 of 3 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "splits[0].table()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.table()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "source_df.cols.unnest([\"date arrival\", \"last date seen\"], \"/\", output_cols=[(\"year\",\"month\",\"day\"),(\"year1\",\"month1\",\"day1\") ]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'weight(t)' with function _cast_to\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_0
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_1
\n", + "
18 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 0\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.cast([\"weight(t)\"], \"str\").cols.unnest(\"weight(t)\",\".\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['4',⋅'3']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['2',⋅'0']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['4',⋅'0']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['1',⋅'8']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['5',⋅'7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['6',⋅'0']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 16 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pyspark.sql import functions as F\n", + "input_col = \"weight(t)\"\n", + "separator = \"\\\\.\"\n", + "source_df.withColumn(input_col,F.split(F.col(input_col), separator)).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:Using 'column_exp' to process column 'weight(t)' with function _cast_to\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 20 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_0
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_1
\n", + "
18 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_2
\n", + "
19 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)_3
\n", + "
20 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 20 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.unnest([\"weight(t)\"], \".\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 19 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
year_1
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
year1
\n", + "
18 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
month1
\n", + "
19 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 06\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 05\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 19 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.unnest([\"date arrival\", \"last date seen\"], \"/\", index= [1,(1,3)],output_cols=[(\"year\"),(\"year1\",\"month1\") ]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 22 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
year
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
month
\n", + "
18 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
day
\n", + "
19 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
year1
\n", + "
20 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
month1
\n", + "
21 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
day1
\n", + "
22 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 06\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 05\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 22 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.unnest([\"date arrival\", \"last date seen\"], \"/\",output_cols=[(\"year\",\"month\",\"day\"),(\"year1\",\"month1\", \"day1\") ]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 22 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival_0
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival_1
\n", + "
18 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival_2
\n", + "
19 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen_0
\n", + "
20 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen_1
\n", + "
21 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen_2
\n", + "
22 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 09\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 08\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 07\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 06\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 05\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 04\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 22 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.unnest([\"date arrival\", \"last date seen\"], \"/\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lat1
\n", + "
17 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
lng1
\n", + "
18 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8.53439998626709\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4300.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.334000110626221\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2000.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7.924799919128418\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4000.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3.962399959564209\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1800.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5700.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 91.44000244140625\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.unnest([\"attributes\"], output_cols=[(\"lat1\", \"lng1\") ]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "ename": "Py4JJavaError", + "evalue": "An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 208.0 failed 1 times, most recent failure: Lost task 0.0 in stage 208.0 (TID 826, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\", line 1371, in takeUpToNumLeft\n yield next(iterator)\nStopIteration\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 372, in dump_stream\n vs = list(itertools.islice(iterator, batch))\nRuntimeError: generator raised StopIteration\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)\r\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat scala.Option.foreach(Option.scala:257)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1823)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1772)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1761)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)\r\n\tat org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\nCaused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\", line 1371, in takeUpToNumLeft\n yield next(iterator)\nStopIteration\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 372, in dump_stream\n vs = list(itertools.islice(iterator, batch))\nRuntimeError: generator raised StopIteration\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\t... 1 more\r\n", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mrow\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 15\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msource_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_unnest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoDF\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\session.py\u001b[0m in \u001b[0;36mtoDF\u001b[1;34m(self, schema, sampleRatio)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mu'Alice'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 57\u001b[0m \"\"\"\n\u001b[1;32m---> 58\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msparkSession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreateDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msampleRatio\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[0mRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoDF\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtoDF\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\session.py\u001b[0m in \u001b[0;36mcreateDataFrame\u001b[1;34m(self, data, schema, samplingRatio, verifySchema)\u001b[0m\n\u001b[0;32m 687\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 688\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mRDD\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 689\u001b[1;33m \u001b[0mrdd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_createFromRDD\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprepare\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msamplingRatio\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 690\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[0mrdd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_createFromLocal\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprepare\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\session.py\u001b[0m in \u001b[0;36m_createFromRDD\u001b[1;34m(self, rdd, schema, samplingRatio)\u001b[0m\n\u001b[0;32m 382\u001b[0m \"\"\"\n\u001b[0;32m 383\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mschema\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mschema\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 384\u001b[1;33m \u001b[0mstruct\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_inferSchema\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrdd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msamplingRatio\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mschema\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 385\u001b[0m \u001b[0mconverter\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_create_converter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstruct\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 386\u001b[0m \u001b[0mrdd\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconverter\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\session.py\u001b[0m in \u001b[0;36m_inferSchema\u001b[1;34m(self, rdd, samplingRatio, names)\u001b[0m\n\u001b[0;32m 353\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mclass\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mpyspark\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mStructType\u001b[0m\u001b[0;31m`\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 354\u001b[0m \"\"\"\n\u001b[1;32m--> 355\u001b[1;33m \u001b[0mfirst\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfirst\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 356\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mfirst\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 357\u001b[0m raise ValueError(\"The first row in RDD is empty, \"\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mfirst\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1391\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mRDD\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mempty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1392\u001b[0m \"\"\"\n\u001b[1;32m-> 1393\u001b[1;33m \u001b[0mrs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1394\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mrs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1395\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mrs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mtake\u001b[1;34m(self, num)\u001b[0m\n\u001b[0;32m 1373\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1374\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpartsScanned\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpartsScanned\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mnumPartsToTry\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtotalParts\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1375\u001b[1;33m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrunJob\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtakeUpToNumLeft\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1376\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1377\u001b[0m \u001b[0mitems\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\context.py\u001b[0m in \u001b[0;36mrunJob\u001b[1;34m(self, rdd, partitionFunc, partitions, allowLocal)\u001b[0m\n\u001b[0;32m 1011\u001b[0m \u001b[1;31m# SparkContext#runJob.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1012\u001b[0m \u001b[0mmappedRDD\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmapPartitions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpartitionFunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1013\u001b[1;33m \u001b[0msock_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPythonRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrunJob\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jsc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmappedRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jrdd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpartitions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1014\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_load_from_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmappedRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jrdd_deserializer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1015\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1259\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[0ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoString\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 326\u001b[0m raise Py4JJavaError(\n\u001b[0;32m 327\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m format(target_id, \".\", name), value)\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 330\u001b[0m raise Py4JError(\n", + "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 208.0 failed 1 times, most recent failure: Lost task 0.0 in stage 208.0 (TID 826, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\", line 1371, in takeUpToNumLeft\n yield next(iterator)\nStopIteration\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 372, in dump_stream\n vs = list(itertools.islice(iterator, batch))\nRuntimeError: generator raised StopIteration\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)\r\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat scala.Option.foreach(Option.scala:257)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1823)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1772)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1761)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)\r\n\tat org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\nCaused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\", line 1371, in takeUpToNumLeft\n yield next(iterator)\nStopIteration\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 372, in dump_stream\n vs = list(itertools.islice(iterator, batch))\nRuntimeError: generator raised StopIteration\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:149)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\t... 1 more\r\n" + ] + } + ], + "source": [ + " def _unnest(row):\n", + " _dict = row.asDict()\n", + "\n", + " # Get the column we want to unnest\n", + " _list = _dict[input_col]\n", + "\n", + " # Ensure that float are python floats and not np floats\n", + " if index is None:\n", + " _list = [float(x) for x in _list]\n", + " else:\n", + " _list = [float(_list[1])]\n", + "\n", + " return row + tuple(_list)\n", + "\n", + "df = source_df.rdd.map(_unnest).toDF(source_df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for name in dir():\n", + " if not name.startswith('_'):\n", + " del globals()[name]\n", + "\n", + "for name in dir():\n", + " if not name.startswith('_'):\n", + " del locals()[name]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "1\n", + "1\n", + "[('Optimus', ), ('op', ), ('df', )]\n" + ] + } + ], + "source": [ + "from pyspark.sql import DataFrame\n", + "result = []\n", + "\n", + "c = (Optimus, DataFrame)\n", + "for name, value in globals().copy().items():\n", + " # Get non privates vars\n", + " if not name.startswith(\"_\"):\n", + " # Get optimus import\n", + " if name == \"Optimus\" or isinstance(value, c):\n", + " d = (name, type(value)) \n", + " print(1)\n", + " result.append(d)\n", + "# else:\n", + "# del globals()[name]\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['df']\n" + ] + } + ], + "source": [ + "from pyspark.sql import DataFrame\n", + "result = []\n", + "\n", + "datatypes = (DataFrame)\n", + "for name, value in globals().copy().items():\n", + " # Get non privates vars\n", + " if not name.startswith(\"_\"):\n", + " # Get optimus import\n", + " if isinstance(value, datatypes):\n", + " result.append(name)\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank1
\n", + "
17 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.create(\"rank1\",source_df[\"rank\"] + 1 ).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank1
\n", + "
17 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 11\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 9\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.withColumn(\"rank1\",source_df[\"rank\"] + 1 ).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank1
\n", + "
17 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 14\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 16\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 20\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 16\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.withColumn(\"rank1\", source_df[\"rank\"]+ source_df[\"rank\"]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
names1
\n", + "
17 (double)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.create(\"names1\", source_df[\"names\"] + \"1\").table()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:optimus:`names`,`height(ft)`,`function`,`rank`,`age`,`weight(t)`,`last position seen`,`date arrival`,`last date seen`,`Timestamp`,`Cybertronian`,`NullType` column(s) was not processed because is/are not array,vector,byte,date,binary\n", + "INFO:optimus:Using 'column_exp' to process column 'japanese name' with function _cast_to\n", + "INFO:optimus:Using 'column_exp' to process column 'attributes' with function _cast_to\n", + "INFO:optimus:Using 'column_exp' to process column 'DateType' with function _cast_to\n", + "INFO:optimus:Using 'column_exp' to process column 'function(binary)' with function _cast_to\n" + ] + }, + { + "ename": "Py4JJavaError", + "evalue": "An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 16.0 failed 1 times, most recent failure: Lost task 4.0 in stage 16.0 (TID 47, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 376, in dump_stream\n bytes = self.serializer.dumps(vs)\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 555, in dumps\n return pickle.dumps(obj, protocol)\nTypeError: can't pickle dict_items objects\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)\r\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat scala.Option.foreach(Option.scala:257)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1823)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1772)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1761)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:363)\r\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:938)\r\n\tat org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:162)\r\n\tat org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\nCaused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 376, in dump_stream\n bytes = self.serializer.dumps(vs)\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 555, in dumps\n return pickle.dumps(obj, protocol)\nTypeError: can't pickle dict_items objects\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\t... 1 more\r\n", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msource_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrequency\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"*\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mfrequency\u001b[1;34m(columns, n, percentage, total_rows)\u001b[0m\n\u001b[0;32m 1883\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1884\u001b[0m freq = (df.select(columns).rdd\n\u001b[1;32m-> 1885\u001b[1;33m \u001b[1;33m.\u001b[0m\u001b[0mflatMap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1886\u001b[0m ).collect().show(10)\n\u001b[0;32m 1887\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\rdd.py\u001b[0m in \u001b[0;36mcollect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 832\u001b[0m \"\"\"\n\u001b[0;32m 833\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mSCCallSiteSync\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcss\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 834\u001b[1;33m \u001b[0msock_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPythonRDD\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcollectAndServe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrdd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 835\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_load_from_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msock_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jrdd_deserializer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 836\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1259\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[0ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoString\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 326\u001b[0m raise Py4JJavaError(\n\u001b[0;32m 327\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m format(target_id, \".\", name), value)\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 330\u001b[0m raise Py4JError(\n", + "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 16.0 failed 1 times, most recent failure: Lost task 4.0 in stage 16.0 (TID 47, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 376, in dump_stream\n bytes = self.serializer.dumps(vs)\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 555, in dumps\n return pickle.dumps(obj, protocol)\nTypeError: can't pickle dict_items objects\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\n\nDriver stacktrace:\r\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)\r\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)\r\n\tat scala.Option.foreach(Option.scala:257)\r\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1823)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1772)\r\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1761)\r\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\r\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\r\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\r\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:363)\r\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:938)\r\n\tat org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:162)\r\n\tat org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\nCaused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 230, in main\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\worker.py\", line 225, in process\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 376, in dump_stream\n bytes = self.serializer.dumps(vs)\n File \"C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\\python\\lib\\pyspark.zip\\pyspark\\serializers.py\", line 555, in dumps\n return pickle.dumps(obj, protocol)\nTypeError: can't pickle dict_items objects\n\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:298)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:438)\r\n\tat org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:421)\r\n\tat org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:252)\r\n\tat org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)\r\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:893)\r\n\tat org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)\r\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)\r\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)\r\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)\r\n\tat org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)\r\n\tat org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)\r\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)\r\n\tat org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:939)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2074)\r\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:109)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\r\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\r\n\t... 1 more\r\n" + ] + } + ], + "source": [ + "source_df.cols.frequency(\"*\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['1_copy', '2_copy']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[v + \"_copy\" for v in [\"1\",\"2\"]]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
names_copy
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)_copy
\n", + "
18 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 18 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.copy(input_cols=[\"names\",\"height(ft)\"]).table()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['function'] ['function_copy']\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
names
\n", + "
1 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
height(ft)
\n", + "
2 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function
\n", + "
3 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
rank
\n", + "
4 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
age
\n", + "
5 (int)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
weight(t)
\n", + "
6 (float)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
japanese name
\n", + "
7 (array<string>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last position seen
\n", + "
8 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
date arrival
\n", + "
9 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
last date seen
\n", + "
10 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
attributes
\n", + "
11 (array<float>)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
DateType
\n", + "
12 (date)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Timestamp
\n", + "
13 (timestamp)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
Cybertronian
\n", + "
14 (boolean)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function(binary)
\n", + "
15 (binary)
\n", + "
\n", + " \n", + " not nullable\n", + " \n", + "
\n", + "
\n", + "
NullType
\n", + "
16 (null)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
function_copy
\n", + "
17 (string)
\n", + "
\n", + " \n", + " nullable\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Optim'us\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 28\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.300000190734863\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Inochi',⋅'Convoy']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 19.442735,-99.201111\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016/09/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [8.53439998626709,⋅4300.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2016-09-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Leader')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Leader\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bumbl#ebéé⋅⋅\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 17\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Bumble',⋅'Goldback']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10.642707,-71.612534\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015/08/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [5.334000110626221,⋅2000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2015-08-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Espionage')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Espionage\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ironhide&\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 26\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 7\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Roadbuster']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 37.789563,-122.400356\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014/07/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [7.924799919128418,⋅4000.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Security')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Security\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Jazz\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 13\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1.7999999523162842\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Meister']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 33.670666,-117.841553\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013/06/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [3.962399959564209,⋅1800.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2013-06-24\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'First⋅Lieutenant')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " First⋅Lieutenant\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Megatron\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5.699999809265137\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Megatron']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012/05/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [None,⋅5700.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2012-05-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'None')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 300\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5000000\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['Metroflex']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [91.44000244140625,⋅None]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'Battle⋅Station')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " Battle⋅Station\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 4\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 5\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 6.0\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " ['7']\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 8\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 1980/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011/04/10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " [11.0]\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2011-04-10\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 2014-06-24⋅00:00:00\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " True\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " bytearray(b'15')\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " None\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 3\n", + " \n", + "
\n", + "
\n", + "\n", + "\n", + "
Viewing 7 of 7 rows / 17 columns
\n", + "
8 partition(s)
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "source_df.cols.copy(\"function\",\"function_copy\").table()" ] }, { diff --git a/optimus/profiler/profiler.py b/optimus/profiler/profiler.py index 7ecbd634..57ed3709 100644 --- a/optimus/profiler/profiler.py +++ b/optimus/profiler/profiler.py @@ -420,8 +420,8 @@ def columns_stats(self, df, columns, buckets=10, infer=False, relative_error=REL :param df: Dataframe to be processed :param columns: Columns that you want to profile :param buckets: Create buckets divided by range. Each bin is equal. - :param infer: try to infer the column datatype - :param relative_error: relative error when the percentile is calculated. 0 is more exact as slow 1 more error and faster + :param infer: try to infer the column dataType + :param relative_error: relative error when the percentile is calculated. 0 is more precision/slow 1 less precision/faster :param approx_count: Use the function approx_count_distinct or countDistinct. approx_count_distinct is faster :param mismatch: :return: json object