diff --git a/dev/.buildinfo b/dev/.buildinfo index 5a3c0cb3e..065c464a4 100644 --- a/dev/.buildinfo +++ b/dev/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 2477e942aab9c78438a49ee8fb3d50a8 +config: e111dff184e2451e0a92cf034a83814e tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/dev/_images/sphx_glr_01_encodings_001.png b/dev/_images/sphx_glr_01_encodings_001.png index 0819b4e21..3a8528524 100644 Binary files a/dev/_images/sphx_glr_01_encodings_001.png and b/dev/_images/sphx_glr_01_encodings_001.png differ diff --git a/dev/_images/sphx_glr_01_encodings_thumb.png b/dev/_images/sphx_glr_01_encodings_thumb.png index 96e985853..c7a1afc3c 100644 Binary files a/dev/_images/sphx_glr_01_encodings_thumb.png and b/dev/_images/sphx_glr_01_encodings_thumb.png differ diff --git a/dev/_images/sphx_glr_08_join_aggregation_003.png b/dev/_images/sphx_glr_08_join_aggregation_003.png index e93efe16e..b63a7b4f5 100644 Binary files a/dev/_images/sphx_glr_08_join_aggregation_003.png and b/dev/_images/sphx_glr_08_join_aggregation_003.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_001.png b/dev/_images/sphx_glr_09_interpolation_join_001.png index c229641e3..cc108f1c5 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_001.png and b/dev/_images/sphx_glr_09_interpolation_join_001.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_002.png b/dev/_images/sphx_glr_09_interpolation_join_002.png index 5d6089a58..56e66bc7d 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_002.png and b/dev/_images/sphx_glr_09_interpolation_join_002.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_003.png b/dev/_images/sphx_glr_09_interpolation_join_003.png index 5613c13c5..a3f4a4be6 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_003.png and b/dev/_images/sphx_glr_09_interpolation_join_003.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_thumb.png b/dev/_images/sphx_glr_09_interpolation_join_thumb.png index 3ccd47ec6..d59810f56 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_thumb.png and b/dev/_images/sphx_glr_09_interpolation_join_thumb.png differ diff --git a/dev/_sources/auto_examples/01_encodings.rst.txt b/dev/_sources/auto_examples/01_encodings.rst.txt index 183e70f49..b3883f666 100644 --- a/dev/_sources/auto_examples/01_encodings.rst.txt +++ b/dev/_sources/auto_examples/01_encodings.rst.txt @@ -440,44 +440,45 @@ corresponding columns: .. code-block:: none - array(['supports, support, recruit', 'shelters, centers, sheriff', - 'rockville, fallsgrove, downtown', 'animal, individual, virtual', - 'engineering, mangement, budgeting', 'station, state, estate', - 'facilities, maintenance, accounts', 'silver, spring, ride', - 'behavioral, health, school', 'equipment, management, automotive', - 'patrol, 4th, 6th', 'safety, collision, section', - 'communications, communication, telecommunications', - 'warehouse, delivery, operations', 'highway, welfare, services', - 'gaithersburg, the, clarksburg', 'nicholson, transit, taxicab', - 'development, planning, stormwater', - 'assessment, protective, process', - 'construction, instruction, building', - 'mechanical, special, commercial', - 'technology, systems, telephone', 'district, urban, 3rd', - 'family, sexual, crimes', 'security, mccf, unit', - 'custody, members, customer', + array(['services, highway, service', 'engineering, training, planning', + 'traffic, safety, alcohol', 'sexual, family, crimes', + 'district, payroll, squad', 'construction, instruction, building', + 'silver, spring, ride', 'behavioral, health, school', + 'delivery, operations, special', 'gaithersburg, clarksburg, the', + 'welfare, childhood, children', 'toddlers, custody, members', + 'supports, support, network', 'station, state, estate', + 'director, officers, projects', 'management, fleet, parking', + 'maintenance, facilities, eligibility', + 'processing, programs, accounting', 'security, mc311, mccf', + 'communications, communication, immunization', + 'administration, battalion, administrative', 'patrol, 4th, 6th', + 'procurement, protective, fiscal', + 'development, residential, stormwater', 'investigative, investigations, criminal', - 'administration, administrative, battalion', - 'assistance, eligibility, emergency', 'automated, traffic, office', - 'master, registered, firefighter', 'manager, budget, projects', - 'specialist, special, environmental', 'candidate, police, cadet', - 'officer, office, traffic', 'operator, bus, operations', - 'income, assistance, client', 'liquor, clerk, store', - 'rescuer, recruit, firefighter', 'technician, mechanic, supply', - 'program, programs, procurement', 'lieutenant, captain, chief', - 'community, nurse, unit', 'school, health, room', + 'emergency, centers, center', 'nicholson, transit, taxicab', + 'automotive, assessment, equipment', + 'technology, systems, telephone', 'rockville, twinbrook, library', + 'dietary, security, partnerships', + 'operator, equipment, apprentice', + 'recreation, planning, renovation', + 'master, registered, firefighter', 'candidate, police, of', + 'officer, office, police', 'specialist, special, procurement', + 'program, programs, projects', 'technician, mechanic, supply', + 'legislative, principal, executive', + 'coordinator, coordinating, depot', 'sergeant, cadet, police', + 'recruit, firefighter, rescuer', 'community, health, nurse', + 'warehouse, welfare, caseworker', + 'enforcement, permitting, inspector', 'manager, budget, engineer', + 'school, room, behavioral', 'captain, rescue, chief', + 'supervisory, supervisor, therapist', 'communications, telecommunications, safety', - 'coordinator, services, service', 'accountant, assistant, county', - 'supervisor, supervisory, sergeant', - 'craftsworker, customer, public', 'sheriff, deputy, autobody', - 'enforcement, permitting, inspector', - 'correctional, correction, records', - 'information, technology, recreation', - 'administrative, principal, executive', - 'crossing, purchasing, engineer', 'warehouse, welfare, driver', - 'corporal, erp, behavioral', 'worker, social, leader', - 'librarian, library, telephone', - 'equipment, investment, investigator'], dtype=object) + 'liquor, clerk, store', 'librarian, crossing, library', + 'sheriff, deputy, autobody', + 'information, technology, technologist', + 'lieutenant, attendant, facilities', 'services, service, urban', + 'assistance, assistant, income', + 'correctional, correction, corporal', + 'administrative, administration, administrator'], dtype=object) @@ -559,7 +560,7 @@ Let's look at the cross-validated R2 score of our model: .. code-block:: none - R2 score: mean: 0.922; std: 0.013 + R2 score: mean: 0.919; std: 0.017 @@ -695,7 +696,7 @@ to plot the feature importances. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (1 minutes 16.726 seconds) + **Total running time of the script:** (1 minutes 20.106 seconds) .. _sphx_glr_download_auto_examples_01_encodings.py: diff --git a/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt b/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt index 5244a0a35..b233532e7 100644 --- a/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt +++ b/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt @@ -504,7 +504,7 @@ as a set of latent topics. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.903 seconds) + **Total running time of the script:** (0 minutes 1.818 seconds) .. _sphx_glr_download_auto_examples_02_feature_interpretation_with_gapencoder.py: diff --git a/dev/_sources/auto_examples/03_datetime_encoder.rst.txt b/dev/_sources/auto_examples/03_datetime_encoder.rst.txt index 106bb51e9..a3029da7e 100644 --- a/dev/_sources/auto_examples/03_datetime_encoder.rst.txt +++ b/dev/_sources/auto_examples/03_datetime_encoder.rst.txt @@ -610,7 +610,7 @@ and transforms datetime columns by default. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.509 seconds) + **Total running time of the script:** (0 minutes 4.276 seconds) .. _sphx_glr_download_auto_examples_03_datetime_encoder.py: diff --git a/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt b/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt index bc8356af6..85eb82281 100644 --- a/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt +++ b/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt @@ -1711,7 +1711,7 @@ introduced into a grid search: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 19.485 seconds) + **Total running time of the script:** (0 minutes 20.298 seconds) .. _sphx_glr_download_auto_examples_04_fuzzy_joining.py: diff --git a/dev/_sources/auto_examples/05_deduplication.rst.txt b/dev/_sources/auto_examples/05_deduplication.rst.txt index e1647d9a4..4b304247b 100644 --- a/dev/_sources/auto_examples/05_deduplication.rst.txt +++ b/dev/_sources/auto_examples/05_deduplication.rst.txt @@ -335,7 +335,7 @@ or |MinHash|. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.891 seconds) + **Total running time of the script:** (0 minutes 5.398 seconds) .. _sphx_glr_download_auto_examples_05_deduplication.py: diff --git a/dev/_sources/auto_examples/06_ken_embeddings.rst.txt b/dev/_sources/auto_examples/06_ken_embeddings.rst.txt index d7c5deb74..a6510c37a 100644 --- a/dev/_sources/auto_examples/06_ken_embeddings.rst.txt +++ b/dev/_sources/auto_examples/06_ken_embeddings.rst.txt @@ -305,7 +305,7 @@ We will start by checking out the available tables with .. code-block:: none - {'games', 'all_entities', 'companies', 'movies', 'albums', 'schools'} + {'games', 'all_entities', 'movies', 'schools', 'companies', 'albums'} @@ -840,7 +840,7 @@ It helped significantly improve the prediction score. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 15.721 seconds) + **Total running time of the script:** (11 minutes 31.752 seconds) .. _sphx_glr_download_auto_examples_06_ken_embeddings.py: diff --git a/dev/_sources/auto_examples/07_multiple_key_join.rst.txt b/dev/_sources/auto_examples/07_multiple_key_join.rst.txt index c74538736..d88d0a20e 100644 --- a/dev/_sources/auto_examples/07_multiple_key_join.rst.txt +++ b/dev/_sources/auto_examples/07_multiple_key_join.rst.txt @@ -1225,7 +1225,7 @@ The results: /home/circleci/project/miniconda/envs/testenv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:228: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros warnings.warn( - 0.58915 + 0.58585 @@ -1243,7 +1243,7 @@ Our final cross-validated accuracy score is 0.58. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (10 minutes 53.468 seconds) + **Total running time of the script:** (12 minutes 0.348 seconds) .. _sphx_glr_download_auto_examples_07_multiple_key_join.py: diff --git a/dev/_sources/auto_examples/08_join_aggregation.rst.txt b/dev/_sources/auto_examples/08_join_aggregation.rst.txt index 035284f67..29a896783 100644 --- a/dev/_sources/auto_examples/08_join_aggregation.rst.txt +++ b/dev/_sources/auto_examples/08_join_aggregation.rst.txt @@ -844,75 +844,75 @@ operation maximizing our validation score.
tv.named_transformers_["high_cardinality"].get_feature_names_out()
array(['supports, support, recruit', 'shelters, centers, sheriff',
- 'rockville, fallsgrove, downtown', 'animal, individual, virtual',
- 'engineering, mangement, budgeting', 'station, state, estate',
- 'facilities, maintenance, accounts', 'silver, spring, ride',
- 'behavioral, health, school', 'equipment, management, automotive',
- 'patrol, 4th, 6th', 'safety, collision, section',
- 'communications, communication, telecommunications',
- 'warehouse, delivery, operations', 'highway, welfare, services',
- 'gaithersburg, the, clarksburg', 'nicholson, transit, taxicab',
- 'development, planning, stormwater',
- 'assessment, protective, process',
- 'construction, instruction, building',
- 'mechanical, special, commercial',
- 'technology, systems, telephone', 'district, urban, 3rd',
- 'family, sexual, crimes', 'security, mccf, unit',
- 'custody, members, customer',
+array(['services, highway, service', 'engineering, training, planning',
+ 'traffic, safety, alcohol', 'sexual, family, crimes',
+ 'district, payroll, squad', 'construction, instruction, building',
+ 'silver, spring, ride', 'behavioral, health, school',
+ 'delivery, operations, special', 'gaithersburg, clarksburg, the',
+ 'welfare, childhood, children', 'toddlers, custody, members',
+ 'supports, support, network', 'station, state, estate',
+ 'director, officers, projects', 'management, fleet, parking',
+ 'maintenance, facilities, eligibility',
+ 'processing, programs, accounting', 'security, mc311, mccf',
+ 'communications, communication, immunization',
+ 'administration, battalion, administrative', 'patrol, 4th, 6th',
+ 'procurement, protective, fiscal',
+ 'development, residential, stormwater',
'investigative, investigations, criminal',
- 'administration, administrative, battalion',
- 'assistance, eligibility, emergency', 'automated, traffic, office',
- 'master, registered, firefighter', 'manager, budget, projects',
- 'specialist, special, environmental', 'candidate, police, cadet',
- 'officer, office, traffic', 'operator, bus, operations',
- 'income, assistance, client', 'liquor, clerk, store',
- 'rescuer, recruit, firefighter', 'technician, mechanic, supply',
- 'program, programs, procurement', 'lieutenant, captain, chief',
- 'community, nurse, unit', 'school, health, room',
+ 'emergency, centers, center', 'nicholson, transit, taxicab',
+ 'automotive, assessment, equipment',
+ 'technology, systems, telephone', 'rockville, twinbrook, library',
+ 'dietary, security, partnerships',
+ 'operator, equipment, apprentice',
+ 'recreation, planning, renovation',
+ 'master, registered, firefighter', 'candidate, police, of',
+ 'officer, office, police', 'specialist, special, procurement',
+ 'program, programs, projects', 'technician, mechanic, supply',
+ 'legislative, principal, executive',
+ 'coordinator, coordinating, depot', 'sergeant, cadet, police',
+ 'recruit, firefighter, rescuer', 'community, health, nurse',
+ 'warehouse, welfare, caseworker',
+ 'enforcement, permitting, inspector', 'manager, budget, engineer',
+ 'school, room, behavioral', 'captain, rescue, chief',
+ 'supervisory, supervisor, therapist',
'communications, telecommunications, safety',
- 'coordinator, services, service', 'accountant, assistant, county',
- 'supervisor, supervisory, sergeant',
- 'craftsworker, customer, public', 'sheriff, deputy, autobody',
- 'enforcement, permitting, inspector',
- 'correctional, correction, records',
- 'information, technology, recreation',
- 'administrative, principal, executive',
- 'crossing, purchasing, engineer', 'warehouse, welfare, driver',
- 'corporal, erp, behavioral', 'worker, social, leader',
- 'librarian, library, telephone',
- 'equipment, investment, investigator'], dtype=object)
+ 'liquor, clerk, store', 'librarian, crossing, library',
+ 'sheriff, deputy, autobody',
+ 'information, technology, technologist',
+ 'lieutenant, attendant, facilities', 'services, service, urban',
+ 'assistance, assistant, income',
+ 'correctional, correction, corporal',
+ 'administrative, administration, administrator'], dtype=object)
R2 score: mean: 0.922; std: 0.013
+R2 score: mean: 0.919; std: 0.017
The simple pipeline applied on this complex dataset gave us very good results.
@@ -936,7 +937,7 @@ ConclusionTotal running time of the script: (1 minutes 16.726 seconds)
+Total running time of the script: (1 minutes 20.106 seconds)