diff --git a/dev/.buildinfo b/dev/.buildinfo index 086b5254..d0fd411c 100644 --- a/dev/.buildinfo +++ b/dev/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 0419996a16f2a3c75464ed74042897b9 +config: 0630c51683edc6ca083cc9b7428c67b6 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/dev/_images/sphx_glr_01_encodings_001.png b/dev/_images/sphx_glr_01_encodings_001.png index 65d771a1..1aa55b3d 100644 Binary files a/dev/_images/sphx_glr_01_encodings_001.png and b/dev/_images/sphx_glr_01_encodings_001.png differ diff --git a/dev/_images/sphx_glr_01_encodings_thumb.png b/dev/_images/sphx_glr_01_encodings_thumb.png index 203ecae4..fa6cceb5 100644 Binary files a/dev/_images/sphx_glr_01_encodings_thumb.png and b/dev/_images/sphx_glr_01_encodings_thumb.png differ diff --git a/dev/_images/sphx_glr_08_join_aggregation_003.png b/dev/_images/sphx_glr_08_join_aggregation_003.png index 36100ac6..b3dc3988 100644 Binary files a/dev/_images/sphx_glr_08_join_aggregation_003.png and b/dev/_images/sphx_glr_08_join_aggregation_003.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_001.png b/dev/_images/sphx_glr_09_interpolation_join_001.png index b7fb6a23..6360edce 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_001.png and b/dev/_images/sphx_glr_09_interpolation_join_001.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_002.png b/dev/_images/sphx_glr_09_interpolation_join_002.png index 4b4d9870..cbab44ef 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_002.png and b/dev/_images/sphx_glr_09_interpolation_join_002.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_003.png b/dev/_images/sphx_glr_09_interpolation_join_003.png index 7d56525b..a5d928ef 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_003.png and b/dev/_images/sphx_glr_09_interpolation_join_003.png differ diff --git a/dev/_images/sphx_glr_09_interpolation_join_thumb.png b/dev/_images/sphx_glr_09_interpolation_join_thumb.png index daed39ef..768d05f3 100644 Binary files a/dev/_images/sphx_glr_09_interpolation_join_thumb.png and b/dev/_images/sphx_glr_09_interpolation_join_thumb.png differ diff --git a/dev/_sources/auto_examples/01_encodings.rst.txt b/dev/_sources/auto_examples/01_encodings.rst.txt index a7acdb1a..c16be2ab 100644 --- a/dev/_sources/auto_examples/01_encodings.rst.txt +++ b/dev/_sources/auto_examples/01_encodings.rst.txt @@ -440,45 +440,44 @@ corresponding columns: .. code-block:: none - array(['station, state, estate', 'services, highway, service', - 'investigative, investigations, criminal', - 'nicholson, transit, taxicab', - 'communications, division, applications', 'silver, spring, ride', - 'training, building, recruit', 'gaithersburg, clarksburg, the', - 'patrol, 4th, 6th', 'director, automated, office', - 'programs, program, preparedness', - 'rockville, twinbrook, downtown', - 'eligibility, assistance, assisted', 'safety, section, collision', - 'management, equipment, automotive', 'security, mc311, mccf', + array(['gaithersburg, clarksburg, the', 'supports, support, sports', + 'district, patrol, 3rd', 'telephone, automated, stormwater', + 'special, section, labor', 'environmental, regulatory, behavioral', + 'welfare, childhood, child', 'maintenance, facilities, finance', + 'services, highway, service', 'station, state, estate', + 'traffic, safety, alcohol', 'security, custody, mcdc', + 'silver, spring, urban', 'family, crimes, major', + 'training, recruit, recruiting', + 'technology, systems, information', + 'building, construction, instruction', + 'nicholson, transit, transport', + 'communications, communication, division', + 'emergency, centers, center', 'delivery, warehouse, liquor', + 'management, mangement, engineering', 'administration, administrative, battalion', - 'behavioral, health, school', 'welfare, children, childhood', - 'protective, procurement, project', - 'development, government, stormwater', 'supports, support, sports', - 'family, animal, robbery', 'emergency, commuter, duplicating', - 'custody, toddlers, mcdc', 'district, urban, 3rd', - 'technology, systems, telephone', 'council, centers, members', - 'delivery, special, operations', - 'maintenance, facilities, finance', 'captain, chief, autobody', - 'liquor, clerk, store', 'officer, office, police', - 'master, registered, water', 'operator, bus, operations', - 'administrative, legislative, principal', - 'technician, mechanic, supply', 'manager, budget, engineer', - 'recreation, renovation, resource', 'school, room, behavioral', - 'coordinator, transit, coordinating', - 'enforcement, permitting, inspector', - 'information, technology, technologist', - 'assistance, income, client', 'therapist, sheriff, plumber', + 'investigative, explosive, investigations', + 'eligibility, assistance, assisted', 'health, school, based', + 'fleet, animal, bureau', 'downtown, rockville, library', + 'accounts, toddlers, council', 'protective, programs, program', + 'officer, office, traffic', 'warehouse, welfare, caseworker', 'firefighter, rescuer, recruit', - 'correctional, correction, regional', - 'accountant, assistant, library', + 'librarian, candidate, psychiatric', 'income, assistance, client', + 'coordinator, services, service', 'manager, iii, management', + 'equipment, investment, investigator', 'operator, bus, operations', + 'specialist, special, quality', + 'enforcement, inspector, permitting', + 'technician, mechanic, supply', 'communications, telecommunications, safety', - 'services, service, aide', 'community, health, nurse', - 'sergeant, cadet, emergency', 'craftsworker, worker, social', - 'specialist, special, procurement', - 'crossing, purchasing, planning', 'warehouse, welfare, driver', - 'corporal, erp, behavioral', 'program, programs, projects', - 'equipment, investment, investigator', - 'lieutenant, attendant, shift'], dtype=object) + 'crossing, parking, guard', + 'administrative, legislative, principal', + 'correctional, correction, corporal', 'school, room, behavioral', + 'community, nurse, health', 'liquor, clerk, store', + 'lieutenant, maintenance, client', 'sheriff, deputy, aide', + 'accountant, assistant, library', 'sergeant, police, cadet', + 'captain, chief, autobody', 'supervisor, supervisory, transit', + 'program, programs, projects', 'environmental, therapist, budget', + 'master, registered, meter', 'information, technology, renovation', + 'planning, senior, background'], dtype=object) @@ -560,7 +559,7 @@ Let's look at the cross-validated R2 score of our model: .. code-block:: none - R2 score: mean: 0.921; std: 0.016 + R2 score: mean: 0.923; std: 0.013 @@ -696,7 +695,7 @@ to plot the feature importances. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (1 minutes 19.568 seconds) + **Total running time of the script:** (1 minutes 8.670 seconds) .. _sphx_glr_download_auto_examples_01_encodings.py: diff --git a/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt b/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt index 694892af..36c38497 100644 --- a/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt +++ b/dev/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt @@ -504,7 +504,7 @@ as a set of latent topics. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 2.041 seconds) + **Total running time of the script:** (0 minutes 1.648 seconds) .. _sphx_glr_download_auto_examples_02_feature_interpretation_with_gapencoder.py: diff --git a/dev/_sources/auto_examples/03_datetime_encoder.rst.txt b/dev/_sources/auto_examples/03_datetime_encoder.rst.txt index d7276e52..9707ebad 100644 --- a/dev/_sources/auto_examples/03_datetime_encoder.rst.txt +++ b/dev/_sources/auto_examples/03_datetime_encoder.rst.txt @@ -610,7 +610,7 @@ and transforms datetime columns by default. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.948 seconds) + **Total running time of the script:** (0 minutes 4.149 seconds) .. _sphx_glr_download_auto_examples_03_datetime_encoder.py: diff --git a/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt b/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt index dab97550..142db368 100644 --- a/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt +++ b/dev/_sources/auto_examples/04_fuzzy_joining.rst.txt @@ -1711,7 +1711,7 @@ introduced into a grid search: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 19.674 seconds) + **Total running time of the script:** (0 minutes 17.135 seconds) .. _sphx_glr_download_auto_examples_04_fuzzy_joining.py: diff --git a/dev/_sources/auto_examples/05_deduplication.rst.txt b/dev/_sources/auto_examples/05_deduplication.rst.txt index 91a2ccc9..6b32cfbd 100644 --- a/dev/_sources/auto_examples/05_deduplication.rst.txt +++ b/dev/_sources/auto_examples/05_deduplication.rst.txt @@ -335,7 +335,7 @@ or |MinHash|. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.680 seconds) + **Total running time of the script:** (0 minutes 4.909 seconds) .. _sphx_glr_download_auto_examples_05_deduplication.py: diff --git a/dev/_sources/auto_examples/06_ken_embeddings.rst.txt b/dev/_sources/auto_examples/06_ken_embeddings.rst.txt index 9ff2206f..a326a489 100644 --- a/dev/_sources/auto_examples/06_ken_embeddings.rst.txt +++ b/dev/_sources/auto_examples/06_ken_embeddings.rst.txt @@ -305,7 +305,7 @@ We will start by checking out the available tables with .. code-block:: none - {'companies', 'games', 'all_entities', 'albums', 'schools', 'movies'} + {'all_entities', 'companies', 'movies', 'albums', 'games', 'schools'} @@ -840,7 +840,7 @@ It helped significantly improve the prediction score. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 7.838 seconds) + **Total running time of the script:** (10 minutes 30.856 seconds) .. _sphx_glr_download_auto_examples_06_ken_embeddings.py: diff --git a/dev/_sources/auto_examples/07_multiple_key_join.rst.txt b/dev/_sources/auto_examples/07_multiple_key_join.rst.txt index 71dacf7c..4897f89a 100644 --- a/dev/_sources/auto_examples/07_multiple_key_join.rst.txt +++ b/dev/_sources/auto_examples/07_multiple_key_join.rst.txt @@ -1226,7 +1226,7 @@ The results: /home/circleci/project/miniconda/envs/testenv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:228: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros warnings.warn( - 0.58995 + 0.5878500000000001 @@ -1244,7 +1244,7 @@ Our final cross-validated accuracy score is 0.58. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 29.384 seconds) + **Total running time of the script:** (10 minutes 56.053 seconds) .. _sphx_glr_download_auto_examples_07_multiple_key_join.py: diff --git a/dev/_sources/auto_examples/08_join_aggregation.rst.txt b/dev/_sources/auto_examples/08_join_aggregation.rst.txt index c66ab1fe..8ba3cd56 100644 --- a/dev/_sources/auto_examples/08_join_aggregation.rst.txt +++ b/dev/_sources/auto_examples/08_join_aggregation.rst.txt @@ -844,75 +844,75 @@ operation maximizing our validation score.
tv.named_transformers_["high_cardinality"].get_feature_names_out()
array(['station, state, estate', 'services, highway, service',
- 'investigative, investigations, criminal',
- 'nicholson, transit, taxicab',
- 'communications, division, applications', 'silver, spring, ride',
- 'training, building, recruit', 'gaithersburg, clarksburg, the',
- 'patrol, 4th, 6th', 'director, automated, office',
- 'programs, program, preparedness',
- 'rockville, twinbrook, downtown',
- 'eligibility, assistance, assisted', 'safety, section, collision',
- 'management, equipment, automotive', 'security, mc311, mccf',
+array(['gaithersburg, clarksburg, the', 'supports, support, sports',
+ 'district, patrol, 3rd', 'telephone, automated, stormwater',
+ 'special, section, labor', 'environmental, regulatory, behavioral',
+ 'welfare, childhood, child', 'maintenance, facilities, finance',
+ 'services, highway, service', 'station, state, estate',
+ 'traffic, safety, alcohol', 'security, custody, mcdc',
+ 'silver, spring, urban', 'family, crimes, major',
+ 'training, recruit, recruiting',
+ 'technology, systems, information',
+ 'building, construction, instruction',
+ 'nicholson, transit, transport',
+ 'communications, communication, division',
+ 'emergency, centers, center', 'delivery, warehouse, liquor',
+ 'management, mangement, engineering',
'administration, administrative, battalion',
- 'behavioral, health, school', 'welfare, children, childhood',
- 'protective, procurement, project',
- 'development, government, stormwater', 'supports, support, sports',
- 'family, animal, robbery', 'emergency, commuter, duplicating',
- 'custody, toddlers, mcdc', 'district, urban, 3rd',
- 'technology, systems, telephone', 'council, centers, members',
- 'delivery, special, operations',
- 'maintenance, facilities, finance', 'captain, chief, autobody',
- 'liquor, clerk, store', 'officer, office, police',
- 'master, registered, water', 'operator, bus, operations',
- 'administrative, legislative, principal',
- 'technician, mechanic, supply', 'manager, budget, engineer',
- 'recreation, renovation, resource', 'school, room, behavioral',
- 'coordinator, transit, coordinating',
- 'enforcement, permitting, inspector',
- 'information, technology, technologist',
- 'assistance, income, client', 'therapist, sheriff, plumber',
+ 'investigative, explosive, investigations',
+ 'eligibility, assistance, assisted', 'health, school, based',
+ 'fleet, animal, bureau', 'downtown, rockville, library',
+ 'accounts, toddlers, council', 'protective, programs, program',
+ 'officer, office, traffic', 'warehouse, welfare, caseworker',
'firefighter, rescuer, recruit',
- 'correctional, correction, regional',
- 'accountant, assistant, library',
+ 'librarian, candidate, psychiatric', 'income, assistance, client',
+ 'coordinator, services, service', 'manager, iii, management',
+ 'equipment, investment, investigator', 'operator, bus, operations',
+ 'specialist, special, quality',
+ 'enforcement, inspector, permitting',
+ 'technician, mechanic, supply',
'communications, telecommunications, safety',
- 'services, service, aide', 'community, health, nurse',
- 'sergeant, cadet, emergency', 'craftsworker, worker, social',
- 'specialist, special, procurement',
- 'crossing, purchasing, planning', 'warehouse, welfare, driver',
- 'corporal, erp, behavioral', 'program, programs, projects',
- 'equipment, investment, investigator',
- 'lieutenant, attendant, shift'], dtype=object)
+ 'crossing, parking, guard',
+ 'administrative, legislative, principal',
+ 'correctional, correction, corporal', 'school, room, behavioral',
+ 'community, nurse, health', 'liquor, clerk, store',
+ 'lieutenant, maintenance, client', 'sheriff, deputy, aide',
+ 'accountant, assistant, library', 'sergeant, police, cadet',
+ 'captain, chief, autobody', 'supervisor, supervisory, transit',
+ 'program, programs, projects', 'environmental, therapist, budget',
+ 'master, registered, meter', 'information, technology, renovation',
+ 'planning, senior, background'], dtype=object)
R2 score: mean: 0.921; std: 0.016
+R2 score: mean: 0.923; std: 0.013
The simple pipeline applied on this complex dataset gave us very good results.
@@ -937,7 +936,7 @@ ConclusionTotal running time of the script: (1 minutes 19.568 seconds)
+Total running time of the script: (1 minutes 8.670 seconds)