diff --git a/output/quality_linking.pdf b/output/quality_linking.pdf index 5c0105e..9401ad1 100644 Binary files a/output/quality_linking.pdf and b/output/quality_linking.pdf differ diff --git a/output/quality_linking_advisors.pdf b/output/quality_linking_advisors.pdf index e63c8da..8b37470 100644 Binary files a/output/quality_linking_advisors.pdf and b/output/quality_linking_advisors.pdf differ diff --git a/src/dataprep/main/link/advisors.sh b/src/dataprep/main/link/advisors.sh index b31db93..b174c9e 100644 --- a/src/dataprep/main/link/advisors.sh +++ b/src/dataprep/main/link/advisors.sh @@ -7,25 +7,68 @@ keywords=False fieldofstudy_cat=False fieldofstudy_str=False institution=True +fields=("art" + "biology" + "business" + "chemistry" + "computer science" + "economics" + "engineering" + "environmental science" + "geography" + "geology" + "history" + "materials science" + "mathematics" + "medicine" + "philosophy" + "physics" + "political science" + "psychology" + "sociology") -fields=("chemistry" - "sociology" - "mathematics" - "biology" - "computer science" - "political science" - "engineering" - "psychology" - "environmental science" - "physics" - "geology" - "geography" - "economics") -fields=("political science") -for i in "${!fields[@]}"; do +for i in "${!fields[@]}"; do field=${fields[$i]} echo ${field} screen -dmS "advisors.${field}" sh main/link/link_onefield_advisors.sh $RECALL "$field" $train_name $institution $fieldofstudy_cat $fieldofstudy_str $keywords $logfile_path echo "Started screen ..." done +wait + + + +# start_field_i() { +# fields=("art" +# "biology" +# "business" +# "chemistry" +# "computer science" +# "economics" +# "engineering" +# "environmental science" +# "geography" +# "geology" +# "history" +# "materials science" +# "mathematics" +# "medicine" +# "philosophy" +# "physics" +# "political science" +# "psychology" +# "sociology") +# field=${fields[$1]} +# echo $1 +# echo ${field} +# screen -dmS "advisors.${field}" sh main/link/link_onefield_advisors.sh $RECALL "$field" $train_name $institution $fieldofstudy_cat $fieldofstudy_str $keywords $logfile_path & +# while screen -list | grep -q $"advisors.${field}" +# do +# sleep 1 +# done +# echo "Started screen ..." +# wait +# } +# export -f start_field_i +# parallel -j 3 start_field_i ::: $(seq 0 18) + diff --git a/src/dataprep/main/link/graduates.sh b/src/dataprep/main/link/graduates.sh index 5fdee33..91013bd 100644 --- a/src/dataprep/main/link/graduates.sh +++ b/src/dataprep/main/link/graduates.sh @@ -21,7 +21,17 @@ fields=("chemistry" "geology" "geography" "economics") - + +fields=("art" + "business" + "history" + "materials science" + "medicine" + "philosophy" + ) + fields=("philosophy" + ) + for i in "${!fields[@]}"; do field=${fields[$i]} diff --git a/src/dataprep/main/link/prep_linked_data.py b/src/dataprep/main/link/prep_linked_data.py index 2e58946..97c5229 100644 --- a/src/dataprep/main/link/prep_linked_data.py +++ b/src/dataprep/main/link/prep_linked_data.py @@ -31,7 +31,6 @@ import pdb import argparse -# ## Arguments # ## Arguments parser = argparse.ArgumentParser(description = 'Inputs for author_collab') parser.add_argument("--filter_trainname", @@ -145,7 +144,7 @@ # for now, do not condition on certain time distance between # graduation year and whenever the supervisor has a publication. -# TODO: do this after gaining some insights in the analysis +# Do this on the fly after gaining some insights in the analysis con.execute("CREATE UNIQUE INDEX idx_cla_AuthorIdrelid ON current_links_advisors (AuthorId ASC, relationship_id ASC)") con.execute("CREATE UNIQUE INDEX idx_cla_relid ON current_links_advisors (relationship_id ASC)") # this is also a way to make sure there are not multiple links per goid diff --git a/src/dataprep/main/link/setup_linking.py b/src/dataprep/main/link/setup_linking.py index 5701650..815a68e 100644 --- a/src/dataprep/main/link/setup_linking.py +++ b/src/dataprep/main/link/setup_linking.py @@ -34,7 +34,7 @@ def __eq__(self, other): # ## Some settings pd.set_option('display.max.columns', None) -path_dedupe_files = datapath + "DedupeFiles/flavio/issue-21/" # TODO: this needs to be fixed at the end and any new files copied to DedupeFiles/advisors +path_dedupe_files = datapath + "DedupeFiles/" share_blockedpairs_training = 0.66 # fraction of similar pairs as opposed to random pairs # register [adapter for numpy.int64](https://stackoverflow.com/questions/38753737/inserting-numpy-integer-types-into-sqlite-with-python3) diff --git a/src/dataprep/main/reports/quality_linking.Rmd b/src/dataprep/main/reports/quality_linking.Rmd index ac93083..17fc99e 100644 --- a/src/dataprep/main/reports/quality_linking.Rmd +++ b/src/dataprep/main/reports/quality_linking.Rmd @@ -22,10 +22,26 @@ lapply(packages, library, character.only = TRUE) datapath <- "/mnt/ssd/" db_file <- paste0(datapath, "AcademicGraph/AcademicGraph.sqlite") -select_fields <- c("physics", "biology", "chemistry", "sociology", - "economics", "political science", "psychology", - "mathematics", "geography", "geology", "engineering", - "computer science", "environmental science") # fields currently matched +select_fields <- c("art", + "biology", + "business", + "chemistry", + "computer science" , + "economics", + "engineering", + "environmental science", + "geography", + "geology" , + "history", + "materials science", + "mathematics", + "medicine", + "philosophy", + "physics", + "political science", + "psychology" , + "sociology") # all fields are currently matched + date_method_change <- ymd("2022-07-01") # after summer we extended the sampling period and added more features diff --git a/src/dataprep/main/reports/quality_linking_advisors.Rmd b/src/dataprep/main/reports/quality_linking_advisors.Rmd index ef09639..2162b5f 100644 --- a/src/dataprep/main/reports/quality_linking_advisors.Rmd +++ b/src/dataprep/main/reports/quality_linking_advisors.Rmd @@ -22,10 +22,25 @@ lapply(packages, library, character.only = TRUE) datapath <- "/mnt/ssd/" db_file <- paste0(datapath, "AcademicGraph/AcademicGraph.sqlite") -select_fields <- c("physics", "biology", "chemistry", "sociology", - "economics", "political science", "psychology", - "mathematics", "geography", "geology", "engineering", - "computer science", "environmental science") # fields currently matched +select_fields <- c("art", + "biology", + "business", + "chemistry", + "computer science" , + "economics", + "engineering", + "environmental science", + "geography", + "geology" , + "history", + "materials science", + "mathematics", + #"medicine", + "philosophy", + "physics", + "political science", + "psychology" , + "sociology") # fields currently matched # ## db connection @@ -137,10 +152,11 @@ linked_advisors %>% ```{r} -keep_fields <- c("biology", "chemistry", "computer science", - "economics", "engineering", "environmental science", - "geography", "geology", "mathetmatics", "physics", - "political science", "psychology", "sociology") +keep_fields <- select_fields +# c("biology", "chemistry", "computer science", +# "economics", "engineering", "environmental science", +# "geography", "geology", "mathetmatics", "physics", +# "political science", "psychology", "sociology") score_by_year <- theses %>% filter(degree_year >= 1985) %>% diff --git a/src/dataprep/pipeline.sh b/src/dataprep/pipeline.sh index 7a19d5e..f8c7ac5 100644 --- a/src/dataprep/pipeline.sh +++ b/src/dataprep/pipeline.sh @@ -83,6 +83,10 @@ Rscript -e "rmarkdown::render('$script_path/reports/sample_size_linking.Rmd', ou # ## 1. Link graduates to MAG bash $script_path/link/graduates.sh $logfile_path +# Christoph retrained with with the following options: +# --train_name "christoph_degree0" --keepyears "19852015" +# need to run the write_csv_links script with these options as well +# to get all links into db python -m $script_path.link.write_csv_links --linking_type "graduates" --train_name "christoph_fielddegree0" \ &> $logfile_path/write_csv_links_graduates.log diff --git a/src/dataprep/temp/createlink_mag_proquest_art_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_art_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..42a83c5 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_art_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['art'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [142362112] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_art_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 7540 links for 86635 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 5.894030499458313 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_art_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_art_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..6d355a0 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_art_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['art'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [142362112] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/settings_art_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 8.115657456715901 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_biology_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_biology_m:1_christoph_degree0_advisors_9015.log index c6ebfd3..5f62025 100644 --- a/src/dataprep/temp/createlink_mag_proquest_biology_m:1_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/createlink_mag_proquest_biology_m:1_christoph_degree0_advisors_9015.log @@ -150,4 +150,4 @@ Running ANALYZE... Copying to csv... Done copying to csv... Deleted the temporary database... -Done in 727.9574162801107 minutes. +Done in 688.186221853892 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_business_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_business_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..c96824d --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_business_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['business'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [144133560] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_business_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 11032 links for 61775 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 23.512397162119548 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_business_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_business_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..0e6741e --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_business_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['business'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [144133560] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/settings_business_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 27.697984190781913 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_chemistry_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_chemistry_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..dfee416 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_chemistry_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['chemistry'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [185592680] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_chemistry_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 592.248101190726 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_computer science_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_computer science_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..8eb8bd2 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_computer science_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['computer science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [41008148] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_computer_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 378.68625177542367 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_economics_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_economics_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..eaf8b95 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_economics_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['economics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [162324750] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_economics_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 6.179265709718068 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_engineering_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_engineering_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..af4f1ff --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_engineering_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['engineering'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [127413603] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_engineering_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 150.64668984810513 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_environmental science_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_environmental science_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..b9906d2 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_environmental science_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['environmental science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [39432304] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_environmental_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 51.86260715325673 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_geography_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_geography_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..fb5c837 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_geography_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['geography'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [205649164] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_geography_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 52.296547615528105 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_geology_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_geology_m:1_christoph_degree0_advisors_9015.log index 4705d2c..f3c6cf5 100644 --- a/src/dataprep/temp/createlink_mag_proquest_geology_m:1_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/createlink_mag_proquest_geology_m:1_christoph_degree0_advisors_9015.log @@ -133,7 +133,7 @@ id_field is [127313418] and will be passed to sql queries. WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL -reading from: /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/settings_geology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +reading from: /mnt/ssd/DedupeFiles/advisors/settings_geology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 Link now ... made pairs calculated scores @@ -141,7 +141,7 @@ made m:1 links Writing to database... Filling table info... Filled table info... -Iteration id is 79 +Iteration id is 81 Filling links into db... Filled links into db... Wrote linking info into db... @@ -150,4 +150,4 @@ Running ANALYZE... Copying to csv... Done copying to csv... Deleted the temporary database... -Done in 8.049215114116668 minutes. +Done in 18.573138852914173 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_history_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_history_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..11aae58 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_history_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['history'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [95457728] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_history_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 8887 links for 65268 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 5.847857069969177 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_history_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_history_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..b3d99f6 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_history_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['history'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [95457728] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_history_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 9.996124601364135 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_materials science_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_materials science_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..b81de54 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_materials science_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['materials science'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [192562407] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_materials_science_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 18796 links for 54864 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 169.59391787846883 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_materials science_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_materials science_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..e981837 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_materials science_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['materials science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [192562407] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_materials_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 299.87566511631013 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_mathematics_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_mathematics_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..9326615 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_mathematics_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['mathematics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [33923547] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_mathematics_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 164.0216916879018 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_medicine_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_medicine_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..cdcb20f --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_medicine_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['medicine'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [71924100] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_medicine_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 70719 links for 141642 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 776.7002680619557 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_medicine_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_medicine_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..6307cbd --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_medicine_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,145 @@ +Namespace(testing=False, verbose=1, field=['medicine'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [71924100] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_medicine_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... diff --git a/src/dataprep/temp/createlink_mag_proquest_philosophy_1:1_christoph_degree0_graduates_8515.log b/src/dataprep/temp/createlink_mag_proquest_philosophy_1:1_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..89fb8c0 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_philosophy_1:1_christoph_degree0_graduates_8515.log @@ -0,0 +1,146 @@ +Namespace(testing=False, verbose=1, field=['philosophy'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [138885662] and will be passed to sql queries. + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +reading from: /mnt/ssd/DedupeFiles/graduates/settings_philosophy_1985_2015_institutionFalse_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsTruechristoph_degree0 +Link now ... +made pairs +calculated scores +made 1:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 53 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Found 3322 links for 36386 graduates with a score of at least 0. +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 2.202679904301961 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_philosophy_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_philosophy_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..e42779d --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_philosophy_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['philosophy'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [138885662] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_philosophy_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 4.286465350786845 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_physics_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_physics_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..596569e --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_physics_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['physics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [121332964] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_physics_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 167.19190193414687 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_political science_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_political science_m:1_christoph_degree0_advisors_9015.log index 95389a2..6493974 100644 --- a/src/dataprep/temp/createlink_mag_proquest_political science_m:1_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/createlink_mag_proquest_political science_m:1_christoph_degree0_advisors_9015.log @@ -133,7 +133,7 @@ id_field is [17744445] and will be passed to sql queries. WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL -reading from: /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/settings_political_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +reading from: /mnt/ssd/DedupeFiles/advisors/settings_political_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 Link now ... made pairs calculated scores @@ -141,7 +141,7 @@ made m:1 links Writing to database... Filling table info... Filled table info... -Iteration id is 80 +Iteration id is 81 Filling links into db... Filled links into db... Wrote linking info into db... @@ -150,4 +150,4 @@ Running ANALYZE... Copying to csv... Done copying to csv... Deleted the temporary database... -Done in 24.474797221024833 minutes. +Done in 81.31027365525564 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_psychology_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_psychology_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..defdf79 --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_psychology_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['psychology'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [15744967] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_psychology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 282.53486650387447 minutes. diff --git a/src/dataprep/temp/createlink_mag_proquest_sociology_m:1_christoph_degree0_advisors_9015.log b/src/dataprep/temp/createlink_mag_proquest_sociology_m:1_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..1e07efa --- /dev/null +++ b/src/dataprep/temp/createlink_mag_proquest_sociology_m:1_christoph_degree0_advisors_9015.log @@ -0,0 +1,153 @@ +Namespace(testing=False, verbose=1, field=['sociology'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='csv') +Have max 12 cores available +Testing is False + +I set the write connection to temporary database. +id_field is [144024400] and will be passed to sql queries. + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +reading from: /mnt/ssd/DedupeFiles/advisors/settings_sociology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Link now ... +made pairs +calculated scores +made m:1 links +Writing to database... +Filling table info... +Filled table info... +Iteration id is 81 +Filling links into db... +Filled links into db... +Wrote linking info into db... +Running ANALYZE... + +Copying to csv... +Done copying to csv... +Deleted the temporary database... +Done in 34.99598572651545 minutes. diff --git a/src/dataprep/temp/prep_linked_data.log b/src/dataprep/temp/prep_linked_data.log index 92744be..7c15b15 100644 --- a/src/dataprep/temp/prep_linked_data.log +++ b/src/dataprep/temp/prep_linked_data.log @@ -1,4 +1,4 @@ -Start time: 1664470032.5248213 +Start time: 1670232570.3368495 Using the following DocTypes for citations: ('Journal', 'Book', 'BookChapter', 'Conference')... @@ -8,16 +8,16 @@ where_stmt_iterations is current_links for graduates current_links for advisors -Time elapsed: 0.33170623779296876 minutes +Time elapsed: 0.2818102161089579 minutes Making author_citations... -Time elapsed: 7.353962099552154 minutes +Time elapsed: 11.818827704588573 minutes Making author_output... -Time elapsed: 22.382010038693746 minutes +Time elapsed: 26.798788146177927 minutes Running ANALYZE... -Done in 22.38204313913981 minutes. +Done in 26.79902730782827 minutes. diff --git a/src/dataprep/temp/quality_linking_advisors.log b/src/dataprep/temp/quality_linking_advisors.log index 3d30881..a902cc0 100644 --- a/src/dataprep/temp/quality_linking_advisors.log +++ b/src/dataprep/temp/quality_linking_advisors.log @@ -1,3 +1,8 @@ +During startup - Warning messages: +1: Setting LC_TIME failed, using "C" +2: Setting LC_MONETARY failed, using "C" +3: Setting LC_PAPER failed, using "C" +4: Setting LC_MEASUREMENT failed, using "C" processing file: quality_linking_advisors.Rmd @@ -41,11 +46,43 @@ label: unnamed-chunk-4 | |............................. | 41% label: unnamed-chunk-5 +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_TIME = "sv_SE.UTF-8", + LC_MONETARY = "sv_SE.UTF-8", + LC_ADDRESS = "sv_SE.UTF-8", + LC_TELEPHONE = "sv_SE.UTF-8", + LC_NAME = "sv_SE.UTF-8", + LC_MEASUREMENT = "sv_SE.UTF-8", + LC_IDENTIFICATION = "sv_SE.UTF-8", + LC_NUMERIC = "sv_SE.UTF-8", + LC_PAPER = "sv_SE.UTF-8", + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to a fallback locale ("en_US.UTF-8"). | |............................... | 45% ordinary text without R code | |.................................. | 48% label: unnamed-chunk-6 +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_TIME = "sv_SE.UTF-8", + LC_MONETARY = "sv_SE.UTF-8", + LC_ADDRESS = "sv_SE.UTF-8", + LC_TELEPHONE = "sv_SE.UTF-8", + LC_NAME = "sv_SE.UTF-8", + LC_MEASUREMENT = "sv_SE.UTF-8", + LC_IDENTIFICATION = "sv_SE.UTF-8", + LC_NUMERIC = "sv_SE.UTF-8", + LC_PAPER = "sv_SE.UTF-8", + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to a fallback locale ("en_US.UTF-8"). | |.................................... | 52% ordinary text without R code @@ -56,6 +93,22 @@ label: unnamed-chunk-7 | |........................................... | 62% label: unnamed-chunk-8 +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_TIME = "sv_SE.UTF-8", + LC_MONETARY = "sv_SE.UTF-8", + LC_ADDRESS = "sv_SE.UTF-8", + LC_TELEPHONE = "sv_SE.UTF-8", + LC_NAME = "sv_SE.UTF-8", + LC_MEASUREMENT = "sv_SE.UTF-8", + LC_IDENTIFICATION = "sv_SE.UTF-8", + LC_NUMERIC = "sv_SE.UTF-8", + LC_PAPER = "sv_SE.UTF-8", + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to a fallback locale ("en_US.UTF-8"). | |.............................................. | 66% ordinary text without R code @@ -66,6 +119,22 @@ label: unnamed-chunk-9 | |..................................................... | 76% label: unnamed-chunk-10 +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_TIME = "sv_SE.UTF-8", + LC_MONETARY = "sv_SE.UTF-8", + LC_ADDRESS = "sv_SE.UTF-8", + LC_TELEPHONE = "sv_SE.UTF-8", + LC_NAME = "sv_SE.UTF-8", + LC_MEASUREMENT = "sv_SE.UTF-8", + LC_IDENTIFICATION = "sv_SE.UTF-8", + LC_NUMERIC = "sv_SE.UTF-8", + LC_PAPER = "sv_SE.UTF-8", + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to a fallback locale ("en_US.UTF-8"). | |........................................................ | 79% ordinary text without R code @@ -76,6 +145,22 @@ label: unnamed-chunk-11 | |............................................................... | 90% label: unnamed-chunk-12 +perl: warning: Setting locale failed. +perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_TIME = "sv_SE.UTF-8", + LC_MONETARY = "sv_SE.UTF-8", + LC_ADDRESS = "sv_SE.UTF-8", + LC_TELEPHONE = "sv_SE.UTF-8", + LC_NAME = "sv_SE.UTF-8", + LC_MEASUREMENT = "sv_SE.UTF-8", + LC_IDENTIFICATION = "sv_SE.UTF-8", + LC_NUMERIC = "sv_SE.UTF-8", + LC_PAPER = "sv_SE.UTF-8", + LANG = "en_US.UTF-8" + are supported and installed on your system. +perl: warning: Falling back to a fallback locale ("en_US.UTF-8"). | |................................................................. | 93% ordinary text without R code @@ -90,6 +175,6 @@ List of 1 output file: quality_linking_advisors.knit.md -/home/flavio/miniconda3/envs/science-career-tempenv/bin/pandoc +RTS -K512m -RTS quality_linking_advisors.knit.md --to latex --from markdown+autolink_bare_uris+tex_math_single_backslash --output /home/flavio/projects/mag_sample/output/quality_linking_advisors.tex --lua-filter /home/flavio/R/x86_64-pc-linux-gnu-library/4.1/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /home/flavio/R/x86_64-pc-linux-gnu-library/4.1/rmarkdown/rmarkdown/lua/latex-div.lua --embed-resources --standalone --table-of-contents --toc-depth 3 --highlight-style tango --pdf-engine pdflatex --variable graphics --variable 'geometry:margin=1in' +/home/christoph/anaconda3/envs/science-career-tempenv/bin/pandoc +RTS -K512m -RTS quality_linking_advisors.knit.md --to latex --from markdown+autolink_bare_uris+tex_math_single_backslash --output /home/christoph/mag_sample/output/quality_linking_advisors.tex --lua-filter /home/christoph/R/x86_64-pc-linux-gnu-library/4.1/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /home/christoph/R/x86_64-pc-linux-gnu-library/4.1/rmarkdown/rmarkdown/lua/latex-div.lua --embed-resources --standalone --table-of-contents --toc-depth 3 --highlight-style tango --pdf-engine pdflatex --variable graphics --variable 'geometry:margin=1in' -Output created: /home/flavio/projects/mag_sample/output/quality_linking_advisors.pdf +Output created: /home/christoph/mag_sample/output/quality_linking_advisors.pdf diff --git a/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..2bd43d5 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_advisors_9015.log @@ -0,0 +1,687 @@ +Namespace(testing=False, verbose=1, field=['art'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [142362112] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0009133100509643554 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 34.11048614184062 minutes + +Starting active labeling... +firstname : william +lastname : labov +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of pennsylvania'),) +all_us_institutions_year : ((1995, 'university of pennsylvania'),) + +firstname : william +lastname : labov +middlename : None +year_range : (1963, 2020) +main_us_institutions_year : ((1963, 'columbia university'), (1964, 'columbia university'), (1966, 'columbia university'), (1966, 'university of pennsylvania'), (1970, 'university of pennsylvania'), (1971, 'university of pennsylvania'), (1972, 'university of pennsylvania'), (1973, 'university of pennsylvania'), (1979, 'university of pennsylvania'), (1982, 'university of pennsylvania'), (1983, 'university of pennsylvania'), (1989, 'university of pennsylvania'), (1990, 'university of pennsylvania'), (1991, 'university of pennsylvania'), (1995, 'university of pennsylvania'), (1997, 'university of pennsylvania'), (2001, 'university of pennsylvania'), (2002, 'university of pennsylvania'), (2006, 'university of pennsylvania'), (2010, 'university of pennsylvania'), (2011, 'university of pennsylvania'), (2013, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2016, 'university of pennsylvania'), (2017, 'university of pennsylvania'), (2018, 'university of pennsylvania')) +all_us_institutions_year : ((1963, 'columbia university'), (1964, 'columbia university'), (1966, 'columbia university'), (1966, 'university of pennsylvania'), (1970, 'university of pennsylvania'), (1971, 'university of pennsylvania'), (1972, 'university of pennsylvania'), (1973, 'university of pennsylvania'), (1978, 'university of pennsylvania'), (1979, 'university of pennsylvania'), (1982, 'university of pennsylvania'), (1983, 'university of pennsylvania'), (1989, 'university of pennsylvania'), (1990, 'university of pennsylvania'), (1991, 'university of pennsylvania'), (1995, 'university of pennsylvania'), (1997, 'university of pennsylvania'), (2001, 'university of pennsylvania'), (2002, 'university of pennsylvania'), (2006, 'university of pennsylvania'), (2008, 'university of pennsylvania'), (2010, 'university of pennsylvania'), (2011, 'university of pennsylvania'), (2013, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2015, 'university of pennsylvania'), (2016, 'university of pennsylvania'), (2017, 'university of pennsylvania'), (2018, 'university of pennsylvania')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : michael +lastname : zank +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'boston university'),) +all_us_institutions_year : ((2000, 'boston university'),) + +firstname : michael +lastname : zank +middlename : None +year_range : (1996, 2019) +main_us_institutions_year : ((1996, 'boston university'), (1999, 'boston university'), (2000, 'boston university'), (2004, 'boston university'), (2008, 'boston university'), (2009, 'boston university'), (2010, 'boston university'), (2012, 'boston university'), (2019, 'boston university')) +all_us_institutions_year : ((1996, 'boston university'), (1999, 'boston university'), (2000, 'boston university'), (2004, 'boston university'), (2008, 'boston university'), (2009, 'boston university'), (2010, 'boston university'), (2012, 'boston university'), (2017, 'boston university'), (2019, 'boston university'), (2021, 'boston university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : victoriano +lastname : roncerolopez +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'stony brook university'),) +all_us_institutions_year : ((2009, 'stony brook university'),) + +firstname : victoriano +lastname : roncero +middlename : None +year_range : (2014, 2018) +main_us_institutions_year : ((2014, 'stony brook university'), (2015, 'stony brook university'), (2018, 'stony brook university')) +all_us_institutions_year : ((2014, 'stony brook university'), (2015, 'stony brook university'), (2017, 'stony brook university'), (2018, 'stony brook university'), (2019, 'stony brook university')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : browne +middlename : dennis +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of minnesota'),) +all_us_institutions_year : ((1999, 'university of minnesota'),) + +firstname : michael +lastname : brown +middlename : None +year_range : (1983, 2006) +main_us_institutions_year : ((1983, 'texas state university'), (2002, 'texas state university'), (2003, 'texas state university'), (2004, 'texas state university'), (2005, 'texas state university'), (2006, 'texas state university')) +all_us_institutions_year : ((1983, 'texas state university'), (2002, 'texas state university'), (2003, 'texas state university'), (2004, 'texas state university'), (2005, 'san jose state university'), (2005, 'texas state university'), (2006, 'texas state university')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : everson +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of iowa'),) +all_us_institutions_year : ((2003, 'university of iowa'),) + +firstname : michael +lastname : iverson +middlename : None +year_range : (2007, 2021) +main_us_institutions_year : ((2007, 'university of iowa'), (2008, 'university of iowa'), (2009, 'university of iowa'), (2011, 'university of iowa'), (2016, 'indiana university'), (2017, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university'), (2021, 'indiana university')) +all_us_institutions_year : ((2007, 'university of iowa'), (2008, 'university of iowa'), (2009, 'university of iowa'), (2011, 'university of iowa'), (2016, 'indiana university'), (2017, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university'), (2021, 'indiana university')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : cecelia +lastname : ford +middlename : e +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of wisconsin madison'),) +all_us_institutions_year : ((2002, 'university of wisconsin madison'),) + +firstname : c +lastname : forest +middlename : b +year_range : (1994, 2021) +main_us_institutions_year : ((1999, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2001, 'university of wisconsin madison'), (2002, 'university of wisconsin madison'), (2003, 'university of wisconsin madison'), (2005, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2019, 'university of wisconsin madison')) +all_us_institutions_year : ((1999, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2001, 'university of wisconsin madison'), (2002, 'university of wisconsin madison'), (2003, 'university of wisconsin madison'), (2005, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2019, 'university of wisconsin madison')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : margie +lastname : sberns +middlename : None +year_range : (2001,) +main_us_institutions_year : ((2001, 'purdue university'),) +all_us_institutions_year : ((2001, 'purdue university'),) + +firstname : margie +lastname : berns +middlename : None +year_range : (1984, 2019) +main_us_institutions_year : ((1985, 'university of florida'), (1987, 'purdue university'), (1988, 'purdue university'), (1989, 'purdue university'), (1990, 'purdue university'), (1991, 'purdue university'), (1992, 'purdue university'), (1993, 'purdue university'), (1994, 'purdue university'), (1995, 'purdue university'), (1998, 'purdue university'), (1999, 'purdue university'), (2000, 'purdue university'), (2002, 'purdue university'), (2006, 'purdue university'), (2007, 'purdue university'), (2008, 'purdue university'), (2009, 'purdue university'), (2015, 'purdue university'), (2017, 'purdue university'), (2018, 'purdue university'), (2019, 'purdue university'), (2020, 'purdue university')) +all_us_institutions_year : ((1985, 'university of florida'), (1987, 'purdue university'), (1988, 'purdue university'), (1989, 'purdue university'), (1990, 'purdue university'), (1991, 'purdue university'), (1992, 'purdue university'), (1993, 'purdue university'), (1994, 'purdue university'), (1995, 'purdue university'), (1997, 'purdue university'), (1998, 'purdue university'), (1999, 'purdue university'), (2000, 'purdue university'), (2002, 'purdue university'), (2006, 'purdue university'), (2007, 'purdue university'), (2008, 'purdue university'), (2009, 'purdue university'), (2015, 'purdue university'), (2017, 'purdue university'), (2018, 'purdue university'), (2019, 'purdue university'), (2020, 'purdue university')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : fullerton +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'ohio state university'),) +all_us_institutions_year : ((2014, 'ohio state university'),) + +firstname : mark +lastname : fuller +middlename : r +year_range : (1975, 2018) +main_us_institutions_year : ((2002, 'united states geological survey'), (2003, 'united states geological survey'), (2006, 'united states geological survey'), (2010, 'united states geological survey'), (2011, 'united states geological survey'), (2015, 'united states geological survey'), (2016, 'united states geological survey'), (2017, 'united states geological survey'), (2018, 'united states geological survey')) +all_us_institutions_year : ((2002, 'united states geological survey'), (2003, 'united states geological survey'), (2006, 'united states geological survey'), (2010, 'united states geological survey'), (2011, 'united states geological survey'), (2015, 'united states geological survey'), (2016, 'united states geological survey'), (2017, 'united states geological survey'), (2018, 'united states geological survey')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : susan +lastname : kirkpatrick +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of california san diego'),) +all_us_institutions_year : ((2005, 'university of california san diego'),) + +firstname : susan +lastname : fitzpatrickbehrens +middlename : None +year_range : (2014, 2019) +main_us_institutions_year : ((2016, 'california state university'), (2018, 'california state university'), (2019, 'california state university')) +all_us_institutions_year : ((2016, 'california state university'), (2018, 'california state university'), (2019, 'california state university')) + +3/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stanley +lastname : stewart +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of california riverside'),) +all_us_institutions_year : ((2002, 'university of california riverside'),) + +firstname : stanley +lastname : sue +middlename : None +year_range : (1971, 2019) +main_us_institutions_year : ((1972, 'university of washington'), (1973, 'university of washington'), (1974, 'university of washington'), (1975, 'university of washington'), (1976, 'university of washington'), (1977, 'university of washington'), (1978, 'university of washington'), (1979, 'university of washington'), (1981, 'university of california'), (1981, 'university of washington'), (1984, 'university of california los angeles'), (1987, 'university of california los angeles'), (1990, 'university of california los angeles'), (1991, 'university of california los angeles'), (1992, 'university of california los angeles'), (1993, 'university of california los angeles'), (1994, 'university of california los angeles'), (1995, 'university of california los angeles'), (1996, 'university of california los angeles'), (1998, 'university of california los angeles'), (2000, 'university of california davis'), (2003, 'university of california davis'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2009, 'university of california los angeles'), (2009, 'university of california davis'), (2010, 'university of california davis'), (2012, 'palo alto university'), (2016, 'palo alto university')) +all_us_institutions_year : ((1972, 'university of washington'), (1973, 'university of washington'), (1974, 'university of washington'), (1975, 'university of washington'), (1976, 'university of washington'), (1977, 'university of washington'), (1978, 'university of washington'), (1979, 'university of washington'), (1981, 'university of california'), (1981, 'university of washington'), (1984, 'university of california los angeles'), (1985, 'university of california los angeles'), (1986, 'university of california los angeles'), (1987, 'university of california los angeles'), (1990, 'university of california los angeles'), (1991, 'university of california los angeles'), (1992, 'university of california los angeles'), (1993, 'university of california los angeles'), (1994, 'university of california los angeles'), (1995, 'university of california los angeles'), (1996, 'university of california los angeles'), (1998, 'university of california davis'), (1998, 'university of california los angeles'), (2000, 'university of california davis'), (2001, 'university of california davis'), (2003, 'university of california davis'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2009, 'university of california davis'), (2009, 'university of california los angeles'), (2010, 'university of california davis'), (2012, 'palo alto university'), (2016, 'palo alto university')) + +3/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : carol +lastname : jacobs +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'university at buffalo'),) +all_us_institutions_year : ((1994, 'university at buffalo'),) + +firstname : sarah +lastname : jacobson +middlename : l +year_range : (2011, 2015) +main_us_institutions_year : ((2011, 'university at buffalo'), (2013, 'state university of new york system'), (2015, 'university at buffalo')) +all_us_institutions_year : ((2011, 'state university of new york system'), (2011, 'university at buffalo'), (2013, 'state university of new york system'), (2015, 'university at buffalo')) + +3/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : trudier +lastname : harrislopez +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2003, 'university of north carolina at chapel hill'),) + +firstname : louis +lastname : harris +middlename : s +year_range : (1960, 2015) +main_us_institutions_year : ((1962, 'rensselaer polytechnic institute'), (1964, 'rensselaer polytechnic institute'), (1965, 'rensselaer polytechnic institute'), (1967, 'university of north carolina at chapel hill'), (1968, 'rensselaer polytechnic institute'), (1970, 'university of north carolina at chapel hill'), (1971, 'university of north carolina at chapel hill'), (1972, 'university of north carolina at chapel hill'), (1973, 'university of north carolina at chapel hill'), (1974, 'university of north carolina at chapel hill'), (1975, 'vcu medical center'), (1976, 'vcu medical center'), (1977, 'vcu medical center'), (1978, 'vcu medical center'), (1979, 'vcu medical center'), (1980, 'vcu medical center'), (1981, 'vcu medical center'), (1984, 'vcu medical center'), (1985, 'vcu medical center'), (1986, 'vcu medical center'), (1987, 'vcu medical center'), (1989, 'vcu medical center'), (1990, 'vcu medical center'), (1992, 'vcu medical center'), (1992, 'virginia commonwealth university'), (1993, 'vcu medical center'), (1994, 'vcu medical center'), (1995, 'vcu medical center'), (1995, 'virginia commonwealth university'), (1996, 'vcu medical center'), (1997, 'vcu medical center'), (1998, 'vcu medical center'), (1999, 'virginia commonwealth university'), (2000, 'virginia commonwealth university'), (2001, 'vcu medical center'), (2002, 'vcu medical center'), (2003, 'vcu medical center'), (2003, 'virginia commonwealth university'), (2004, 'virginia commonwealth university'), (2005, 'virginia commonwealth university'), (2006, 'vcu medical center'), (2007, 'virginia commonwealth university'), (2008, 'virginia commonwealth university'), (2009, 'virginia commonwealth university'), (2011, 'vcu medical center'), (2012, 'virginia commonwealth university'), (2014, 'virginia commonwealth university'), (2015, 'virginia commonwealth university')) +all_us_institutions_year : ((1962, 'rensselaer polytechnic institute'), (1964, 'rensselaer polytechnic institute'), (1965, 'rensselaer polytechnic institute'), (1967, 'university of north carolina at chapel hill'), (1968, 'rensselaer polytechnic institute'), (1970, 'university of north carolina at chapel hill'), (1971, 'university of north carolina at chapel hill'), (1972, 'university of north carolina at chapel hill'), (1973, 'university of north carolina at chapel hill'), (1974, 'university of north carolina at chapel hill'), (1975, 'university of north carolina at chapel hill'), (1975, 'vcu medical center'), (1976, 'university of north carolina at chapel hill'), (1976, 'vcu medical center'), (1976, 'virginia commonwealth university'), (1977, 'university of north carolina at chapel hill'), (1977, 'vcu medical center'), (1978, 'vcu medical center'), (1979, 'vcu medical center'), (1980, 'vcu medical center'), (1981, 'vcu medical center'), (1984, 'vcu medical center'), (1985, 'vcu medical center'), (1986, 'vcu medical center'), (1986, 'virginia commonwealth university'), (1987, 'vcu medical center'), (1989, 'vcu medical center'), (1990, 'vcu medical center'), (1992, 'vcu medical center'), (1992, 'virginia commonwealth university'), (1993, 'vcu medical center'), (1994, 'vcu medical center'), (1995, 'vcu medical center'), (1995, 'virginia commonwealth university'), (1996, 'vcu medical center'), (1996, 'virginia commonwealth university'), (1997, 'vcu medical center'), (1998, 'vcu medical center'), (1998, 'virginia commonwealth university'), (1999, 'virginia commonwealth university'), (2000, 'vcu medical center'), (2000, 'virginia commonwealth university'), (2001, 'vcu medical center'), (2002, 'vcu medical center'), (2003, 'vcu medical center'), (2003, 'virginia commonwealth university'), (2004, 'virginia commonwealth university'), (2005, 'virginia commonwealth university'), (2006, 'vcu medical center'), (2007, 'vcu medical center'), (2007, 'virginia commonwealth university'), (2008, 'virginia commonwealth university'), (2009, 'virginia commonwealth university'), (2011, 'university of north carolina at chapel hill'), (2011, 'vcu medical center'), (2011, 'virginia commonwealth university'), (2012, 'virginia commonwealth university'), (2014, 'virginia commonwealth university'), (2015, 'virginia commonwealth university')) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : silverstein +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of chicago'),) +all_us_institutions_year : ((2000, 'university of chicago'),) + +firstname : michael +lastname : silvers +middlename : b +year_range : (2011, 2020) +main_us_institutions_year : ((2011, 'university of california los angeles'), (2020, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((2011, 'university of california los angeles'), (2020, 'university of illinois at urbana champaign')) + +3/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : russo +middlename : paul +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of miami'),) +all_us_institutions_year : ((2012, 'university of miami'),) + +firstname : john +lastname : russon +middlename : None +year_range : (1995, 2021) +main_us_institutions_year : ((1995, 'pennsylvania state university'), (1998, 'pennsylvania state university')) +all_us_institutions_year : ((1995, 'pennsylvania state university'), (1998, 'pennsylvania state university')) + +3/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : walter +lastname : clark +middlename : a +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of california los angeles'),) +all_us_institutions_year : ((2004, 'university of california los angeles'),) + +firstname : walter +lastname : clark +middlename : aaron +year_range : (2013, 2020) +main_us_institutions_year : ((2013, 'university of california riverside'), (2018, 'university of california riverside'), (2020, 'university of california riverside')) +all_us_institutions_year : ((2013, 'university of california riverside'), (2018, 'university of california riverside'), (2020, 'university of california riverside')) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : carolyn +lastname : shaw +middlename : martin +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of california berkeley'),) +all_us_institutions_year : ((1997, 'university of california berkeley'),) + +firstname : carolyn +lastname : shaw +middlename : martin +year_range : (1992, 2009) +main_us_institutions_year : ((1992, 'university of california santa cruz'), (1993, 'university of california santa cruz'), (1998, 'university of california santa cruz'), (2007, 'university of california santa cruz'), (2009, 'university of california santa cruz')) +all_us_institutions_year : ((1992, 'university of california santa cruz'), (1993, 'university of california santa cruz'), (1998, 'university of california santa cruz'), (2007, 'university of california santa cruz'), (2009, 'university of california santa cruz')) + +3/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : king +middlename : d +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of texas at austin'),) +all_us_institutions_year : ((1996, 'university of texas at austin'),) + +firstname : robert +lastname : king +middlename : None +year_range : (2008, 2019) +main_us_institutions_year : None +all_us_institutions_year : ((2016, 'union county college'),) + +4/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marvin +lastname : booker +middlename : keith +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of arkansas'),) +all_us_institutions_year : ((2014, 'university of arkansas'),) + +firstname : m +lastname : booker +middlename : keith +year_range : (1991, 1998) +main_us_institutions_year : ((1991, 'university of arkansas'), (1994, 'university of arkansas')) +all_us_institutions_year : ((1991, 'university of arkansas'), (1994, 'university of arkansas')) + +4/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : vincent +lastname : wimbush +middlename : l +year_range : (2009,) +main_us_institutions_year : ((2009, 'claremont graduate university'),) +all_us_institutions_year : ((2009, 'claremont graduate university'),) + +firstname : vincent +lastname : wimbush +middlename : l +year_range : (1990, 2017) +main_us_institutions_year : ((1996, 'claremont colleges'),) +all_us_institutions_year : ((1989, 'claremont colleges'), (1991, 'claremont colleges'), (1992, 'claremont colleges'), (1993, 'claremont colleges'), (1994, 'claremont colleges'), (1995, 'claremont colleges'), (1996, 'claremont colleges'), (1997, 'claremont colleges'), (1999, 'claremont colleges'), (2000, 'claremont colleges'), (2003, 'claremont colleges'), (2004, 'claremont colleges'), (2005, 'claremont colleges'), (2009, 'claremont colleges')) + +4/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : cook +middlename : heckendorn +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of california santa barbara'),) +all_us_institutions_year : ((2010, 'university of california santa barbara'),) + +firstname : elizabeth +lastname : cook +middlename : heckendorn +year_range : (2007, 2009) +main_us_institutions_year : ((2007, 'university of california santa barbara'), (2009, 'university of california santa barbara')) +all_us_institutions_year : ((2007, 'university of california santa barbara'), (2009, 'university of california santa barbara')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alan +lastname : howard +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of virginia main campus'),) +all_us_institutions_year : ((1998, 'university of virginia main campus'),) + +firstname : a +lastname : howard +middlename : e dick +year_range : (1965, 2016) +main_us_institutions_year : ((1986, 'university of virginia'), (1993, 'american university')) +all_us_institutions_year : ((1986, 'university of virginia'), (1993, 'american university'), (2014, 'university of virginia')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : beard +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of north dakota'),) +all_us_institutions_year : ((2005, 'university of north dakota'),) + +firstname : michael +lastname : beard +middlename : r +year_range : (1993, 2021) +main_us_institutions_year : ((1993, 'university of southern california'), (1999, 'university of texas medical branch'), (2000, 'university of texas medical branch'), (2001, 'university of texas medical branch'), (2002, 'university of texas medical branch')) +all_us_institutions_year : ((1993, 'university of southern california'), (1999, 'university of texas medical branch'), (2000, 'university of texas medical branch'), (2001, 'university of texas medical branch'), (2002, 'university of texas medical branch'), (2003, 'university of texas medical branch'), (2004, 'university of texas medical branch'), (2005, 'university of texas medical branch')) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : johnson +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'florida state university'),) +all_us_institutions_year : ((2013, 'florida state university'),) + +firstname : david +lastname : johnson +middlename : r +year_range : (1990, 2021) +main_us_institutions_year : ((1990, 'university of california berkeley'), (1995, 'university of california berkeley'), (1997, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of michigan'), (2011, 'university of michigan')) +all_us_institutions_year : ((1990, 'university of california berkeley'), (1995, 'university of california berkeley'), (1997, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2006, 'university of michigan'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of california berkeley'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2017, 'university of california berkeley'), (2019, 'university of michigan')) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : rogin +middlename : p +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of california berkeley'),) +all_us_institutions_year : ((2001, 'university of california berkeley'),) + +firstname : michael +lastname : rogin +middlename : paul +year_range : (1975, 1996) +main_us_institutions_year : ((1983, 'university of colorado boulder'),) +all_us_institutions_year : ((1983, 'university of colorado boulder'),) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : goldman +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'princeton university'),) +all_us_institutions_year : ((1992, 'princeton university'),) + +firstname : michael +lastname : goldman +middlename : a +year_range : (1984, 2021) +main_us_institutions_year : ((1986, 'university of washington'), (1987, 'university of washington'), (1988, 'san francisco state university'), (1991, 'university of washington'), (1992, 'san francisco state university'), (1994, 'san francisco state university'), (1997, 'san francisco state university'), (1998, 'san francisco state university'), (2000, 'san francisco state university'), (2001, 'san francisco state university'), (2002, 'san francisco state university'), (2003, 'san francisco state university'), (2004, 'san francisco state university'), (2005, 'san francisco state university'), (2006, 'san francisco state university'), (2008, 'san francisco state university'), (2009, 'san francisco state university'), (2010, 'san francisco state university'), (2011, 'san francisco state university'), (2012, 'san francisco state university'), (2013, 'san francisco state university'), (2014, 'san francisco state university'), (2015, 'san francisco state university'), (2016, 'san francisco state university'), (2017, 'san francisco state university'), (2018, 'san francisco state university'), (2020, 'san francisco state university'), (2021, 'san francisco state university')) +all_us_institutions_year : ((1986, 'university of washington'), (1987, 'university of washington'), (1988, 'san francisco state university'), (1991, 'university of washington'), (1992, 'san francisco state university'), (1994, 'san francisco state university'), (1997, 'san francisco state university'), (1998, 'san francisco state university'), (2000, 'san francisco state university'), (2001, 'san francisco state university'), (2002, 'san francisco state university'), (2003, 'san francisco state university'), (2003, 'university of washington'), (2004, 'san francisco state university'), (2005, 'san francisco state university'), (2006, 'san francisco state university'), (2008, 'san francisco state university'), (2009, 'san francisco state university'), (2010, 'san francisco state university'), (2011, 'san francisco state university'), (2012, 'san francisco state university'), (2013, 'san francisco state university'), (2014, 'san francisco state university'), (2015, 'san francisco state university'), (2016, 'san francisco state university'), (2017, 'san francisco state university'), (2018, 'san francisco state university'), (2020, 'san francisco state university'), (2021, 'san francisco state university')) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : covino +middlename : a +year_range : (2004,) +main_us_institutions_year : ((2004, 'florida atlantic university'),) +all_us_institutions_year : ((2004, 'florida atlantic university'),) + +firstname : william +lastname : covino +middlename : a +year_range : (1984, 1996) +main_us_institutions_year : ((1996, 'university of illinois at chicago'),) +all_us_institutions_year : ((1996, 'university of illinois at chicago'), (1998, 'university of illinois at chicago')) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ana +lastname : sanchez +middlename : maria amar +year_range : (1994,) +main_us_institutions_year : ((1994, 'harvard university'),) +all_us_institutions_year : ((1994, 'harvard university'),) + +firstname : ana +lastname : sanchez +middlename : maria amar +year_range : (1990, 2014) +main_us_institutions_year : ((2014, 'university of california irvine'), (2016, 'university of california irvine'), (2019, 'university of california irvine')) +all_us_institutions_year : ((2006, 'university of california irvine'), (2014, 'university of california irvine'), (2016, 'university of california irvine'), (2019, 'university of california irvine')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sangeeta +lastname : ray +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of maryland college park'),) +all_us_institutions_year : ((1999, 'university of maryland college park'),) + +firstname : sangeeta +lastname : ray +middlename : None +year_range : (1992, 2021) +main_us_institutions_year : ((1996, 'tuskegee university'),) +all_us_institutions_year : ((1996, 'tuskegee university'), (2011, 'university of maryland college park'), (2017, 'university of maryland college park')) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : noble +middlename : f x +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of notre dame'),) +all_us_institutions_year : ((2007, 'university of notre dame'),) + +firstname : thomas +lastname : noble +middlename : f x +year_range : (1970, 2017) +main_us_institutions_year : ((1976, 'texas tech university'), (1979, 'texas tech university')) +all_us_institutions_year : ((1976, 'texas tech university'), (1979, 'texas tech university')) + +9/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : mcdonald +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of louisiana at lafayette'),) +all_us_institutions_year : ((2010, 'university of louisiana at lafayette'),) + +firstname : james +lastname : mcdonald +middlename : h +year_range : (1989, 2021) +main_us_institutions_year : ((1989, 'arizona state university'), (1992, 'arizona state university'), (1993, 'university of michigan'), (1995, 'university of texas at san antonio'), (1996, 'university of texas at san antonio'), (1997, 'university of texas at san antonio'), (1998, 'university of texas at san antonio'), (1999, 'university of texas at san antonio'), (2001, 'university of texas at san antonio'), (2005, 'university of texas at san antonio'), (2008, 'university of michigan'), (2010, 'university of colorado boulder'), (2011, 'university of colorado boulder'), (2013, 'university of colorado boulder'), (2015, 'university of texas at san antonio'), (2016, 'university of texas at san antonio'), (2018, 'university of texas at san antonio'), (2020, 'university of texas at san antonio'), (2021, 'university of texas at san antonio')) +all_us_institutions_year : ((1989, 'arizona state university'), (1992, 'arizona state university'), (1993, 'university of michigan'), (1995, 'university of texas at san antonio'), (1996, 'university of texas at san antonio'), (1997, 'university of texas at san antonio'), (1998, 'university of texas at san antonio'), (1999, 'university of texas at san antonio'), (2001, 'university of texas at san antonio'), (2005, 'university of texas at san antonio'), (2008, 'university of michigan'), (2010, 'university of colorado boulder'), (2011, 'university of colorado boulder'), (2013, 'university of colorado boulder'), (2015, 'university of texas at san antonio'), (2016, 'university of texas at san antonio'), (2018, 'university of texas at san antonio'), (2020, 'university of texas at san antonio'), (2021, 'university of texas at san antonio')) + +10/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : meyer +middlename : None +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of connecticut'),) +all_us_institutions_year : ((1993, 'university of connecticut'),) + +firstname : m +lastname : meyer +middlename : None +year_range : (2006, 2021) +main_us_institutions_year : ((2006, 'space telescope science institute'), (2019, 'slac national accelerator laboratory'), (2020, 'slac national accelerator laboratory')) +all_us_institutions_year : ((2006, 'space telescope science institute'), (2017, 'slac national accelerator laboratory'), (2017, 'stanford university'), (2018, 'slac national accelerator laboratory'), (2018, 'university of maryland college park'), (2019, 'slac national accelerator laboratory'), (2019, 'stanford university'), (2020, 'slac national accelerator laboratory')) + +10/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : greenblatt +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'harvard university'),) +all_us_institutions_year : ((2015, 'harvard university'),) + +firstname : stephen +lastname : greenblatt +middlename : None +year_range : (1994, 1995) +main_us_institutions_year : ((1994, 'university of california berkeley'), (1995, 'university of california berkeley')) +all_us_institutions_year : ((1994, 'university of california berkeley'), (1995, 'university of california berkeley')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 50.665562379360196 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..1bdbc26 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_art_christoph_degree0_graduates_8515.log @@ -0,0 +1,611 @@ +Namespace(testing=False, verbose=1, field=['art'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [142362112] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.000755151112874349 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +Time elapsed: 23.88949055671692 minutes + +Starting active labeling... +firstname : michael +lastname : cooperson +middlename : None +year : 1996 +year_papertitle : ((1996, 'baghdad in rhetoric and narrative'), (2000, 'classical arabic biography the heirs of the prophets in the age of al ma mun'), (2001, 'the author and his doubles essays on classical arabic culture'), (2001, 'two abbasid trials aḥmad ibn ḥanbal and ḥunayn b isḥāq')) +keywords : frozenset({'classics', 'gender studies', 'literature'}) + +firstname : michael +lastname : cooperson +middlename : david +year : 1994 +year_papertitle : ((1994, 'the heirs of the prophets in classical arabic biography'),) +keywords : frozenset({'medieval literature', 'middle eastern literature', 'biographies', 'middle eastern history', 'literature'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : david +lastname : ainsworth +middlename : None +year : 2005 +year_papertitle : ((2005, 'spiritual reading in milton s eikonoklastes'), (2008, 'milton and the spiritual reader reading and religion in seventeenth century england'), (2012, 'historical milton manuscript print and political culture in revolutionary england by thomas fulton amherst and boston u of massachusetts p 2010'), (2012, 'rise to life with these salvation and herrick s mocking epigrams')) +keywords : frozenset({'art history', 'classics', 'aesthetics', 'literature'}) + +firstname : david +lastname : ainsworth +middlename : None +year : 2005 +year_papertitle : ((2005, 'milton and the spiritual reader reading and religion in seventeenth century england'),) +keywords : frozenset({'british and irish literature'}) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mona +lastname : diab +middlename : None +year : 1999 +year_papertitle : ((1999, 'the bible as a parallel corpus annotating the book of 2000 tongues'), (2000, 'a statistical word level translation model for comparable corpora'), (2000, 'an unsupervised method for multilingual word sense tagging using parallel corpora'), (2002, 'an unsupervised method for word sense tagging using parallel corpora'), (2003, 'word sense disambiguation within a multilingual framework'), (2004, 'automatic tagging of arabic text from raw text to base phrase chunks'), (2004, 'relieving the data acquisition bottleneck in word sense disambiguation'), (2006, 'developing and using a pilot dialectal arabic treebank'), (2006, 'unsupervised induction of modern standard arabic verb classes'), (2006, 'unsupervised induction of modern standard arabic verb classes using syntactic frames and lsa')) +keywords : frozenset({'artificial intelligence', 'speech recognition', 'linguistics', 'natural language processing'}) + +firstname : mona +lastname : diab +middlename : talat +year : 2003 +year_papertitle : ((2003, 'word sense disambiguation within a multilingual framework'),) +keywords : frozenset({'artificial intelligence', 'linguistics'}) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alexandra +lastname : jaffe +middlename : None +year : 1993 +year_papertitle : ((1993, 'obligation error and authenticity competing cultural principles in the teaching of corsican'), (1996, 'the second annual corsican spelling contest orthography and ideology'), (1998, 'book review colonial migrants and racism'), (1999, 'ideologies in action language politics on corsica'), (1999, 'packaged sentiments the social meanings of greeting cards'), (2000, 'comic performance and the articulation of hybrid identity'), (2000, 'introduction non standard orthography and non standard speech'), (2000, 'the voices people read orthography and the representation of non standard speech')) +keywords : frozenset({'epistemology', 'advertising', 'linguistics', 'gender studies', 'pedagogy'}) + +firstname : alexandra +lastname : jaffe +middlename : mystra +year : 1990 +year_papertitle : ((1990, 'language identity and resistance on corsica'),) +keywords : frozenset({'cultural anthropology', 'linguistics'}) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : margaret +lastname : watkins +middlename : None +year : 2013 +year_papertitle : ((2013, 'a cruel but ancient subjugation understanding hume s attack on slavery'), (2014, 'agreeable connexions scottish enlightenment links with france by alexander broadie edinburgh scotland birlinn 2012 pp 230 40 00'), (2019, 'the philosophical progress of hume s essays'), (2020, 'brown artful virtue the interplay of the beautiful and the good in the scottish enlightenment'), (2020, 'sher church and university in the scottish enlightenment the moderate literati of edinburgh')) +keywords : frozenset({'epistemology', 'classics', 'environmental ethics', 'aesthetics', 'media studies', 'ancient history'}) + +firstname : henry +lastname : watkin +middlename : jay +year : 1988 +year_papertitle : ((1988, 'the development of cities in cyprus from the archaic to the roman period'),) +keywords : frozenset({'classical studies', 'archaeology', 'classical literature', 'ancient history'}) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : amalia +lastname : llombarthuesca +middlename : None +year : 2017 +year_papertitle : ((2017, 'morphological awareness and spelling in spanish heritage language learners'), (2019, 'linguistic factors and the spelling ability of spanish heritage language learners'), (2020, 'ressenya a laura marques pascual antonio cortijo ocana eds second and third language acquisition in catalan speaking regions newark delaware juan de la cuesta hispanic monographs 2019')) +keywords : frozenset({'humanities', 'linguistics'}) + +firstname : amalia +lastname : llombart +middlename : None +year : 2003 +year_papertitle : ((2003, 'nominal anaphora in spanish and english'),) +keywords : frozenset({'linguistics'}) + +4/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : matthew +lastname : carroll +middlename : w +year : 2009 +year_papertitle : ((2009, 'survival and predictors of outcomes in non hiv infected patients with extensively drug resistant tuberculosis'), (2010, 'genetic diversity of mycobacterium tuberculosis isolates from a tertiary care tuberculosis hospital in south korea'), (2010, 'polymorphisms associated with resistance and cross resistance to aminoglycosides and capreomycin in mycobacterium tuberculosis isolates from south korean patients with drug resistant tuberculosis'), (2011, 'linezolid for extensively drug resistant pulmonary tuberculosis'), (2012, 'frequency of adverse reactions to first and second line anti tuberculosis chemotherapy in a korean cohort'), (2012, 'linezolid for treatment of chronic extensively drug resistant tuberculosis'), (2012, 'rhabdomyolysis in a patient treated with linezolid for extensively drug resistant tuberculosis'), (2013, 'efficacy and safety of metronidazole for pulmonary multidrug resistant tuberculosis'), (2013, 'impact of diabetes and smoking on mortality in tuberculosis'), (2014, 'predictors of pulmonary tuberculosis treatment outcomes in south korea a prospective cohort study 2005 2012')) +keywords : frozenset({'intensive care medicine', 'internal medicine', 'gynecology', 'genetics', 'surgery', 'microbiology'}) + +firstname : matthew +lastname : carrillovincent +middlename : None +year : 2013 +year_papertitle : ((2013, 'the work of being a wallflower the peripheral politics of male sentimentality'),) +keywords : frozenset({'american studies', 'american literature', 'gender studies', 'film studies'}) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : keller +middlename : r +year : 1992 +year_papertitle : ((1992, 'marston s antonio and mellida'), (1992, 'webster s the duchess of malfi'), (1993, 'oliver stone s jfk and the circulation of social energy and the textuality of history'), (1995, 'like to a chaos'), (1997, 'masculinity and marginality in rob roy and braveheart'), (1998, 'perspectives in out self referentiality and hollywood s queer politics kevin kline plays howard brackett in in out')) +keywords : frozenset({'gender studies', 'art history', 'media studies', 'demography', 'statistical physics'}) + +firstname : james +lastname : kellerman +middlename : allen +year : 1996 +year_papertitle : ((1996, 'the dramatic prologue of plato s symposium as introduction to the dialogue s philosophy'),) +keywords : frozenset({'classical studies', 'philosophy', 'classical literature'}) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paula +lastname : gilbert +middlename : ruth +year : 1992 +year_papertitle : ((1992, 'the daughter below double parody of mother daughter bonding in michele mailhot s beatrice vue d en bas'), (1999, 'pre and post mortem regendering and serial killing in rioux dandurand de and atwood')) +keywords : frozenset({'theology', 'ethnology', 'law', 'aesthetics'}) + +firstname : laura +lastname : gilbert +middlename : a +year : 2014 +year_papertitle : ((2014, 'cinematic representations of female teachers a narratological analysis of mise en scene in recent hollywood films'),) +keywords : frozenset({'teacher education', 'film studies'}) + +4/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marta +lastname : pardo +middlename : None +year : 2009 +year_papertitle : ((2009, 'dopamine behavioral economics and effort'), (2012, 'adenosine a2a receptor antagonism and genetic deletion attenuate the effects of dopamine d2 antagonism on effort based decision making in mice'), (2012, 'anxiogenic and stress inducing effects of peripherally administered acetaldehyde in mice similarities with the disulfiram ethanol reaction'), (2012, 'dopaminergic modulation of effort related choice behavior as assessed by a progressive ratio chow feeding choice task pharmacological studies and the role of individual differences'), (2012, 'piecing together the puzzle of acetaldehyde as a neuroactive agent'), (2012, 'the behavioral pharmacology of effort related choice behavior dopamine adenosine and beyond'), (2013, 'acetate as an active metabolite of ethanol studies of locomotion loss of righting reflex and anxiety in rodents'), (2013, 'conditional neural knockout of the adenosine a2a receptor and pharmacological a2a antagonism reduce pilocarpine induced tremulous jaw movements studies with a mouse model of parkinsonian tremor'), (2013, 'effect of subtype selective adenosine receptor antagonists on basal or haloperidol regulated striatal function studies of exploratory locomotion and c fos immunoreactivity in outbred and a2ar ko mice'), (2013, 's 2 4 runners vs couch potatoes dopamine depletion reduces selection of physical effort in animals with low but not high experience of exercise')) +keywords : frozenset({'biochemistry', 'physical therapy', 'pharmacology', 'internal medicine', 'neuroscience', 'endocrinology', 'anesthesia'}) + +firstname : maria +lastname : pardo +middlename : gracia +year : 2013 +year_papertitle : ((2013, 'een vias de desarrollo libros para ninos y modernidades en latinoamerica'),) +keywords : frozenset({'latin american literature', 'education policy', 'latin american history'}) + +4/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gregg +lastname : lambert +middlename : None +year : 1997 +year_papertitle : ((1997, 'the deleuzian critique of pure fiction'), (1998, 'on the uses and abuses of literature for life gilles deleuze and the literary clinic'), (2000, 'the subject of literature between derrida and deleuze law or life'), (2002, 'deleuze and the dialectic a k a marx and hegel'), (2002, 'the non philosophy of gilles deleuze'), (2003, 'conversation on the future of theory'), (2003, 'une grande politique or the new philosophy of right'), (2003, 'what questions fascinate me what do i want to know')) +keywords : frozenset({'epistemology', 'law', 'psychoanalysis', 'humanities', 'social psychology', 'art history', 'literature'}) + +firstname : gregory +lastname : lambert +middlename : None +year : 1995 +year_papertitle : ((1995, 'on the culture of the stranger reflections on european aesthetic ideology in the new world'),) +keywords : frozenset({'philosophy', 'comparative literature', 'cultural anthropology', 'european history'}) + +4/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christopher +lastname : berg +middlename : s van den +year : 2008 +year_papertitle : ((2008, 'the pulvinar in roman culture'), (2014, 'appendix detailed outline of tacitus dialogus de oratoribus'), (2014, 'intratext declamation and dramatic argument in tacitus dialogus de oratoribus'), (2014, 'introduction rhetorical beginnings and rhetorical ends'), (2014, 'literary criticism and history cicero horace and quintilian in the dialogus'), (2014, 'the world of tacitus dialogus de oratoribus aesthetics and empire in ancient rome'), (2015, 'theory and water a co taught undergraduate course political rhetoric')) +keywords : frozenset({'pedagogy', 'classics', 'aesthetics', 'literature'}) + +firstname : christopher +lastname : berg +middlename : sean van den +year : 2006 +year_papertitle : ((2006, 'the social aesthetics of tacitus dialogus de oratoribus'),) +keywords : frozenset({'comparative literature', 'classical studies', 'classical literature'}) + +5/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : qing +lastname : wang +middlename : None +year : 1997 +year_papertitle : ((1997, 'carbazole based multifunctional molecules for photorefractive applications'), (1997, 'effect of boron on graphite oxidation a theoretical study'), (1997, 'semi empirical studies on electronic structures of a boron doped graphene layer implications on the oxidation mechanism'), (1998, 'a multifunctional photorefractive material showing high optical gain and diffraction efficiency'), (1998, 'effect of a local electric field on photogeneration efficiency in a photorefractive polymer'), (1998, 'synthesis and unusual physical behavior of a photorefractive polymer containing tris bipyridyl ruthenium ii complexes as a photosensitizer and exhibiting a low glass transition temperature'), (1999, 'a new family of amorphous molecular materials showing large photorefractive effect'), (1999, 'novel photorefractive materials based on multifunctional organic glasses'), (1999, 'novel photorefractive polymers sensitized by metalloporphyrin'), (1999, 'progress in fully functionalized photorefractive materials')) +keywords : frozenset({'atomic physics', 'optoelectronics', 'stereochemistry', 'inorganic chemistry', 'nanotechnology', 'chemical physics', 'optics', 'photochemistry', 'organic chemistry'}) + +firstname : mingquan +lastname : wang +middlename : None +year : 1987 +year_papertitle : ((1987, 'transitivity and the ba construction in mandarin china'),) +keywords : frozenset({'linguistics'}) + +6/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kathleen +lastname : turner +middlename : j +year : 1988 +year_papertitle : ((1988, 'buckskins bullets and business a history of buffalo bill s wild west by sarah j blackstone westport greenwood 1986 xiii 157 pp 27 95'), (1990, 'public affairs the military and the media 1962 1968 by william m hammond washington center of military history united states army 1988 xvi 413 pp cloth 23 00 paper 20 00'), (1992, 'deadline a memoir by james reston new york random house 1991 xviii 525 pp 25 00 isbn 0 394 58558 5')) +keywords : frozenset({'art history', 'political economy', 'ancient history'}) + +firstname : kathleen +lastname : turner +middlename : marie +year : 2013 +year_papertitle : ((2013, 'my life story was spaces marginalized women maneuvering urban environments in literature and film'),) +keywords : frozenset({'american literature', 'film studies'}) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : min +lastname : kim +middlename : jung +year : 1994 +year_papertitle : ((1994, 'occupational asthma caused by two herb materials dioscorea batatas and pinellia ternata'), (1998, 'virtual colonoscopy with electron beam ct correlation with barium enema colonoscopy and pathology'), (1999, 'comparison between palpable and nonpalpable breast cancers mammographic and pathological findings'), (1999, 'distribution of coronary calcium score in healthy middle aged korean'), (2001, 'using electron beam ct to evaluate conotruncal anomalies in pediatric and adult patients')) +keywords : frozenset({'radiology', 'cardiology', 'immunology', 'internal medicine', 'traditional medicine', 'surgery'}) + +firstname : minju +lastname : kim +middlename : None +year : 2003 +year_papertitle : ((2003, 'discourse frequency and the emergence of grammar a corpus based study of the grammaticalization of the korean existential verb i is i i i i i ta i'),) +keywords : frozenset({'language', 'modern language', 'linguistics'}) + +6/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marylaura +lastname : papalas +middlename : None +year : 2013 +year_papertitle : ((2013, 'new orientations for french language learning is synchronous distance education a viable solution'), (2015, 'speed and convulsive beauty trains and the historic avant garde'), (2016, 'avant garde cuts schiaparelli and the construction of a surrealist femininity'), (2017, 'fashion in interwar france the urban vision of elsa schiaparelli')) +keywords : frozenset({'art history', 'visual arts', 'linguistics'}) + +firstname : mary +lastname : papalas +middlename : laura +year : 2008 +year_papertitle : ((2008, 'a changing of the guard the evolution of the french avant garde from italian futurism to surrealism to situationism to the writers of the literary journal tel quel'),) +keywords : frozenset({'romance literature'}) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : grace +lastname : tiffany +middlename : c +year : 1990 +year_papertitle : ((1990, 'our mutual friend in eumaeus joyce appropriates dickens'), (1992, 'falstaff s false staff jonsonian asexuality in the merry wives of windsor'), (1993, 'not saying no female self erasure in troilus and cressida'), (1994, 'appropriating shakespeare by brian vickers'), (1994, 'essays mainly shakespearean by anne barton review'), (1994, 'that reason wonder may diminish as you like it androgyny and the theater wars'), (1995, 'erotic beasts social monsters shakespeare jonson and comic androgyny'), (1995, 'recovering shakespeare s theatrical vocabulary by alan c dessen'), (1995, 'things supernatural and causeless shakespearean romance by marco mincoff'), (1995, 'things supernatural and causeless shakespearean romance by marco mincoff and a buddhist s shakespeare affirming self deconstructions by james howe review')) +keywords : frozenset({'art history', 'linguistics', 'literature'}) + +firstname : grace +lastname : tiffany +middlename : cleveland +year : 1989 +year_papertitle : ((1989, 'power plays the construction of kingship in shakespeare s henriad'),) +keywords : frozenset({'british and irish literature'}) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michaela +lastname : morgan +middlename : None +year : 2010 +year_papertitle : ((2010, 'how to teach poetry writing workshops for ages 5 9'), (2011, 'how to teach poetry writing workshops for ages 8 13 developing creative literacy')) +keywords : frozenset({'visual arts'}) + +firstname : michael +lastname : morgan +middlename : wayne +year : 1990 +year_papertitle : ((1990, 'semantic evolution in the prepositional system in bulgarian the prepositions ot iz and s'),) +keywords : frozenset({'ancient languages', 'modern language', 'linguistics', 'language'}) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeff +lastname : white +middlename : None +year : 2004 +year_papertitle : ((2004, 'be heading a vampire combining 2d and 3d elements with on set motion capture to create the vampire brides in van helsing'), (2004, 'be heading a vampire combining 2d and 3d elements with on set motion capture to create the vampire brides in van helsing copyright restrictions prevent acm from providing the full text for this work'), (2006, 'blockparty modular rigging encoded in a geometric volume'), (2007, 'transformers giant frickin robots'), (2008, 'indiana jones a look into the visual effects challenges and slight of hand for crystal skull')) +keywords : frozenset({'artificial intelligence', 'computer graphics images', 'human computer interaction', 'electrical engineering', 'computer vision'}) + +firstname : jeffrey +lastname : white +middlename : gerard +year : 2009 +year_papertitle : ((2009, 'the italian paradise lost of john milton'),) +keywords : frozenset({'british and irish literature', 'religious history'}) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yu +lastname : wang +middlename : None +year : 2014 +year_papertitle : ((2014, 'a novel ku70 function in colorectal homeostasis separate from nonhomologous end joining'), (2017, 'dna barcoded labeling probes for highly multiplexed exchange paint imaging'), (2017, 'rapid sequential in situ multiplexing with dna exchange imaging in neuronal cells and tissues'), (2018, 'from designing the molecules of life to designing life future applications derived from advances in dna technologies'), (2018, 'vom design der molekule des lebens zum design von leben zukunftige anwendungen von dna technologien'), (2019, 'highly multiplexed in situ protein imaging using dna exchange imaging and immuno saber'), (2019, 'immuno saber enables highly multiplexed and amplified protein imaging in tissues'), (2019, 'saber amplifies fish enhanced multiplexed imaging of rna and dna in cells and tissues'), (2019, 'the human body at cellular resolution the nih human biomolecular atlas program'), (2020, 'axial plane single molecule super resolution microscopy of whole cells')) +keywords : frozenset({'cancer research', 'data science', 'immunology', 'cell biology', 'nanotechnology', 'optics', 'art history', 'biophysics'}) + +firstname : yudong +lastname : wang +middlename : None +year : 2007 +year_papertitle : ((2007, 'figures i en buste i in medieval china three studies'),) +keywords : frozenset({'art history', 'religious history', 'history'}) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chun +lastname : yang +middlename : None +year : 2017 +year_papertitle : ((2017, 'chapter 12 absorption and meaningfulness examining the relationship between eudaimonic media use and engagement'), (2017, 'fear responses to threat appeals functional form methodological considerations and correspondence between static and dynamic data'), (2018, 'defensive reactions to threatening health messages alternative structures and next questions'), (2018, 'self regulation of emotional responses to zika spiral of fear'), (2018, 'understanding fear of zika personal interpersonal and media influences'), (2019, 'a penchant for the immoral implications of parasocial interaction perceived complicity and identification on liking of anti heroes'), (2020, 'fear of zika information seeking as cause and consequence')) +keywords : frozenset({'cognitive psychology', 'social psychology', 'developmental psychology', 'psychiatry', 'clinical psychology'}) + +firstname : chunglin +lastname : yang +middlename : None +year : 2015 +year_papertitle : ((2015, 'phonological variation and l2 word learning the role of orthography in word recognition and production'),) +keywords : frozenset({'cognitive psychology', 'linguistics', 'experimental psychology'}) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : johnson +middlename : t +year : 2009 +year_papertitle : ((2009, 'playgrounds of unlimited potential adaptation documentary and dogtown and z boys'), (2013, 'the flashing glimpse of cinephilia what an unusual methodology might offer adaptation studies')) +keywords : frozenset({'visual arts'}) + +firstname : david +lastname : johnson +middlename : christopher +year : 1994 +year_papertitle : ((1994, 'the volga german dialect of schoenchen kansas'),) +keywords : frozenset({'germanic literature', 'american history', 'linguistics'}) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : fisher +middlename : t +year : 1989 +year_papertitle : ((1989, 'the catholic counterculture in america 1933 1962'), (1996, 'the great beader pete axthelm and the bonds of tradition')) +keywords : frozenset({'theology', 'genealogy', 'classics', 'religious studies'}) + +firstname : james +lastname : fisher +middlename : randall +year : 1993 +year_papertitle : ((1993, 'certaine signes of the zodiac patterns through time and space in book ii of edmund spenser s the faerie queene'),) +keywords : frozenset({'british and irish literature'}) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sarah +lastname : giragosian +middlename : None +year : 2014 +year_papertitle : ((2014, 'towards a poetics of the animal'), (2016, 'elizabeth bishop s evolutionary poetics')) +keywords : frozenset({'literature'}) + +firstname : sarah +lastname : giragosian +middlename : None +year : 2014 +year_papertitle : ((2014, 'queer creatures queer times'),) +keywords : frozenset({'modern literature', 'lgbtq studies'}) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : daniel +lastname : reynolds +middlename : p +year : 1999 +year_papertitle : ((1999, 'es handelt sich hier um werdende the fiction of the artist in rilke s worpswede'), (2000, 'schreiben gegen die verstreichende zeit zu leben und werk von gunter grass'), (2001, 'rezeption und zeitlichkeit des werkes christoph heins'), (2003, 'blinded by the enlightenment gunter grass in calcutta'), (2003, 'portrait of misreading bernhard schlink s der vorleser')) +keywords : frozenset({'art history', 'religious studies', 'literature'}) + +firstname : daniel +lastname : reynolds +middlename : patrick +year : 1996 +year_papertitle : ((1996, 'narrative out of bounds fiction metafiction and the essay in twentieth century german literature'),) +keywords : frozenset({'modern literature', 'german literature', 'literature'}) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : vanessa +lastname : corredera +middlename : i +year : 2015 +year_papertitle : ((2015, 'faces and figures of fortune astrological physiognomy in tamburlaine part 1'), (2020, 'how dey goin to kill othello key peele and shakespearean universality'), (2021, 'lessons for whiteness keith hamilton cobb s american moor')) +keywords : frozenset({'religious studies', 'literature'}) + +firstname : vanessa +lastname : corredera +middlename : ivette +year : 2012 +year_papertitle : ((2012, 'the early modern face physiognomy on and off the english stage'),) +keywords : frozenset({'british and irish literature', 'theater history'}) + +9/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jennifer +lastname : starkey +middlename : s +year : 2013 +year_papertitle : ((2013, 'soldiers and sailors in aristophanes babylonians'), (2018, 'sophoclean moments in greek comedy')) +keywords : frozenset({'literature'}) + +firstname : jennifer +lastname : starkey +middlename : sara +year : 2012 +year_papertitle : ((2012, 'sophocles the honeybee dramatic context and interaction'),) +keywords : frozenset({'classical literature', 'classical studies', 'ancient languages'}) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : josefa +lastname : alvarez +middlename : None +year : 2007 +year_papertitle : ((2007, 'el caballero del verde gaban algunas consideraciones desde el epicureismo y el estoicismo'), (2014, 'el pensamiento clasico en la poesia espanola de hoy el platon de los poetas')) +keywords : frozenset({'humanities', 'literature'}) + +firstname : jose +lastname : alvarez +middlename : o +year : 2005 +year_papertitle : ((2005, 'poetica de la brevedad en borges'),) +keywords : frozenset({'latin american literature'}) + +11/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 43.82352336247762 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_biology_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_biology_christoph_degree0_advisors_9015.log index e57a8e9..2659ad3 100644 --- a/src/dataprep/temp/trainlink_mag_proquest_biology_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/trainlink_mag_proquest_biology_christoph_degree0_advisors_9015.log @@ -5,7 +5,7 @@ Testing is False I set the write connection to the main database. id_field is [86803240] and will be passed to sql queries. finished setup ... -Time elapsed: 0.00044922431310017905 minutes +Time elapsed: 0.0008649428685506185 minutes SELECT relationship_id @@ -136,604 +136,5 @@ Time elapsed: 0.00044922431310017905 minutes WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL -Reading labelled examples from /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/training_biology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0.json -Time elapsed: 681.8146329641343 minutes - -Starting active labeling... -firstname : hong -lastname : wu -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'university of california los angeles'),) -all_us_institutions_year : ((2015, 'university of california los angeles'),) - -firstname : hongjiang -lastname : wu -middlename : None -year_range : (1991, 2005) -main_us_institutions_year : ((1991, 'university of california san diego'), (1993, 'university of california san diego'), (1993, 'united states department of veterans affairs'), (1994, 'university of california san diego'), (1995, 'university of california san diego'), (1996, 'united states department of veterans affairs'), (1997, 'united states department of veterans affairs'), (1998, 'united states department of veterans affairs'), (1999, 'university of california san diego'), (2000, 'veterans health administration'), (2002, 'veterans health administration')) -all_us_institutions_year : ((1991, 'university of california san diego'), (1993, 'united states department of veterans affairs'), (1993, 'university of california san diego'), (1994, 'united states department of veterans affairs'), (1994, 'university of california berkeley'), (1994, 'university of california san diego'), (1995, 'university of california san diego'), (1996, 'united states department of veterans affairs'), (1997, 'united states department of veterans affairs'), (1998, 'united states department of veterans affairs'), (1999, 'united states department of veterans affairs'), (1999, 'university of california san diego'), (2000, 'veterans health administration'), (2002, 'veterans health administration')) - -11/10 positive, 26/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : john -lastname : graca -middlename : v da -year_range : (2003,) -main_us_institutions_year : ((2003, 'texas a m university college station'),) -all_us_institutions_year : ((2003, 'texas a m university college station'),) - -firstname : j -lastname : graca -middlename : v da -year_range : (2005, 2018) -main_us_institutions_year : ((2005, 'texas a m university kingsville'), (2006, 'texas a m university kingsville'), (2013, 'texas a m university'), (2013, 'texas a m university kingsville'), (2015, 'texas a m university kingsville'), (2016, 'texas a m university'), (2018, 'texas a m university kingsville')) -all_us_institutions_year : ((2004, 'texas a m university'), (2005, 'texas a m university kingsville'), (2006, 'texas a m university kingsville'), (2013, 'texas a m university'), (2013, 'texas a m university kingsville'), (2014, 'texas a m university'), (2015, 'texas a m university kingsville'), (2016, 'texas a m university'), (2018, 'texas a m university kingsville')) - -11/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : sharron -lastname : quisenberry -middlename : s -year_range : (1992,) -main_us_institutions_year : ((1992, 'louisiana state university and agricultural mechanical college'),) -all_us_institutions_year : ((1992, 'louisiana state university and agricultural mechanical college'),) - -firstname : s -lastname : quisenberry -middlename : s -year_range : (1984, 1997) -main_us_institutions_year : ((1984, 'louisiana state university agricultural center'), (1985, 'louisiana state university agricultural center'), (1986, 'louisiana state university agricultural center'), (1987, 'louisiana state university agricultural center'), (1988, 'louisiana state university agricultural center'), (1989, 'louisiana state university agricultural center'), (1990, 'louisiana state university agricultural center'), (1991, 'louisiana state university agricultural center'), (1992, 'louisiana state university agricultural center'), (1993, 'louisiana state university agricultural center'), (1994, 'louisiana state university agricultural center'), (1995, 'louisiana state university agricultural center'), (1997, 'louisiana state university agricultural center')) -all_us_institutions_year : ((1984, 'louisiana state university agricultural center'), (1985, 'louisiana state university agricultural center'), (1986, 'louisiana state university agricultural center'), (1987, 'louisiana state university agricultural center'), (1988, 'louisiana state university agricultural center'), (1989, 'louisiana state university agricultural center'), (1990, 'louisiana state university agricultural center'), (1991, 'louisiana state university agricultural center'), (1992, 'louisiana state university agricultural center'), (1993, 'louisiana state university agricultural center'), (1994, 'louisiana state university agricultural center'), (1995, 'louisiana state university agricultural center'), (1997, 'louisiana state university agricultural center')) - -12/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : kelly -lastname : tatchell -middlename : None -year_range : (2004,) -main_us_institutions_year : ((2004, 'louisiana state university health sciences center shreveport'),) -all_us_institutions_year : ((2004, 'louisiana state university health sciences center shreveport'),) - -firstname : kelly -lastname : tatchell -middlename : None -year_range : (1975, 2021) -main_us_institutions_year : ((1984, 'university of pennsylvania'), (1986, 'university of pennsylvania'), (1996, 'louisiana state university'), (1997, 'louisiana state university'), (2000, 'louisiana state university'), (2001, 'louisiana state university'), (2002, 'lsu health sciences center shreveport'), (2003, 'lsu health sciences center shreveport'), (2005, 'lsu health sciences center shreveport'), (2007, 'lsu health sciences center shreveport'), (2008, 'lsu health sciences center shreveport'), (2009, 'lsu health sciences center shreveport'), (2010, 'lsu health sciences center shreveport'), (2011, 'lsu health sciences center shreveport'), (2012, 'lsu health sciences center shreveport'), (2017, 'lsu health sciences center shreveport'), (2018, 'lsu health sciences center shreveport'), (2019, 'louisiana state university'), (2021, 'lsu health sciences center shreveport')) -all_us_institutions_year : ((1984, 'university of pennsylvania'), (1986, 'university of pennsylvania'), (1996, 'louisiana state university'), (1997, 'louisiana state university'), (2000, 'indiana university'), (2000, 'louisiana state university'), (2000, 'lsu health sciences center shreveport'), (2001, 'louisiana state university'), (2002, 'lsu health sciences center shreveport'), (2003, 'lsu health sciences center shreveport'), (2005, 'louisiana state university in shreveport'), (2005, 'lsu health sciences center shreveport'), (2007, 'lsu health sciences center shreveport'), (2008, 'lsu health sciences center shreveport'), (2009, 'lsu health sciences center shreveport'), (2010, 'lsu health sciences center shreveport'), (2011, 'lsu health sciences center shreveport'), (2012, 'lsu health sciences center shreveport'), (2017, 'lsu health sciences center shreveport'), (2018, 'lsu health sciences center shreveport'), (2019, 'louisiana state university'), (2021, 'lsu health sciences center shreveport')) - -13/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : edward -lastname : mccabe -middlename : r b -year_range : (2011,) -main_us_institutions_year : ((2011, 'university of california los angeles'),) -all_us_institutions_year : ((2011, 'university of california los angeles'),) - -firstname : edward -lastname : mccabe -middlename : r b -year_range : (1972, 2014) -main_us_institutions_year : ((1982, 'university of colorado denver'), (1983, 'university of colorado denver'), (1984, 'university of colorado denver'), (1985, 'university of colorado denver'), (1986, 'university of colorado denver'), (1987, 'university of colorado denver'), (1988, 'university of colorado denver'), (2011, 'university of colorado denver'), (2012, 'university of colorado denver'), (2013, 'university of colorado denver')) -all_us_institutions_year : ((1982, 'university of colorado denver'), (1983, 'university of colorado denver'), (1984, 'university of colorado denver'), (1985, 'university of colorado denver'), (1986, 'university of colorado denver'), (1987, 'university of colorado denver'), (1988, 'university of colorado denver'), (2011, 'university of colorado denver'), (2012, 'university of colorado denver'), (2013, 'elsevier'), (2013, 'university of colorado denver')) - -14/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : dihua -lastname : yu -middlename : None -year_range : (2006,) -main_us_institutions_year : ((2006, 'university of texas graduate school of biomedical sciences at houston'),) -all_us_institutions_year : ((2006, 'university of texas graduate school of biomedical sciences at houston'),) - -firstname : dihua -lastname : yu -middlename : None -year_range : (2010, 2013) -main_us_institutions_year : ((2010, 'university of texas md anderson cancer center'), (2013, 'university of texas md anderson cancer center')) -all_us_institutions_year : ((2010, 'university of texas md anderson cancer center'), (2013, 'university of texas md anderson cancer center')) - -14/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : guillermina -lastname : lozano -middlename : None -year_range : (1995,) -main_us_institutions_year : ((1995, 'university of texas graduate school of biomedical sciences at houston'),) -all_us_institutions_year : ((1995, 'university of texas graduate school of biomedical sciences at houston'),) - -firstname : guillermina -lastname : lozano -middlename : None -year_range : (2015, 2016) -main_us_institutions_year : ((2015, 'university of texas md anderson cancer center'), (2015, 'university of texas at austin'), (2016, 'university of texas md anderson cancer center'), (2016, 'university of texas at austin')) -all_us_institutions_year : ((2015, 'university of texas at austin'), (2015, 'university of texas md anderson cancer center'), (2016, 'university of texas at austin'), (2016, 'university of texas md anderson cancer center')) - -14/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : steven -lastname : martin -middlename : k st -year_range : (1993,) -main_us_institutions_year : ((1993, 'ohio state university'),) -all_us_institutions_year : ((1993, 'ohio state university'),) - -firstname : s -lastname : martin -middlename : k st -year_range : (1986, 2011) -main_us_institutions_year : ((1986, 'ohio agricultural research and development center'), (1994, 'ohio agricultural research and development center'), (1999, 'ohio agricultural research and development center'), (2010, 'ohio agricultural research and development center')) -all_us_institutions_year : ((1986, 'ohio agricultural research and development center'), (1994, 'ohio agricultural research and development center'), (1999, 'ohio agricultural research and development center'), (2010, 'ohio agricultural research and development center')) - -14/10 positive, 28/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : r -lastname : linnoila -middlename : ilona -year_range : (1998,) -main_us_institutions_year : ((1998, 'georgetown university medical center'),) -all_us_institutions_year : ((1998, 'georgetown university medical center'),) - -firstname : r -lastname : linnoila -middlename : ilona -year_range : (1988, 1990) -main_us_institutions_year : ((1988, 'uniformed services university of the health sciences'), (1990, 'uniformed services university of the health sciences')) -all_us_institutions_year : ((1988, 'uniformed services university of the health sciences'), (1990, 'uniformed services university of the health sciences')) - -14/10 positive, 28/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : a -lastname : cartwright -middlename : lee -year_range : (1999,) -main_us_institutions_year : ((1999, 'texas a m university college station'),) -all_us_institutions_year : ((1999, 'texas a m university college station'),) - -firstname : a -lastname : cartwright -middlename : lee -year_range : (1991, 1994) -main_us_institutions_year : ((1991, 'united states department of agriculture'),) -all_us_institutions_year : ((1991, 'united states department of agriculture'),) - -14/10 positive, 29/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : warren -lastname : liao -middlename : sl -year_range : (1999,) -main_us_institutions_year : ((1999, 'university of texas graduate school of biomedical sciences at houston'),) -all_us_institutions_year : ((1999, 'university of texas graduate school of biomedical sciences at houston'),) - -firstname : w -lastname : liao -middlename : sl -year_range : (2012, 2014) -main_us_institutions_year : ((2012, 'university of texas md anderson cancer center'), (2014, 'university of texas md anderson cancer center')) -all_us_institutions_year : ((2012, 'university of texas md anderson cancer center'), (2014, 'university of texas md anderson cancer center')) - -14/10 positive, 30/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : george -lastname : kemp -middlename : e -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of alabama at birmingham'),) -all_us_institutions_year : ((2000, 'university of alabama at birmingham'),) - -firstname : george -lastname : kemp -middlename : None -year_range : (1983, 1986) -main_us_institutions_year : ((1983, 'boys town'), (1983, 'university of alabama'), (1986, 'university of alabama')) -all_us_institutions_year : ((1983, 'boys town'), (1983, 'university of alabama'), (1986, 'university of alabama')) - -14/10 positive, 31/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : thomas -lastname : plattsmills -middlename : a e -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of virginia main campus'),) -all_us_institutions_year : ((2000, 'university of virginia main campus'),) - -firstname : t -lastname : plattsmills -middlename : a e -year_range : (1983, 2018) -main_us_institutions_year : ((1986, 'university of virginia'), (1987, 'university of virginia'), (1992, 'university of virginia'), (1993, 'university of virginia'), (1994, 'university of virginia'), (1997, 'university of virginia'), (2001, 'university of virginia'), (2002, 'university of virginia'), (2003, 'university of virginia'), (2004, 'university of virginia'), (2005, 'university of virginia'), (2006, 'university of virginia'), (2007, 'university of virginia'), (2008, 'university of virginia'), (2009, 'university of virginia'), (2010, 'university of virginia'), (2011, 'university of virginia'), (2012, 'university of virginia'), (2013, 'university of virginia'), (2015, 'university of virginia'), (2018, 'university of virginia')) -all_us_institutions_year : ((1986, 'university of virginia'), (1987, 'university of virginia'), (1992, 'university of virginia'), (1993, 'university of virginia'), (1994, 'university of virginia'), (1997, 'university of virginia'), (2001, 'university of virginia'), (2002, 'university of virginia'), (2003, 'university of virginia'), (2004, 'university of virginia'), (2005, 'university of virginia'), (2006, 'university of virginia'), (2007, 'university of virginia'), (2008, 'university of virginia'), (2009, 'university of virginia'), (2010, 'university of virginia'), (2011, 'university of virginia'), (2012, 'university of virginia'), (2013, 'university of virginia'), (2015, 'university of virginia'), (2018, 'university of virginia')) - -14/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : george -lastname : giovanni -middlename : d di -year_range : (2013,) -main_us_institutions_year : ((2013, 'university of texas at el paso'),) -all_us_institutions_year : ((2013, 'university of texas at el paso'),) - -firstname : g -lastname : giovanni -middlename : d di -year_range : (2005, 2007) -main_us_institutions_year : ((2005, 'texas a m university'), (2007, 'texas a m university')) -all_us_institutions_year : ((2005, 'texas a m university'), (2007, 'texas a m university'), (2010, 'texas a m university'), (2012, 'texas a m university')) - -15/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : michael -lastname : vandehaar -middlename : None -year_range : (2006,) -main_us_institutions_year : ((2006, 'michigan state university'),) -all_us_institutions_year : ((2006, 'michigan state university'),) - -firstname : m -lastname : vandehaar -middlename : j -year_range : (1991, 2020) -main_us_institutions_year : ((1991, 'michigan state university'), (1994, 'michigan state university'), (1995, 'michigan state university'), (1996, 'michigan state university'), (1997, 'michigan state university'), (1998, 'michigan state university'), (1999, 'michigan state university'), (2000, 'michigan state university'), (2001, 'michigan state university'), (2002, 'michigan state university'), (2004, 'michigan state university'), (2005, 'michigan state university'), (2006, 'michigan state university'), (2008, 'michigan state university'), (2009, 'michigan state university'), (2010, 'michigan state university'), (2011, 'michigan state university'), (2013, 'michigan state university'), (2014, 'michigan state university'), (2015, 'michigan state university'), (2016, 'michigan state university'), (2017, 'michigan state university'), (2018, 'michigan state university'), (2019, 'michigan state university'), (2020, 'michigan state university')) -all_us_institutions_year : ((1991, 'michigan state university'), (1994, 'michigan state university'), (1994, 'upjohn'), (1995, 'michigan state university'), (1995, 'upjohn'), (1996, 'michigan state university'), (1997, 'michigan state university'), (1998, 'michigan state university'), (1999, 'michigan state university'), (2000, 'michigan state university'), (2001, 'michigan state university'), (2002, 'michigan state university'), (2004, 'michigan state university'), (2005, 'michigan state university'), (2006, 'michigan state university'), (2008, 'michigan state university'), (2009, 'michigan state university'), (2010, 'michigan state university'), (2011, 'michigan state university'), (2013, 'michigan state university'), (2014, 'michigan state university'), (2015, 'michigan state university'), (2016, 'michigan state university'), (2017, 'michigan state university'), (2018, 'michigan state university'), (2019, 'michigan state university'), (2020, 'michigan state university')) - -15/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : james -lastname : cowan -middlename : None -year_range : (2011,) -main_us_institutions_year : ((2011, 'ohio state university'),) -all_us_institutions_year : ((2011, 'ohio state university'),) - -firstname : j -lastname : cowan -middlename : a -year_range : (1994, 2015) -main_us_institutions_year : ((1994, 'ohio state university'), (1996, 'ohio state university'), (2009, 'ohio state university'), (2013, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university')) -all_us_institutions_year : ((1994, 'ohio state university'), (1995, 'ohio state university'), (1996, 'ohio state university'), (1999, 'ohio state university'), (2009, 'ohio state university'), (2010, 'ohio state university'), (2013, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university')) - -16/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : david -lastname : clayton -middlename : a -year_range : (1995,) -main_us_institutions_year : ((1995, 'stanford university'),) -all_us_institutions_year : ((1995, 'stanford university'),) - -firstname : david -lastname : clayton -middlename : None -year_range : (1984, 2017) -main_us_institutions_year : None -all_us_institutions_year : ((2001, 'university of southern california'),) - -17/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jie -lastname : chen -middlename : None -year_range : (2009,) -main_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) -all_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) - -firstname : jie -lastname : chen -middlename : None -year_range : (2002, 2021) -main_us_institutions_year : None -all_us_institutions_year : ((2007, 'vision sciences inc'), (2015, 'vision sciences inc')) - -17/10 positive, 33/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : david -lastname : clayton -middlename : a -year_range : (1995,) -main_us_institutions_year : ((1995, 'stanford university'),) -all_us_institutions_year : ((1995, 'stanford university'),) - -firstname : david -lastname : clayton -middlename : None -year_range : (1984, 2017) -main_us_institutions_year : None -all_us_institutions_year : ((2001, 'university of southern california'),) - -17/10 positive, 32/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : jie -lastname : chen -middlename : None -year_range : (2009,) -main_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) -all_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) - -firstname : jie -lastname : chen -middlename : None -year_range : (2002, 2021) -main_us_institutions_year : None -all_us_institutions_year : ((2007, 'vision sciences inc'), (2015, 'vision sciences inc')) - -17/10 positive, 33/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : yi -lastname : zhang -middlename : heng percival -year_range : (2013,) -main_us_institutions_year : ((2013, 'virginia tech'),) -all_us_institutions_year : ((2013, 'virginia tech'),) - -firstname : yiheng -lastname : zhang -middlename : percival -year_range : (2012, 2017) -main_us_institutions_year : ((2012, 'oak ridge national laboratory'), (2012, 'virginia tech'), (2014, 'virginia tech'), (2017, 'chinese academy of sciences')) -all_us_institutions_year : ((2012, 'oak ridge national laboratory'), (2012, 'virginia tech'), (2014, 'virginia tech'), (2017, 'chinese academy of sciences'), (2017, 'virginia tech')) - -17/10 positive, 34/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : udeni -lastname : balasuryia -middlename : b r -year_range : (2013,) -main_us_institutions_year : ((2013, 'university of kentucky'),) -all_us_institutions_year : ((2013, 'university of kentucky'),) - -firstname : u -lastname : balasuriya -middlename : b r -year_range : (2007, 2018) -main_us_institutions_year : ((2007, 'university of kentucky'), (2010, 'university of kentucky'), (2011, 'university of kentucky'), (2012, 'university of kentucky'), (2016, 'university of kentucky'), (2018, 'university of kentucky')) -all_us_institutions_year : ((2007, 'university of kentucky'), (2010, 'university of kentucky'), (2011, 'university of kentucky'), (2012, 'university of kentucky'), (2016, 'university of kentucky'), (2018, 'university of kentucky')) - -18/10 positive, 34/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : j -lastname : murphy -middlename : paul -year_range : (2006,) -main_us_institutions_year : ((2006, 'north carolina state university'),) -all_us_institutions_year : ((2006, 'north carolina state university'),) - -firstname : jason -lastname : murphy -middlename : p -year_range : (2015, 2017) -main_us_institutions_year : ((2015, 'university of chicago'), (2016, 'northwestern university'), (2017, 'northwestern university'), (2017, 'university of chicago')) -all_us_institutions_year : ((2015, 'university of chicago'), (2016, 'northwestern university'), (2017, 'northwestern university'), (2017, 'university of chicago')) - -19/10 positive, 34/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : siu -lastname : lee -middlename : sylvia -year_range : (2009,) -main_us_institutions_year : ((2009, 'cornell university'),) -all_us_institutions_year : ((2009, 'cornell university'),) - -firstname : siu -lastname : lee -middlename : sylvia -year_range : (1997, 2003) -main_us_institutions_year : ((1997, 'baylor college of medicine'), (1999, 'baylor college of medicine'), (2000, 'baylor college of medicine'), (2001, 'harvard university'), (2003, 'baylor college of medicine'), (2003, 'harvard university')) -all_us_institutions_year : ((1997, 'baylor college of medicine'), (1999, 'baylor college of medicine'), (2000, 'baylor college of medicine'), (2001, 'harvard university'), (2003, 'baylor college of medicine'), (2003, 'harvard university')) - -19/10 positive, 35/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : malcolm -lastname : moore -middlename : a s -year_range : (2013,) -main_us_institutions_year : ((2013, 'weill medical college of cornell university'),) -all_us_institutions_year : ((2013, 'weill medical college of cornell university'),) - -firstname : malcolm -lastname : moore -middlename : a s -year_range : (1996, 2013) -main_us_institutions_year : ((1996, 'memorial sloan kettering cancer center'), (2004, 'memorial sloan kettering cancer center'), (2009, 'memorial sloan kettering cancer center'), (2009, 'kettering university'), (2013, 'kettering university')) -all_us_institutions_year : ((1993, 'kettering university'), (1995, 'kettering university'), (1996, 'memorial sloan kettering cancer center'), (1997, 'kettering university'), (1999, 'kettering university'), (2004, 'memorial sloan kettering cancer center'), (2009, 'kettering university'), (2009, 'memorial sloan kettering cancer center'), (2013, 'kettering university')) - -19/10 positive, 36/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : siu -lastname : lee -middlename : sylvia -year_range : (2009,) -main_us_institutions_year : ((2009, 'cornell university'),) -all_us_institutions_year : ((2009, 'cornell university'),) - -firstname : siu -lastname : lee -middlename : sylvia -year_range : (1997, 2003) -main_us_institutions_year : ((1997, 'baylor college of medicine'), (1999, 'baylor college of medicine'), (2000, 'baylor college of medicine'), (2001, 'harvard university'), (2003, 'baylor college of medicine'), (2003, 'harvard university')) -all_us_institutions_year : ((1997, 'baylor college of medicine'), (1999, 'baylor college of medicine'), (2000, 'baylor college of medicine'), (2001, 'harvard university'), (2003, 'baylor college of medicine'), (2003, 'harvard university')) - -19/10 positive, 35/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : malcolm -lastname : moore -middlename : a s -year_range : (2013,) -main_us_institutions_year : ((2013, 'weill medical college of cornell university'),) -all_us_institutions_year : ((2013, 'weill medical college of cornell university'),) - -firstname : malcolm -lastname : moore -middlename : a s -year_range : (1996, 2013) -main_us_institutions_year : ((1996, 'memorial sloan kettering cancer center'), (2004, 'memorial sloan kettering cancer center'), (2009, 'memorial sloan kettering cancer center'), (2009, 'kettering university'), (2013, 'kettering university')) -all_us_institutions_year : ((1993, 'kettering university'), (1995, 'kettering university'), (1996, 'memorial sloan kettering cancer center'), (1997, 'kettering university'), (1999, 'kettering university'), (2004, 'memorial sloan kettering cancer center'), (2009, 'kettering university'), (2009, 'memorial sloan kettering cancer center'), (2013, 'kettering university')) - -19/10 positive, 36/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : joe -lastname : gray -middlename : None -year_range : (2013,) -main_us_institutions_year : ((2013, 'university of california berkeley'),) -all_us_institutions_year : ((2013, 'university of california berkeley'),) - -firstname : j -lastname : gray -middlename : dixon -year_range : (1985, 1990) -main_us_institutions_year : ((1985, 'university of california los angeles'), (1990, 'university of california los angeles')) -all_us_institutions_year : ((1985, 'university of california los angeles'), (1990, 'university of california los angeles')) - -19/10 positive, 36/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jacqueline -lastname : barton -middlename : k -year_range : (1995,) -main_us_institutions_year : ((1995, 'california institute of technology'),) -all_us_institutions_year : ((1995, 'california institute of technology'),) - -firstname : j -lastname : barton -middlename : None -year_range : (1889, 2020) -main_us_institutions_year : ((1975, 'stanford university'), (1979, 'stanford university'), (1982, 'stanford university'), (1985, 'university of pennsylvania'), (1986, 'richard stockton college of new jersey'), (1988, 'allegheny general hospital'), (1989, 'stanford university'), (1992, 'university of california davis'), (1992, 'stanford university'), (1995, 'stanford university'), (1997, 'stanford university'), (1999, 'harvard university'), (2000, 'stanford university'), (2002, 'stanford university'), (2004, 'stanford university'), (2006, 'stanford university'), (2014, 'stanford university')) -all_us_institutions_year : ((1975, 'stanford university'), (1979, 'stanford university'), (1982, 'stanford university'), (1985, 'university of pennsylvania'), (1986, 'richard stockton college of new jersey'), (1988, 'allegheny general hospital'), (1989, 'stanford university'), (1992, 'stanford university'), (1992, 'university of california davis'), (1994, 'stanford university'), (1995, 'stanford university'), (1997, 'stanford university'), (1999, 'harvard university'), (2000, 'stanford university'), (2002, 'stanford university'), (2003, 'stanford university'), (2004, 'stanford university'), (2006, 'stanford university'), (2009, 'stanford university'), (2014, 'stanford university'), (2015, 'stanford university')) - -19/10 positive, 37/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : stephen -lastname : wassall -middlename : r -year_range : (2000,) -main_us_institutions_year : ((2000, 'purdue university'),) -all_us_institutions_year : ((2000, 'purdue university'),) - -firstname : stephen -lastname : wassall -middlename : r -year_range : (1983, 2021) -main_us_institutions_year : ((1987, 'indiana university purdue university indianapolis'), (1988, 'indiana university purdue university indianapolis'), (1990, 'indiana university purdue university indianapolis'), (1991, 'indiana university purdue university indianapolis'), (1992, 'indiana university purdue university indianapolis'), (1993, 'indiana university purdue university indianapolis'), (1994, 'indiana university purdue university indianapolis'), (1996, 'indiana university purdue university indianapolis'), (1999, 'indiana university purdue university indianapolis'), (2001, 'indiana university purdue university indianapolis'), (2002, 'indiana university purdue university indianapolis'), (2003, 'indiana university purdue university indianapolis'), (2004, 'indiana university purdue university indianapolis'), (2005, 'indiana university purdue university indianapolis'), (2006, 'indiana university purdue university indianapolis'), (2008, 'indiana university purdue university indianapolis'), (2009, 'indiana university purdue university indianapolis'), (2010, 'indiana university purdue university indianapolis'), (2011, 'indiana university purdue university indianapolis'), (2012, 'indiana university purdue university indianapolis'), (2013, 'indiana university purdue university indianapolis'), (2014, 'indiana university purdue university indianapolis'), (2015, 'indiana university purdue university indianapolis'), (2016, 'indiana university purdue university indianapolis'), (2017, 'indiana university purdue university indianapolis'), (2018, 'indiana university purdue university indianapolis'), (2020, 'indiana university purdue university indianapolis')) -all_us_institutions_year : ((1987, 'indiana university purdue university indianapolis'), (1988, 'indiana university purdue university indianapolis'), (1990, 'indiana university purdue university indianapolis'), (1991, 'indiana university purdue university indianapolis'), (1992, 'indiana university purdue university indianapolis'), (1993, 'indiana university'), (1993, 'indiana university purdue university indianapolis'), (1994, 'indiana university purdue university indianapolis'), (1996, 'indiana university purdue university indianapolis'), (1999, 'indiana university purdue university indianapolis'), (2001, 'indiana university'), (2001, 'indiana university purdue university indianapolis'), (2002, 'indiana university'), (2002, 'indiana university purdue university indianapolis'), (2003, 'indiana university purdue university indianapolis'), (2004, 'indiana university'), (2004, 'indiana university purdue university indianapolis'), (2005, 'indiana university purdue university indianapolis'), (2006, 'indiana university purdue university indianapolis'), (2008, 'indiana university purdue university indianapolis'), (2009, 'indiana university purdue university indianapolis'), (2010, 'indiana university purdue university indianapolis'), (2011, 'indiana university purdue university indianapolis'), (2012, 'indiana university purdue university indianapolis'), (2013, 'indiana university purdue university indianapolis'), (2014, 'indiana university purdue university indianapolis'), (2015, 'indiana university purdue university indianapolis'), (2016, 'indiana university purdue university indianapolis'), (2017, 'indiana university purdue university indianapolis'), (2018, 'indiana university purdue university indianapolis'), (2020, 'indiana university purdue university indianapolis')) - -19/10 positive, 38/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : arrigo -lastname : debenedetti -middlename : None -year_range : (2001,) -main_us_institutions_year : ((2001, 'louisiana state university health sciences center shreveport'),) -all_us_institutions_year : ((2001, 'louisiana state university health sciences center shreveport'),) - -firstname : arrigo -lastname : debenedetti -middlename : None -year_range : (2002, 2019) -main_us_institutions_year : ((2002, 'lsu health sciences center shreveport'), (2004, 'lsu health sciences center shreveport'), (2005, 'lsu health sciences center shreveport'), (2006, 'louisiana state university'), (2006, 'louisiana state university in shreveport'), (2007, 'lsu health sciences center shreveport'), (2007, 'louisiana state university'), (2008, 'lsu health sciences center shreveport'), (2008, 'louisiana state university'), (2010, 'lsu health sciences center shreveport'), (2016, 'lsu health sciences center shreveport'), (2019, 'lsu health sciences center shreveport')) -all_us_institutions_year : ((2002, 'lsu health sciences center shreveport'), (2004, 'lsu health sciences center shreveport'), (2005, 'lsu health sciences center shreveport'), (2006, 'louisiana state university'), (2006, 'louisiana state university in shreveport'), (2007, 'louisiana state university'), (2007, 'lsu health sciences center shreveport'), (2008, 'louisiana state university'), (2008, 'lsu health sciences center shreveport'), (2010, 'lsu health sciences center shreveport'), (2016, 'lsu health sciences center shreveport'), (2019, 'lsu health sciences center shreveport'), (2020, 'louisiana state university')) - -20/10 positive, 38/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : u -lastname : rao -middlename : subrahmanyeswara -year_range : (2003,) -main_us_institutions_year : ((2003, 'university of nebraska medical center'),) -all_us_institutions_year : ((2003, 'university of nebraska medical center'),) - -firstname : u -lastname : rao -middlename : subrahmanyeswara -year_range : (2006, 2013) -main_us_institutions_year : ((2008, 'texas tech university health sciences center'), (2009, 'texas tech university health sciences center'), (2010, 'texas tech university health sciences center'), (2011, 'texas tech university health sciences center'), (2012, 'texas tech university health sciences center'), (2013, 'texas tech university health sciences center')) -all_us_institutions_year : ((2008, 'texas tech university health sciences center'), (2009, 'texas tech university health sciences center'), (2010, 'texas tech university health sciences center'), (2011, 'texas tech university health sciences center'), (2012, 'texas tech university health sciences center'), (2013, 'texas tech university health sciences center')) - -21/10 positive, 38/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jeffrey -lastname : benovic -middlename : l -year_range : (1995,) -main_us_institutions_year : ((1995, 'temple university'),) -all_us_institutions_year : ((1995, 'temple university'),) - -firstname : jeffrey -lastname : benovic -middlename : l -year_range : (1983, 2021) -main_us_institutions_year : ((1983, 'howard hughes medical institute'), (1986, 'howard hughes medical institute'), (1987, 'howard hughes medical institute'), (1988, 'howard hughes medical institute'), (1989, 'howard hughes medical institute'), (1990, 'howard hughes medical institute'), (1991, 'thomas jefferson university'), (1991, 'howard hughes medical institute'), (1992, 'thomas jefferson university'), (1993, 'thomas jefferson university'), (1994, 'thomas jefferson university'), (1995, 'thomas jefferson university'), (1996, 'thomas jefferson university'), (1997, 'thomas jefferson university'), (1998, 'thomas jefferson university'), (1999, 'thomas jefferson university'), (2000, 'thomas jefferson university'), (2001, 'thomas jefferson university'), (2002, 'thomas jefferson university'), (2003, 'thomas jefferson university'), (2004, 'thomas jefferson university'), (2005, 'thomas jefferson university'), (2006, 'thomas jefferson university'), (2007, 'thomas jefferson university'), (2008, 'thomas jefferson university'), (2009, 'thomas jefferson university'), (2010, 'thomas jefferson university'), (2011, 'thomas jefferson university'), (2012, 'thomas jefferson university'), (2013, 'thomas jefferson university'), (2014, 'thomas jefferson university'), (2015, 'thomas jefferson university'), (2016, 'thomas jefferson university'), (2017, 'thomas jefferson university'), (2018, 'thomas jefferson university'), (2019, 'thomas jefferson university'), (2020, 'thomas jefferson university'), (2021, 'thomas jefferson university')) -all_us_institutions_year : ((1983, 'howard hughes medical institute'), (1986, 'howard hughes medical institute'), (1987, 'howard hughes medical institute'), (1988, 'howard hughes medical institute'), (1989, 'howard hughes medical institute'), (1990, 'howard hughes medical institute'), (1991, 'howard hughes medical institute'), (1991, 'thomas jefferson university'), (1992, 'thomas jefferson university'), (1993, 'thomas jefferson university'), (1994, 'thomas jefferson university'), (1995, 'thomas jefferson university'), (1996, 'thomas jefferson university'), (1997, 'thomas jefferson university'), (1998, 'thomas jefferson university'), (1999, 'thomas jefferson university'), (1999, 'university of texas southwestern medical center'), (2000, 'thomas jefferson university'), (2001, 'thomas jefferson university'), (2002, 'thomas jefferson university'), (2003, 'thomas jefferson university'), (2004, 'thomas jefferson university'), (2005, 'thomas jefferson university'), (2006, 'thomas jefferson university'), (2007, 'thomas jefferson university'), (2008, 'thomas jefferson university'), (2009, 'thomas jefferson university'), (2010, 'thomas jefferson university'), (2011, 'thomas jefferson university'), (2012, 'thomas jefferson university'), (2013, 'thomas jefferson university'), (2014, 'thomas jefferson university'), (2015, 'thomas jefferson university'), (2016, 'thomas jefferson university'), (2017, 'thomas jefferson university'), (2018, 'thomas jefferson university'), (2019, 'thomas jefferson university'), (2020, 'thomas jefferson university'), (2021, 'thomas jefferson university')) - -21/10 positive, 38/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : n -lastname : holbrook -middlename : michele -year_range : (2002,) -main_us_institutions_year : ((2002, 'harvard university'),) -all_us_institutions_year : ((2002, 'harvard university'),) - -firstname : n -lastname : holbrook -middlename : michele -year_range : (1989, 1997) -main_us_institutions_year : ((1991, 'stanford university'), (1992, 'stanford university'), (1995, 'stanford university'), (1996, 'stanford university')) -all_us_institutions_year : ((1991, 'stanford university'), (1992, 'stanford university'), (1995, 'stanford university'), (1996, 'stanford university')) - -21/10 positive, 39/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : william -lastname : merrick -middlename : c -year_range : (2001,) -main_us_institutions_year : ((2001, 'case western reserve university health sciences'),) -all_us_institutions_year : ((2001, 'case western reserve university health sciences'),) - -firstname : william -lastname : merrick -middlename : c -year_range : (1972, 2005) -main_us_institutions_year : ((1972, 'national institutes of health'), (1973, 'national institutes of health'), (1975, 'national institutes of health'), (1976, 'national institutes of health'), (1978, 'national institutes of health'), (1992, 'national institutes of health'), (2005, 'university of medicine and dentistry of new jersey')) -all_us_institutions_year : ((1972, 'national institutes of health'), (1973, 'national institutes of health'), (1975, 'national institutes of health'), (1976, 'national institutes of health'), (1978, 'national institutes of health'), (1992, 'national institutes of health'), (2005, 'university of medicine and dentistry of new jersey')) - -21/10 positive, 40/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -Finished labeling -Done in 1200.48080701828 minutes. +reading from /mnt/ssd/DedupeFiles/flavio/issue-21/advisors/settings_biology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Done in 661.3392433365186 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..77d834e --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_advisors_9015.log @@ -0,0 +1,790 @@ +Namespace(testing=False, verbose=1, field=['business'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [144133560] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0009664694468180338 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 41.87626347541809 minutes + +Starting active labeling... +firstname : marc +lastname : holzer +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'rutgers university'),) +all_us_institutions_year : ((2008, 'rutgers university'),) + +firstname : marc +lastname : holzer +middlename : None +year_range : (1974, 2019) +main_us_institutions_year : ((1987, 'new york college of health professions'), (1993, 'rutgers university'), (1997, 'rutgers university'), (1999, 'rutgers university'), (2001, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2011, 'rutgers university'), (2012, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university')) +all_us_institutions_year : ((1987, 'new york college of health professions'), (1993, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2001, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2011, 'rutgers university'), (2012, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : john +lastname : yinger +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'syracuse university'),) +all_us_institutions_year : ((2010, 'syracuse university'),) + +firstname : john +lastname : yinger +middlename : None +year_range : (1976, 2021) +main_us_institutions_year : ((1976, 'harvard university'), (1977, 'harvard university'), (1978, 'harvard university'), (1979, 'harvard university'), (1982, 'harvard university'), (1986, 'harvard university'), (1988, 'syracuse university'), (1990, 'syracuse university'), (1991, 'syracuse university'), (1992, 'syracuse university'), (1993, 'syracuse university'), (1995, 'syracuse university'), (1997, 'syracuse university'), (1998, 'syracuse university'), (1999, 'syracuse university'), (2000, 'syracuse university'), (2002, 'syracuse university'), (2003, 'syracuse university'), (2005, 'syracuse university'), (2006, 'syracuse university'), (2007, 'syracuse university'), (2008, 'syracuse university'), (2010, 'syracuse university'), (2011, 'syracuse university'), (2014, 'syracuse university'), (2015, 'syracuse university'), (2016, 'syracuse university'), (2017, 'syracuse university'), (2020, 'syracuse university'), (2021, 'syracuse university')) +all_us_institutions_year : ((1976, 'harvard university'), (1977, 'harvard university'), (1978, 'harvard university'), (1979, 'harvard university'), (1982, 'harvard university'), (1983, 'harvard university'), (1986, 'harvard university'), (1988, 'syracuse university'), (1990, 'syracuse university'), (1991, 'syracuse university'), (1992, 'syracuse university'), (1993, 'syracuse university'), (1995, 'syracuse university'), (1997, 'syracuse university'), (1998, 'syracuse university'), (1999, 'syracuse university'), (2000, 'syracuse university'), (2001, 'syracuse university'), (2002, 'syracuse university'), (2003, 'syracuse university'), (2004, 'syracuse university'), (2005, 'syracuse university'), (2006, 'syracuse university'), (2007, 'syracuse university'), (2008, 'syracuse university'), (2009, 'syracuse university'), (2010, 'syracuse university'), (2011, 'syracuse university'), (2014, 'syracuse university'), (2015, 'syracuse university'), (2016, 'syracuse university'), (2017, 'syracuse university'), (2019, 'syracuse university'), (2020, 'syracuse university'), (2021, 'syracuse university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : brian +lastname : rubin +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'northcentral university'),) +all_us_institutions_year : ((2015, 'northcentral university'),) + +firstname : brian +lastname : rubineau +middlename : None +year_range : (2010, 2020) +main_us_institutions_year : ((2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university')) +all_us_institutions_year : ((2003, 'united states department of the navy'), (2007, 'cornell university'), (2008, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2019, 'cornell university')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jacob +lastname : rose +middlename : m +year_range : (2009,) +main_us_institutions_year : ((2009, 'southern illinois university at carbondale'),) +all_us_institutions_year : ((2009, 'southern illinois university at carbondale'),) + +firstname : jacob +lastname : rosen +middlename : None +year_range : (2013, 2018) +main_us_institutions_year : ((2013, 'university of michigan'), (2015, 'massachusetts institute of technology'), (2016, 'massachusetts institute of technology'), (2016, 'mitre corporation')) +all_us_institutions_year : ((2013, 'university of michigan'), (2015, 'massachusetts institute of technology'), (2016, 'massachusetts institute of technology'), (2016, 'mitre corporation'), (2017, 'massachusetts institute of technology'), (2018, 'massachusetts institute of technology')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : petecornell +middlename : elisabeth +year_range : (2000,) +main_us_institutions_year : ((2000, 'stanford university'),) +all_us_institutions_year : ((2000, 'stanford university'),) + +firstname : m +lastname : patecornell +middlename : elisabeth +year_range : (1984, 2021) +main_us_institutions_year : ((1984, 'stanford university'), (1985, 'stanford university'), (1989, 'stanford university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'stanford university'), (1996, 'stanford university'), (1998, 'stanford university'), (1999, 'stanford university'), (2000, 'stanford university'), (2001, 'stanford university'), (2002, 'stanford university'), (2003, 'stanford university'), (2004, 'stanford university'), (2005, 'stanford university'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2009, 'stanford university'), (2011, 'stanford university'), (2012, 'stanford university'), (2015, 'stanford university'), (2016, 'stanford university'), (2018, 'stanford university'), (2020, 'stanford university'), (2021, 'stanford university')) +all_us_institutions_year : ((1984, 'stanford university'), (1985, 'stanford university'), (1989, 'stanford university'), (1990, 'stanford university'), (1991, 'stanford university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'stanford university'), (1996, 'stanford university'), (1998, 'stanford university'), (1999, 'stanford university'), (2000, 'stanford university'), (2001, 'stanford university'), (2002, 'stanford university'), (2003, 'stanford university'), (2004, 'stanford university'), (2005, 'stanford university'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2009, 'stanford university'), (2011, 'stanford university'), (2012, 'stanford university'), (2015, 'stanford university'), (2016, 'stanford university'), (2018, 'stanford university'), (2020, 'stanford university'), (2021, 'stanford university')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : george +lastname : furstenburg +middlename : m von +year_range : (2001,) +main_us_institutions_year : ((2001, 'indiana university'),) +all_us_institutions_year : ((2001, 'indiana university'),) + +firstname : george +lastname : furstenberg +middlename : m von +year_range : (1970, 2014) +main_us_institutions_year : ((1971, 'indiana university'), (1972, 'indiana university'), (1973, 'indiana university'), (1974, 'indiana university'), (1975, 'indiana university'), (1978, 'indiana university'), (1979, 'indiana university'), (1985, 'indiana university'), (1986, 'indiana university'), (1988, 'indiana university'), (1989, 'indiana university'), (1991, 'indiana university'), (1993, 'indiana university'), (1996, 'indiana university'), (1998, 'indiana university'), (2000, 'fordham university'), (2001, 'fordham university'), (2002, 'indiana university'), (2002, 'fordham university'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2008, 'indiana university'), (2012, 'indiana university')) +all_us_institutions_year : ((1971, 'indiana university'), (1972, 'indiana university'), (1973, 'indiana university'), (1974, 'indiana university'), (1975, 'indiana university'), (1978, 'indiana university'), (1979, 'indiana university'), (1985, 'indiana university'), (1986, 'indiana university'), (1988, 'indiana university'), (1989, 'indiana university'), (1991, 'indiana university'), (1993, 'indiana university'), (1996, 'indiana university'), (1997, 'indiana university'), (1998, 'indiana university'), (2000, 'fordham university'), (2001, 'fordham university'), (2002, 'fordham university'), (2002, 'indiana university'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2007, 'national science foundation'), (2008, 'indiana university'), (2008, 'national science foundation'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elise +lastname : sautler +middlename : truly +year_range : (2000,) +main_us_institutions_year : ((2000, 'new mexico state university'),) +all_us_institutions_year : ((2000, 'new mexico state university'),) + +firstname : elise +lastname : sautter +middlename : truly +year_range : (1994, 1999) +main_us_institutions_year : ((1994, 'new mexico state university'), (1995, 'new mexico state university'), (1997, 'new mexico state university'), (1999, 'new mexico state university')) +all_us_institutions_year : ((1994, 'new mexico state university'), (1995, 'new mexico state university'), (1997, 'new mexico state university'), (1999, 'new mexico state university')) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : peterson +middlename : r +year_range : (1996,) +main_us_institutions_year : ((1996, 'florida state university'),) +all_us_institutions_year : ((1996, 'florida state university'),) + +firstname : david +lastname : peters +middlename : h +year_range : (1991, 2021) +main_us_institutions_year : ((1991, 'johns hopkins university'), (1992, 'johns hopkins university'), (1993, 'johns hopkins university'), (1998, 'world bank'), (2000, 'world bank'), (2002, 'johns hopkins university'), (2003, 'johns hopkins university'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university'), (2017, 'johns hopkins university'), (2018, 'johns hopkins university'), (2019, 'johns hopkins university'), (2020, 'johns hopkins university'), (2021, 'johns hopkins university')) +all_us_institutions_year : ((1991, 'johns hopkins university'), (1992, 'johns hopkins university'), (1993, 'johns hopkins university'), (1998, 'world bank'), (2000, 'world bank'), (2001, 'world bank'), (2002, 'johns hopkins university'), (2003, 'johns hopkins university'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university'), (2017, 'johns hopkins university'), (2018, 'johns hopkins university'), (2019, 'johns hopkins university'), (2020, 'johns hopkins university'), (2021, 'johns hopkins university')) + +5/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : randolph +lastname : bucklin +middlename : e +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california los angeles'),) +all_us_institutions_year : ((2014, 'university of california los angeles'),) + +firstname : d +lastname : suckling +middlename : m +year_range : (1980, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2012, 'united states department of agriculture'),) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : salil +lastname : sarkar +middlename : k +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of texas at arlington'),) +all_us_institutions_year : ((2005, 'university of texas at arlington'),) + +firstname : s +lastname : das +middlename : k +year_range : (2008, 2009) +main_us_institutions_year : ((2008, 'university of texas at arlington'),) +all_us_institutions_year : ((2008, 'university of texas at arlington'), (2008, 'university of texas at austin')) + +5/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : knapp +middlename : l +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of texas at austin'),) +all_us_institutions_year : ((2004, 'university of texas at austin'),) + +firstname : mark +lastname : williams +middlename : l +year_range : (1991, 2021) +main_us_institutions_year : ((1992, 'creighton university'), (1995, 'texas christian university'), (1996, 'university of miami'), (1996, 'university of texas health science center at houston'), (1997, 'university of miami'), (1997, 'university of texas health science center at houston'), (1998, 'university of miami'), (1999, 'university of texas health science center at houston'), (2000, 'university of texas health science center at houston'), (2001, 'university of texas health science center at houston'), (2002, 'university of texas health science center at houston'), (2002, 'university of texas at austin'), (2003, 'university of texas health science center at houston'), (2004, 'university of texas health science center at houston'), (2005, 'university of texas health science center at houston'), (2006, 'university of texas health science center at houston'), (2007, 'university of texas health science center at houston'), (2008, 'university of texas health science center at houston'), (2009, 'university of texas health science center at houston'), (2010, 'university of texas health science center at houston'), (2011, 'university of texas health science center at houston'), (2011, 'florida international university'), (2012, 'florida international university'), (2013, 'florida international university'), (2014, 'florida international university'), (2015, 'florida international university'), (2016, 'florida international university'), (2017, 'florida international university'), (2021, 'florida international university')) +all_us_institutions_year : ((1992, 'creighton university'), (1995, 'texas christian university'), (1996, 'university of miami'), (1996, 'university of texas health science center at houston'), (1997, 'university of miami'), (1997, 'university of texas health science center at houston'), (1998, 'university of miami'), (1998, 'university of texas health science center at houston'), (1999, 'university of texas health science center at houston'), (2000, 'university of miami'), (2000, 'university of texas health science center at houston'), (2001, 'university of miami'), (2001, 'university of texas health science center at houston'), (2002, 'university of texas at austin'), (2002, 'university of texas health science center at houston'), (2003, 'university of texas health science center at houston'), (2004, 'university of texas health science center at houston'), (2005, 'university of houston'), (2005, 'university of texas at austin'), (2005, 'university of texas health science center at houston'), (2006, 'university of texas at austin'), (2006, 'university of texas health science center at houston'), (2007, 'university of texas at austin'), (2007, 'university of texas health science center at houston'), (2008, 'university of texas at austin'), (2008, 'university of texas health science center at houston'), (2009, 'university of texas health science center at houston'), (2010, 'university of texas at austin'), (2010, 'university of texas health science center at houston'), (2011, 'florida international university'), (2011, 'university of texas health science center at houston'), (2012, 'florida international university'), (2012, 'university of texas at austin'), (2013, 'florida international university'), (2014, 'florida international university'), (2015, 'creighton university'), (2015, 'florida international university'), (2016, 'florida international university'), (2017, 'florida international university'), (2021, 'florida international university')) + +5/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : tridas +lastname : mukhopadhay +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'carnegie mellon university'),) +all_us_institutions_year : ((2005, 'carnegie mellon university'),) + +firstname : tridas +lastname : mukhopadhyay +middlename : None +year_range : (1989, 2019) +main_us_institutions_year : ((1989, 'carnegie mellon university'), (1991, 'carnegie mellon university'), (1992, 'carnegie mellon university'), (1994, 'carnegie mellon university'), (1995, 'carnegie mellon university'), (1996, 'carnegie mellon university'), (1997, 'carnegie mellon university'), (1998, 'carnegie mellon university'), (1999, 'carnegie mellon university'), (2001, 'carnegie mellon university'), (2002, 'carnegie mellon university'), (2003, 'carnegie mellon university'), (2004, 'carnegie mellon university'), (2005, 'carnegie mellon university'), (2006, 'carnegie mellon university'), (2007, 'carnegie mellon university'), (2008, 'carnegie mellon university'), (2009, 'carnegie mellon university'), (2011, 'carnegie mellon university'), (2012, 'carnegie mellon university'), (2014, 'carnegie mellon university'), (2015, 'carnegie mellon university'), (2016, 'carnegie mellon university'), (2017, 'carnegie mellon university'), (2019, 'carnegie mellon university')) +all_us_institutions_year : ((1989, 'carnegie mellon university'), (1991, 'carnegie mellon university'), (1992, 'carnegie mellon university'), (1994, 'carnegie mellon university'), (1995, 'carnegie mellon university'), (1996, 'carnegie mellon university'), (1997, 'carnegie mellon university'), (1998, 'carnegie mellon university'), (1999, 'carnegie mellon university'), (2000, 'carnegie mellon university'), (2001, 'carnegie mellon university'), (2002, 'carnegie mellon university'), (2003, 'carnegie mellon university'), (2004, 'carnegie mellon university'), (2005, 'carnegie mellon university'), (2006, 'carnegie mellon university'), (2007, 'carnegie mellon university'), (2008, 'carnegie mellon university'), (2009, 'carnegie mellon university'), (2010, 'carnegie mellon university'), (2011, 'carnegie mellon university'), (2012, 'carnegie mellon university'), (2014, 'carnegie mellon university'), (2015, 'carnegie mellon university'), (2016, 'carnegie mellon university'), (2017, 'carnegie mellon university'), (2019, 'carnegie mellon university'), (2020, 'carnegie mellon university')) + +5/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nancy +lastname : rose +middlename : l +year_range : (2001,) +main_us_institutions_year : ((2001, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2001, 'massachusetts institute of technology'),) + +firstname : jeffrey +lastname : rosenblum +middlename : l +year_range : (2015, 2020) +main_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2018, 'massachusetts institute of technology'), (2020, 'massachusetts institute of technology')) +all_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2018, 'massachusetts institute of technology'), (2020, 'massachusetts institute of technology')) + +6/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peter +lastname : timmer +middlename : None +year_range : (1993,) +main_us_institutions_year : ((1993, 'harvard university'),) +all_us_institutions_year : ((1993, 'harvard university'),) + +firstname : venessa +lastname : timmerman +middlename : None +year_range : (2008, 2020) +main_us_institutions_year : ((2020, 'harvard university'),) +all_us_institutions_year : ((2020, 'harvard university'),) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : atkins +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'union institute'),) +all_us_institutions_year : ((1994, 'union institute'),) + +firstname : robert +lastname : atkin +middlename : s +year_range : (1989, 1997) +main_us_institutions_year : ((1989, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'university of pittsburgh')) +all_us_institutions_year : ((1989, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'university of pittsburgh')) + +6/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jim +lastname : holway +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'arizona state university'),) +all_us_institutions_year : ((2011, 'arizona state university'),) + +firstname : jim +lastname : ho +middlename : q +year_range : (2017, 2020) +main_us_institutions_year : ((2017, 'middlebury college'), (2018, 'middlebury college'), (2019, 'albert einstein college of medicine'), (2020, 'albert einstein college of medicine')) +all_us_institutions_year : ((2017, 'middlebury college'), (2017, 'united states department of veterans affairs'), (2018, 'middlebury college'), (2018, 'va palo alto healthcare system'), (2019, 'albert einstein college of medicine'), (2020, 'albert einstein college of medicine')) + +6/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ram +lastname : tenkaski +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'benedictine university'),) +all_us_institutions_year : ((2000, 'benedictine university'),) + +firstname : ramkrishnan +lastname : tenkasi +middlename : v +year_range : (1992, 2020) +main_us_institutions_year : ((1992, 'case western reserve university'), (1995, 'university of southern california'), (1996, 'university of southern california'), (2003, 'benedictine university'), (2004, 'benedictine university'), (2008, 'benedictine university'), (2016, 'benedictine university'), (2017, 'benedictine university')) +all_us_institutions_year : ((1992, 'case western reserve university'), (1994, 'case western reserve university'), (1995, 'university of southern california'), (1996, 'university of southern california'), (2003, 'benedictine university'), (2004, 'benedictine university'), (2008, 'benedictine university'), (2015, 'benedictine university'), (2016, 'benedictine university'), (2017, 'benedictine university')) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gary +lastname : olson +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of michigan'),) +all_us_institutions_year : ((1995, 'university of michigan'),) + +firstname : gary +lastname : olson +middlename : a +year_range : (1984, 2013) +main_us_institutions_year : ((1989, 'university of south florida'), (1992, 'university of south florida')) +all_us_institutions_year : ((1989, 'university of south florida'), (1990, 'university of south florida'), (1992, 'university of south florida')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mayuram +lastname : krishnan +middlename : s +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of michigan'),) +all_us_institutions_year : ((2006, 'university of michigan'),) + +firstname : m +lastname : krishnan +middlename : None +year_range : (2009, 2021) +main_us_institutions_year : ((2010, 'vanderbilt university medical center'), (2010, 'vanderbilt university'), (2011, 'vanderbilt university'), (2012, 'vanderbilt university'), (2013, 'university of california'), (2013, 'vanderbilt university medical center'), (2013, 'vanderbilt university'), (2014, 'university of california berkeley')) +all_us_institutions_year : ((2010, 'vanderbilt university'), (2010, 'vanderbilt university medical center'), (2011, 'vanderbilt university'), (2011, 'vanderbilt university medical center'), (2012, 'vanderbilt university'), (2013, 'university of california'), (2013, 'vanderbilt university'), (2013, 'vanderbilt university medical center'), (2014, 'university of california berkeley')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : anderson +middlename : c +year_range : (1998,) +main_us_institutions_year : ((1998, 'northwestern university'),) +all_us_institutions_year : ((1998, 'northwestern university'),) + +firstname : j +lastname : anderson +middlename : None +year_range : (1993, 2013) +main_us_institutions_year : ((1993, 'utah state university'), (1997, 'utah state university'), (2004, 'utah state university'), (2009, 'utah state university'), (2011, 'utah state university'), (2013, 'utah state university')) +all_us_institutions_year : ((1993, 'utah state university'), (1997, 'utah state university'), (2004, 'utah state university'), (2009, 'utah state university'), (2011, 'utah state university'), (2013, 'utah state university')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : joanne +lastname : barnes +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'indiana wesleyan university'),) +all_us_institutions_year : ((2014, 'indiana wesleyan university'),) + +firstname : j +lastname : barnes +middlename : c +year_range : (2007, 2021) +main_us_institutions_year : ((2007, 'university of south carolina'), (2008, 'florida state university'), (2009, 'florida state university'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2013, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2015, 'university of cincinnati'), (2016, 'university of cincinnati'), (2017, 'university of cincinnati'), (2018, 'university of cincinnati'), (2019, 'university of cincinnati'), (2020, 'university of cincinnati'), (2021, 'university of cincinnati')) +all_us_institutions_year : ((2007, 'university of south carolina'), (2008, 'florida state university'), (2009, 'florida state university'), (2010, 'florida state university'), (2010, 'university of texas at dallas'), (2011, 'university of texas at austin'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2013, 'florida state university'), (2013, 'university of texas at dallas'), (2014, 'university of cincinnati'), (2014, 'university of texas at dallas'), (2015, 'university of cincinnati'), (2015, 'university of texas at dallas'), (2016, 'university of cincinnati'), (2016, 'university of texas at dallas'), (2017, 'university of cincinnati'), (2018, 'university of cincinnati'), (2019, 'university of cincinnati'), (2020, 'university of cincinnati'), (2021, 'university of cincinnati')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : roberto +lastname : gutierrez +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of oregon'),) +all_us_institutions_year : ((2011, 'university of oregon'),) + +firstname : r +lastname : gutierrez +middlename : j +year_range : (1973, 2021) +main_us_institutions_year : ((1981, 'humboldt state university'), (1983, 'university of california berkeley'), (1987, 'humboldt state university'), (1989, 'humboldt state university'), (1990, 'humboldt state university'), (1991, 'humboldt state university'), (1992, 'humboldt state university'), (1994, 'humboldt state university'), (1995, 'humboldt state university'), (1996, 'humboldt state university'), (1998, 'humboldt state university'), (1999, 'humboldt state university'), (2000, 'humboldt state university'), (2001, 'humboldt state university'), (2001, 'university of minnesota'), (2002, 'humboldt state university'), (2002, 'university of minnesota'), (2003, 'humboldt state university'), (2003, 'university of minnesota'), (2004, 'university of minnesota'), (2005, 'university of minnesota'), (2006, 'university of minnesota'), (2007, 'university of minnesota'), (2008, 'university of minnesota'), (2009, 'university of minnesota'), (2010, 'university of minnesota'), (2011, 'university of minnesota'), (2012, 'university of minnesota'), (2013, 'university of minnesota'), (2014, 'university of minnesota'), (2015, 'university of minnesota'), (2016, 'university of minnesota'), (2017, 'university of minnesota'), (2018, 'university of wisconsin madison'), (2019, 'university of wisconsin madison'), (2020, 'university of wisconsin madison'), (2021, 'university of wisconsin madison')) +all_us_institutions_year : ((1981, 'humboldt state university'), (1983, 'university of california berkeley'), (1987, 'humboldt state university'), (1989, 'humboldt state university'), (1990, 'humboldt state university'), (1991, 'humboldt state university'), (1992, 'humboldt state university'), (1994, 'humboldt state university'), (1995, 'humboldt state university'), (1996, 'humboldt state university'), (1998, 'humboldt state university'), (1999, 'humboldt state university'), (2000, 'humboldt state university'), (2001, 'humboldt state university'), (2001, 'university of minnesota'), (2002, 'humboldt state university'), (2002, 'university of minnesota'), (2003, 'humboldt state university'), (2003, 'university of minnesota'), (2004, 'university of minnesota'), (2005, 'university of minnesota'), (2006, 'university of minnesota'), (2007, 'university of minnesota'), (2008, 'university of minnesota'), (2009, 'university of minnesota'), (2010, 'university of minnesota'), (2011, 'university of minnesota'), (2012, 'university of minnesota'), (2013, 'university of minnesota'), (2014, 'university of minnesota'), (2015, 'university of minnesota'), (2016, 'university of minnesota'), (2017, 'university of minnesota'), (2017, 'university of wisconsin madison'), (2018, 'university of minnesota'), (2018, 'university of wisconsin madison'), (2019, 'university of minnesota'), (2019, 'university of wisconsin madison'), (2020, 'university of minnesota'), (2020, 'university of wisconsin madison'), (2021, 'university of minnesota'), (2021, 'university of wisconsin madison')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : miller +middlename : mark +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of tennessee'),) +all_us_institutions_year : ((1990, 'university of tennessee'),) + +firstname : mark +lastname : miller +middlename : m +year_range : (1993, 2017) +main_us_institutions_year : ((2001, 'university of southern mississippi'), (2003, 'university of southern mississippi'), (2007, 'university of southern mississippi'), (2008, 'university of southern mississippi'), (2011, 'university of southern mississippi'), (2016, 'university of southern mississippi'), (2017, 'university of southern mississippi')) +all_us_institutions_year : ((2001, 'university of southern mississippi'), (2003, 'university of southern mississippi'), (2007, 'university of southern mississippi'), (2008, 'university of southern mississippi'), (2011, 'university of southern mississippi'), (2016, 'university of southern mississippi'), (2017, 'university of southern mississippi')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : akhil +lastname : kumar +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'pennsylvania state university'),) +all_us_institutions_year : ((2011, 'pennsylvania state university'),) + +firstname : a +lastname : kumar +middlename : None +year_range : (1971, 2019) +main_us_institutions_year : ((1983, 'bell labs'),) +all_us_institutions_year : ((1983, 'bell labs'),) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : d +lastname : lee +middlename : scott +year_range : (2002,) +main_us_institutions_year : ((2002, 'texas a m university college station'),) +all_us_institutions_year : ((2002, 'texas a m university college station'),) + +firstname : dongyoung +lastname : lee +middlename : None +year_range : (2011, 2020) +main_us_institutions_year : ((2011, 'university of utah'),) +all_us_institutions_year : ((2010, 'university of utah'), (2011, 'university of utah')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : steven +lastname : berry +middlename : t +year_range : (1998,) +main_us_institutions_year : ((1998, 'yale university'),) +all_us_institutions_year : ((1998, 'yale university'),) + +firstname : steve +lastname : berry +middlename : None +year_range : (2002, 2017) +main_us_institutions_year : ((2007, 'university of colorado colorado springs'),) +all_us_institutions_year : ((2007, 'university of colorado colorado springs'),) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : kothari +middlename : p +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of rochester'),) +all_us_institutions_year : ((1997, 'university of rochester'),) + +firstname : siddharth +lastname : kothari +middlename : None +year_range : (2013, 2021) +main_us_institutions_year : ((2013, 'stanford university'), (2021, 'international monetary fund')) +all_us_institutions_year : ((2013, 'stanford university'), (2014, 'international monetary fund'), (2014, 'stanford university'), (2016, 'international monetary fund'), (2018, 'international monetary fund'), (2020, 'international monetary fund'), (2021, 'international monetary fund')) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hd +lastname : vinod +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'fordham university'),) +all_us_institutions_year : ((2008, 'fordham university'),) + +firstname : hrishikesh +lastname : vinod +middlename : d +year_range : (1968, 2020) +main_us_institutions_year : ((1970, 'mathematica policy research'), (1973, 'bell labs'), (1976, 'bell labs'), (1978, 'at t corporation'), (1979, 'at t corporation'), (1980, 'at t corporation'), (1982, 'bell labs'), (1984, 'fordham university'), (1985, 'fordham university'), (1987, 'fordham university'), (1989, 'fordham university'), (1993, 'fordham university'), (1994, 'fordham university'), (1995, 'fordham university'), (1996, 'fordham university'), (1997, 'fordham university'), (1998, 'fordham university'), (1999, 'fordham university'), (2000, 'fordham university'), (2002, 'fordham university'), (2003, 'fordham university'), (2004, 'fordham university'), (2006, 'fordham university'), (2008, 'fordham university'), (2009, 'fordham university'), (2013, 'fordham university'), (2014, 'fordham university'), (2017, 'fordham university'), (2019, 'fordham university'), (2020, 'fordham university')) +all_us_institutions_year : ((1970, 'mathematica policy research'), (1973, 'bell labs'), (1976, 'bell labs'), (1978, 'at t corporation'), (1979, 'at t corporation'), (1980, 'at t corporation'), (1982, 'bell labs'), (1984, 'fordham university'), (1985, 'fordham university'), (1987, 'fordham university'), (1989, 'fordham university'), (1990, 'fordham university'), (1993, 'fordham university'), (1994, 'fordham university'), (1995, 'fordham university'), (1996, 'fordham university'), (1997, 'fordham university'), (1998, 'fordham university'), (1999, 'fordham university'), (2000, 'fordham university'), (2001, 'fordham university'), (2002, 'fordham university'), (2003, 'fordham university'), (2004, 'brooklyn law school'), (2004, 'fordham university'), (2005, 'fordham university'), (2006, 'fordham university'), (2007, 'fordham university'), (2008, 'fordham university'), (2009, 'fordham university'), (2010, 'fordham university'), (2011, 'fordham university'), (2012, 'fordham university'), (2013, 'fordham university'), (2014, 'fordham university'), (2015, 'fordham university'), (2016, 'fordham university'), (2017, 'fordham university'), (2018, 'fordham university'), (2019, 'fordham university'), (2020, 'fordham university'), (2021, 'fordham university')) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : wells +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of minnesota'),) +all_us_institutions_year : ((1997, 'university of minnesota'),) + +firstname : william +lastname : wells +middlename : None +year_range : (2002, 2021) +main_us_institutions_year : ((2002, 'southern illinois university carbondale'), (2003, 'southern illinois university carbondale'), (2004, 'southern illinois university carbondale'), (2005, 'southern illinois university carbondale'), (2006, 'southern illinois university carbondale'), (2007, 'southern illinois university carbondale'), (2007, 'sam houston state university'), (2010, 'sam houston state university'), (2011, 'sam houston state university'), (2012, 'sam houston state university'), (2015, 'sam houston state university'), (2016, 'sam houston state university'), (2017, 'sam houston state university'), (2018, 'sam houston state university'), (2019, 'sam houston state university'), (2020, 'sam houston state university'), (2021, 'sam houston state university')) +all_us_institutions_year : ((2002, 'southern illinois university carbondale'), (2003, 'southern illinois university carbondale'), (2004, 'southern illinois university carbondale'), (2005, 'southern illinois university carbondale'), (2006, 'southern illinois university carbondale'), (2007, 'sam houston state university'), (2007, 'southern illinois university carbondale'), (2010, 'sam houston state university'), (2011, 'sam houston state university'), (2012, 'bowling green state university'), (2012, 'sam houston state university'), (2015, 'sam houston state university'), (2016, 'sam houston state university'), (2017, 'sam houston state university'), (2018, 'sam houston state university'), (2019, 'sam houston state university'), (2020, 'sam houston state university'), (2021, 'sam houston state university')) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eric +lastname : eisenstein +middlename : m +year_range : (2013,) +main_us_institutions_year : ((2013, 'temple university'),) +all_us_institutions_year : ((2013, 'temple university'),) + +firstname : eric +lastname : eisenstein +middlename : m +year_range : (2006, 2008) +main_us_institutions_year : ((2006, 'cornell university'), (2008, 'cornell university')) +all_us_institutions_year : ((2006, 'cornell university'), (2008, 'cornell university')) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : haitao +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of michigan'),) +all_us_institutions_year : ((2012, 'university of michigan'),) + +firstname : haitao +lastname : li +middlename : None +year_range : (2009, 2021) +main_us_institutions_year : ((2009, 'university of missouri st louis'), (2011, 'university of missouri st louis'), (2011, 'chinese academy of sciences'), (2012, 'university of missouri st louis'), (2013, 'university of missouri st louis'), (2014, 'university of missouri st louis'), (2015, 'university of missouri st louis'), (2015, 'chinese academy of sciences'), (2016, 'university of missouri st louis'), (2016, 'chinese academy of sciences'), (2017, 'university of missouri st louis'), (2019, 'university of missouri st louis'), (2020, 'university of missouri st louis'), (2021, 'university of missouri st louis')) +all_us_institutions_year : ((2009, 'university of missouri st louis'), (2011, 'chinese academy of sciences'), (2011, 'university of missouri st louis'), (2012, 'university of missouri st louis'), (2013, 'university of missouri'), (2013, 'university of missouri st louis'), (2014, 'chinese academy of sciences'), (2014, 'university of missouri st louis'), (2015, 'chinese academy of sciences'), (2015, 'university of missouri st louis'), (2016, 'chinese academy of sciences'), (2016, 'university of missouri st louis'), (2017, 'university of missouri st louis'), (2019, 'university of missouri st louis'), (2020, 'university of missouri st louis'), (2021, 'university of missouri st louis')) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : p +lastname : varadarajan +middlename : rajan +year_range : (1999,) +main_us_institutions_year : ((1999, 'texas a m university college station'),) +all_us_institutions_year : ((1999, 'texas a m university college station'),) + +firstname : p +lastname : varadarajan +middlename : rajan +year_range : (1985, 2013) +main_us_institutions_year : ((2013, 'university of massachusetts amherst'),) +all_us_institutions_year : ((2013, 'university of massachusetts amherst'),) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lynne +lastname : zucker +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of southern california'),) +all_us_institutions_year : ((2007, 'university of southern california'),) + +firstname : lynne +lastname : zucker +middlename : g +year_range : (1977, 2020) +main_us_institutions_year : ((1987, 'university of california los angeles'), (1996, 'university of california los angeles'), (1997, 'university of california los angeles'), (1998, 'university of california los angeles'), (2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2007, 'university of california los angeles'), (2007, 'national bureau of economic research'), (2009, 'duke university'), (2016, 'university of california los angeles'), (2019, 'university of california los angeles')) +all_us_institutions_year : ((1987, 'university of california los angeles'), (1994, 'national bureau of economic research'), (1994, 'university of california los angeles'), (1995, 'national bureau of economic research'), (1995, 'university of california los angeles'), (1996, 'national bureau of economic research'), (1996, 'university of california los angeles'), (1997, 'national bureau of economic research'), (1997, 'university of california los angeles'), (1998, 'national bureau of economic research'), (1998, 'university of california los angeles'), (1999, 'national bureau of economic research'), (1999, 'university of california los angeles'), (2001, 'national bureau of economic research'), (2001, 'university of california los angeles'), (2002, 'national bureau of economic research'), (2002, 'university of california los angeles'), (2003, 'national bureau of economic research'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'national bureau of economic research'), (2006, 'university of california los angeles'), (2007, 'national bureau of economic research'), (2007, 'university of california los angeles'), (2008, 'national bureau of economic research'), (2008, 'university of california los angeles'), (2009, 'duke university'), (2011, 'national bureau of economic research'), (2011, 'university of california los angeles'), (2014, 'national bureau of economic research'), (2014, 'university of california los angeles'), (2015, 'national bureau of economic research'), (2015, 'university of california los angeles'), (2016, 'university of california los angeles'), (2019, 'university of california los angeles')) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christine +lastname : mcphail +middlename : johnson +year_range : (2003,) +main_us_institutions_year : ((2003, 'morgan state university'),) +all_us_institutions_year : ((2003, 'morgan state university'),) + +firstname : christine +lastname : mcphail +middlename : johnson +year_range : (2016, 2019) +main_us_institutions_year : ((2016, 'american university'), (2019, 'american university')) +all_us_institutions_year : ((2016, 'american university'), (2019, 'american university')) + +9/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wayne +lastname : lee +middlename : y +year_range : (1998,) +main_us_institutions_year : ((1998, 'kent state university'),) +all_us_institutions_year : ((1998, 'kent state university'),) + +firstname : wayne +lastname : lee +middlename : y +year_range : (1986, 1997) +main_us_institutions_year : ((1986, 'santa clara university'), (1990, 'santa clara university')) +all_us_institutions_year : ((1986, 'santa clara university'), (1990, 'santa clara university'), (1997, 'university of arkansas')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lyle +lastname : yorks +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'teachers college columbia university'),) +all_us_institutions_year : ((2012, 'teachers college columbia university'),) + +firstname : lyle +lastname : yorks +middlename : None +year_range : (1976, 2020) +main_us_institutions_year : ((1985, 'eastern connecticut state university'), (2001, 'columbia university'), (2002, 'columbia university'), (2004, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'eastern connecticut state university'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2020, 'columbia university')) +all_us_institutions_year : ((1985, 'eastern connecticut state university'), (2001, 'columbia university'), (2002, 'columbia university'), (2004, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'eastern connecticut state university'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : patricia +lastname : walker +middlename : hinton +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of rochester school of nursing'),) +all_us_institutions_year : ((1997, 'university of rochester school of nursing'),) + +firstname : patricia +lastname : walker +middlename : hinton +year_range : (1994, 2016) +main_us_institutions_year : ((2002, 'uniformed services university of the health sciences'), (2005, 'uniformed services university of the health sciences'), (2008, 'uniformed services university of the health sciences'), (2010, 'uniformed services university of the health sciences'), (2013, 'uniformed services university of the health sciences'), (2014, 'uniformed services university of the health sciences'), (2016, 'uniformed services university of the health sciences')) +all_us_institutions_year : ((2002, 'uniformed services university of the health sciences'), (2005, 'uniformed services university of the health sciences'), (2008, 'uniformed services university of the health sciences'), (2010, 'uniformed services university of the health sciences'), (2013, 'uniformed services university of the health sciences'), (2014, 'uniformed services university of the health sciences'), (2015, 'uniformed services university of the health sciences'), (2016, 'uniformed services university of the health sciences')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : choi +middlename : jay +year_range : (1996,) +main_us_institutions_year : ((1996, 'temple university'),) +all_us_institutions_year : ((1996, 'temple university'),) + +firstname : jaeyoung +lastname : choi +middlename : None +year_range : (1990, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2013, 'world bank'), (2015, 'georgia institute of technology')) + +11/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 56.79188270171483 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..bfebda1 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_business_christoph_degree0_graduates_8515.log @@ -0,0 +1,577 @@ +Namespace(testing=False, verbose=1, field=['business'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [144133560] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0007546504338582356 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +Time elapsed: 26.608677724997204 minutes + +Starting active labeling... +firstname : julian +lastname : clair +middlename : k saint +year : 2010 +year_papertitle : ((2010, 'all positive emotions are not equal cognitive and motivational differences between pride and surprise'), (2013, 'identity regulation theory and implications of multiple identity management'), (2015, 'consumer uncertainty and purchase decision reversals theory and evidence'), (2017, 'fight fire with fire using one consumer stereotype to overcome another via conceptual contingency learning')) +keywords : frozenset({'social psychology', 'microeconomics', 'marketing'}) + +firstname : julian +lastname : clair +middlename : k saint +year : 2013 +year_papertitle : ((2013, 'identity regulation theory and implications of multiple identity management'),) +keywords : frozenset({'marketing'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : syed +lastname : zaidi +middlename : kashif raza +year : 2014 +year_papertitle : ((2014, 'ifrs adoption and enforcement as antecedents of economic growth'), (2014, 'the impact of ceo stock option expensing as per sfas 123 r on earnings quality'), (2020, 'diffusion of ifrs using innovation diffusion models')) +keywords : frozenset({'accounting'}) + +firstname : syed +lastname : zaidi +middlename : kashif raza +year : 2012 +year_papertitle : ((2012, 'ifrs adoption and enforcement as antecedents of economic growth'),) +keywords : frozenset({'accounting'}) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : catherine +lastname : soule +middlename : armstrong +year : 2013 +year_papertitle : ((2013, 'not all anchors weigh the same anchoring and framing effects in pay what you want pricing'), (2014, 'buy less buy better consumer response to green demarketing strategies'), (2014, 'duped scammed and suckered the development of the sugrophobia scale'), (2015, 'anchors and norms in anonymous pay what you want pricing contexts'), (2015, 'less is more is a green demarketing strategy sustainable'), (2016, 'green demarketing in advertisements comparing buy green and buy less appeals in product and institutional advertising contexts'), (2017, '13 g new with tags consumer and brand relationships in consumer to consumer buy sell trade groups on social media'), (2017, '14 i signaling nothing conspicuous anti consumption in demarketing contexts'), (2017, 'fans and brands delineating between fandoms brand communities and brand publics'), (2018, 'buying unicorns the impact of consumer to consumer branded buy sell trade communities on traditional retail buying behavior')) +keywords : frozenset({'advertising', 'marketing', 'public relations', 'environmental resource management'}) + +firstname : catherine +lastname : soule +middlename : a armstrong +year : 2014 +year_papertitle : ((2014, 'anchors norms and dual processes exploring decision making in pay what you want pricing contexts'),) +keywords : frozenset({'marketing'}) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stacey +lastname : baker +middlename : menzel +year : 1994 +year_papertitle : ((1994, 'death by nostalgia a diagnosis of context specific cases'), (1995, 'the role of possessions in creating maintaining and preserving one s identity variation over the life course'), (1996, 'an ethnography of mick s sports card show preliminary findings from the field'), (1996, 'kids as collectors a phenomenological study of first and fifth graders'), (1999, 'motivations and commitments among participants in the great texas birding classic'), (1999, 'special session summary dialogues with visually impaired and color blind consumers psychological socio cultural and social policy perspectives on an emerging issue in consumer research'), (2000, 'santa claus does more than deliver toys advertising s commercialization of the collective memory of americans'), (2001, 'customer participation in creating site brand loyalty'), (2001, 'marketing and public accommodation a retrospective on title iii of the americans with disabilities act'), (2001, 'marketplace experiences of consumers with visual impairments beyond the americans with disabilities act')) +keywords : frozenset({'mathematics education', 'social psychology', 'advertising', 'public relations', 'media studies', 'gender studies', 'law', 'marketing', 'economic geography'}) + +firstname : stacey +lastname : baker +middlename : karlene menzel +year : 1996 +year_papertitle : ((1996, 'task associative ego and extrinsic goal orientations an experiential analysis of collectors search'),) +keywords : frozenset({'personality', 'marketing'}) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yohannes +lastname : hailu +middlename : g +year : 2004 +year_papertitle : ((2004, 'modeling migration effects on agricultural lands a growth equilibrium model'), (2007, 'regional growth impacts on agricultural land development a spatial model for three states'), (2010, 'chemical use reductions in urban fringe agriculture'), (2010, 'effects of renewable energy policies on wind industry development in the us'), (2010, 'evidence of land hoarding behavior in us agriculture'), (2011, 'endogenizing the planning horizon in urban fringe agriculture')) +keywords : frozenset({'natural resource economics', 'economy', 'agricultural economics', 'economic growth'}) + +firstname : yohannes +lastname : hailu +middlename : m +year : 1998 +year_papertitle : ((1998, 'factors affecting housing preference under conditions of limited choice the case of addis abeba'),) +keywords : frozenset({'economics', 'finance'}) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sarah +lastname : moore +middlename : a +year : 2006 +year_papertitle : ((2006, 'forgotten roots of the green city subsistence gardening in columbus ohio 1900 1940'), (2007, 'in the nature of cities urban political ecology and the politics of urban metabolism nik heynen maria kaika and erik swyngedouw eds'), (2008, 'the politics of garbage in oaxaca mexico'), (2009, 'the excess of modernity garbage politics in oaxaca mexico'), (2010, 'environment and society a critical introduction'), (2010, 'jutta gutberlet recovering resources recycling citizenship urban poverty reduction in latin america'), (2010, 'reading joel wainwright s decolonizing development colonial power and the maya'), (2010, 'the people s property power politics and the public lynn staeheli and don mitchell'), (2012, 'designing nature for learning school gardens for youth and child education'), (2012, 'garbage matters concepts in new geographies of waste')) +keywords : frozenset({'economic history', 'economy', 'development economics', 'environmental protection', 'pedagogy', 'public administration', 'environmental ethics', 'media studies', 'social science', 'ethnology', 'political economy', 'economic growth'}) + +firstname : sarah +lastname : moore +middlename : g +year : 2009 +year_papertitle : ((2009, 'some things are better left unsaid how word of mouth influences the speaker'),) +keywords : frozenset({'social psychology', 'linguistics', 'marketing'}) + +4/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lakshmi +lastname : goel +middlename : None +year : 2006 +year_papertitle : ((2006, 'weblogging implementing communities of practice'), (2007, 'seeking dragons in is research'), (2007, 'vcrm virtual customer relationship management'), (2008, 'a proposed framework for designing sustainable communities for knowledge management systems'), (2009, 'a life cycle model of virtual communities'), (2009, 'exploring the dynamics of blog communities the case of metafilter'), (2009, 'half day workshop on the value of corporate wikis'), (2009, 'if you build it will they come an empirical investigation of consumer perceptions and strategy in virtual worlds'), (2009, 'the socialness of virtual worlds'), (2010, 'situated learning conceptualization and measurement')) +keywords : frozenset({'knowledge management', 'cognitive science', 'public relations', 'social psychology', 'gender studies', 'marketing'}) + +firstname : lakshmi +lastname : goel +middlename : None +year : 2008 +year_papertitle : ((2008, 'situated learning in virtual worlds'),) +keywords : frozenset({'social psychology', 'cognitive psychology', 'management'}) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : steven +lastname : hanke +middlename : a +year : 2012 +year_papertitle : ((2012, 'a censored quantile regression analysis of employee stock options substitution for debt and the impact of sfas 123r'), (2012, 'a two state analysis of estate taxes and charitable bequests from the most generous decedents'), (2012, 'an analysis of tax court cases relevant to financial planners'), (2012, 'why do small businesses take on high levels of external loans a censored quantile regression analysis'), (2013, 'the impact of double taxation on small firms cash holdings'), (2019, 'an assessment of pawnbroker succession planning')) +keywords : frozenset({'macroeconomics', 'monetary economics', 'actuarial science', 'finance', 'accounting', 'environmental planning'}) + +firstname : steven +lastname : hanke +middlename : None +year : 2008 +year_papertitle : ((2008, 'tax policy questions regarding the federal estate tax and charitable bequests a two state analysis of probate records'),) +keywords : frozenset({'accounting'}) + +5/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jing +lastname : liu +middlename : None +year : 2000 +year_papertitle : ((2000, 'stock returns and accounting earnings'), (2000, 'the feltham ohlson 1995 model empirical implications'), (2002, 'equity valuation using multiples'), (2002, 'measuring value relevance in a possibly inefficient market'), (2004, 'discussion of the role of expectations in explaining the cross section of stock returns'), (2004, 'valuation and accounting for inflation and foreign exchange'), (2005, 'earnings quality insider trading and cost of capital'), (2006, 'on international accounting valuation'), (2007, 'information asymmetry diversification and cost of capital'), (2007, 'is cash flow king in valuations')) +keywords : frozenset({'monetary economics', 'microeconomics', 'financial economics', 'accounting', 'econometrics'}) + +firstname : jing +lastname : li +middlename : None +year : 2009 +year_papertitle : ((2009, 'accounting conservatism debt contracts and financial institutions'),) +keywords : frozenset({'accounting'}) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jorge +lastname : fernandezbaca +middlename : None +year : 1988 +year_papertitle : ((1988, 'la economia informal y la demanda de dinero en el peru 1950 1986 en torno a una polemica sobre los fundamentos estadisticos de el otro sendero'), (1989, 'reformas politicas y eficiencia economica hacia un analisis economico de la democracia los fundamentos del neo institucionalismo'), (1990, 'el problema del atraso cambiario consideraciones teoricas y practicas'), (1991, 'la importancia de la democracia para los economistas'), (1992, 'sexo divorcio y machismo en torno a gary becker premio nobel de economia 1992'), (1994, 'douglass north y la nueva historia economica')) +keywords : frozenset({'economy', 'development economics', 'positive economics', 'welfare economics', 'political economy', 'law', 'economic geography'}) + +firstname : jorge +lastname : fernandez +middlename : trullen +year : 2007 +year_papertitle : ((2007, 'quality evaluations and their impacts the roles of legitimacy and significance'),) +keywords : frozenset({'colleges universities and professional schools', 'educational administration', 'management'}) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yu +lastname : xiong +middlename : None +year : 2008 +year_papertitle : ((2008, 'pricing strategy of service provider under buyer driven pricing model'), (2008, 'value uncertain in advance selling the impact of offering refunds for cancellations'), (2009, 'robust dynamic pricing over infinite horizon in the presence of model uncertainty'), (2009, 'tracking the innovation epidemic a framework for innovation measurement and diffusion within the pharmaceutical sector'), (2010, 'dynamic pricing model and algorithm for perishable products with fuzzy demand'), (2011, 'coordination and incentive mechanisms on collaborative investment in the supply chain'), (2011, 'supply chain competition with information sharing'), (2012, 'bricks vs clicks the impact of manufacturer encroachment with a dealer leasing and selling of durable goods'), (2012, 'pathways to supply chain excellence'), (2013, 'the bright side of manufacturing remanufacturing conflict in a decentralised closed loop supply chain')) +keywords : frozenset({'industrial organization', 'mathematical economics', 'microeconomics', 'knowledge management', 'process management', 'mathematical optimization', 'algorithm', 'commerce', 'operations management', 'marketing', 'manufacturing engineering'}) + +firstname : yu +lastname : xia +middlename : None +year : 2004 +year_papertitle : ((2004, 'market segmentation and pricing strategies by logistic efficiency in two echelon supply chain'),) +keywords : frozenset({'marketing', 'management'}) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : shawn +lastname : walker +middlename : None +year : 2010 +year_papertitle : ((2010, 'budget mapping increasing citizen understanding of government via interactive design'), (2011, 'fifteen minutes of fame the power of blogs in the lifecycle of viral political information'), (2012, 'building understanding of smart city initiatives'), (2012, 'smart cities and service integration initiatives in north american cities a status report'), (2012, 'understanding smart cities an integrative framework'), (2014, 'a model of crowd enabled organization theory and methods for understanding the role of twitter in the occupy protests'), (2014, 'big data big questions working within a black box transparency in the collection and production of big twitter data'), (2014, 'organization in the crowd looking ahead'), (2014, 'organization in the crowd peer production in large scale networked protests'), (2014, 'working within a black box transparency in the collection and production of big twitter data')) +keywords : frozenset({'public administration', 'knowledge management', 'advertising', 'public relations', 'environmental resource management', 'world wide web', 'internet privacy', 'data science', 'economic growth'}) + +firstname : sean +lastname : walker +middlename : c +year : 2012 +year_papertitle : ((2012, 'the nonconscious antecedents of group processes an experimental analysis of the priming of group beliefs'),) +keywords : frozenset({'cognitive psychology', 'organizational behavior', 'management'}) + +6/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : neil +lastname : morgan +middlename : a +year : 1988 +year_papertitle : ((1988, 'successful growth by acquisition'), (1989, 'developing information strategies in the uk financial services sector'), (1989, 'marketing organisation in the uk financial services industry'), (1990, 'corporate legal advice and client quality perceptions'), (1990, 'internal marketing making marketing happen'), (1990, 'organisational context and behavioural problems as determinants of the effectiveness of the strategic marketing planning process'), (1990, 'professional accountancy firms and marketing'), (1991, 'an exploratory study of market orientation in the u k consulting engineering profession'), (1991, 'barriers to marketing implementation in u k professional service firms'), (1991, 'professional services marketing')) +keywords : frozenset({'public relations', 'finance', 'accounting', 'marketing'}) + +firstname : michael +lastname : morgan +middlename : scott +year : 1990 +year_papertitle : ((1990, 'heterogeneity in brand choice under the zero order assumption a theoretical behavioral and empirical investigation'),) +keywords : frozenset({'experiments', 'psychology', 'marketing', 'business costs'}) + +6/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mary +lastname : luque +middlename : f sully de +year : 2000 +year_papertitle : ((2000, 'the impact of culture on feedback seeking behavior an integrated model and propositions'), (2006, 'a failure of scholarship response to george graen s critique of globe'), (2006, 'conceptualizing and measuring cultures and their consequences a comparative review of globe s and hofstede s approaches'), (2006, 'cultural and leadership predictors of corporate social responsibility values of top management a globe study of 15 countries'), (2006, 'in the eye of the beholder cross cultural lessons in leadership from project globe')) +keywords : frozenset({'positive economics', 'social psychology', 'public relations', 'social science', 'marketing'}) + +firstname : mary +lastname : luque +middlename : frances sully de +year : 2000 +year_papertitle : ((2000, 'the impact of cross cultural differences on feedback seeking behavior tendency to engage strategy type and preferred source choice'),) +keywords : frozenset({'business administration', 'cultural anthropology', 'management'}) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : baojiang +lastname : chen +middlename : None +year : 2006 +year_papertitle : ((2006, 'a note on minimum aberration and clear criteria'), (2006, 'some results on blocked regular 2 level fractional factorial designs with clear effects'), (2009, 'likelihood analysis of joint marginal and conditional models for longitudinal categorical data'), (2010, 'analysis of interval censored disease progression data via multi state models under a nonignorable inspection process'), (2010, 'estimating functions for evaluating treatment effects in cluster randomized longitudinal studies in the presence of drop out and non compliance'), (2010, 'stability of etiologic dementia diagnoses in the national alzheimer s coordinating center nacc uniform data set'), (2010, 'weighted generalized estimating functions for longitudinal response and covariate data that are missing at random'), (2011, 'doubly robust estimates for binary longitudinal data analysis with missing response and missing covariates'), (2011, 'non homogeneous markov process models with informative observations with an application to alzheimer s disease'), (2011, 'progressive multi state models for informatively incomplete longitudinal data')) +keywords : frozenset({'combinatorics', 'data mining', 'psychiatry', 'artificial intelligence', 'calculus', 'statistics', 'econometrics', 'forestry', 'machine learning'}) + +firstname : yao +lastname : chen +middlename : None +year : 1991 +year_papertitle : ((1991, 'distributions for asset returns'),) +keywords : frozenset({'computer science', 'statistics', 'finance'}) + +7/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : li +lastname : chen +middlename : None +year : 2008 +year_papertitle : ((2008, 'analysis of book resale in amazon upgrade framework a game theory approach'), (2009, 'iphone or kindle competition of electronic books sales')) +keywords : frozenset({'industrial organization', 'microeconomics', 'commerce'}) + +firstname : ling +lastname : chen +middlename : None +year : 2010 +year_papertitle : ((2010, 'option pricing and hedging with transaction costs'),) +keywords : frozenset({'statistics', 'finance'}) + +7/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : steven +lastname : smith +middlename : r +year : 1986 +year_papertitle : ((1986, 'disabled newborns and the federal child abuse amendments tenuous protection'), (1989, 'a crazy system mental health care delivery in america')) +keywords : frozenset({'family medicine', 'psychiatry'}) + +firstname : steven +lastname : smith +middlename : howard +year : 2000 +year_papertitle : ((2000, 'tax accounting choice the costs of corporate tax aggressiveness'),) +keywords : frozenset({'accounting'}) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : anil +lastname : maheshwari +middlename : None +year : 1990 +year_papertitle : ((1990, 'an optimal algorithm for computing a minimum nested nonconvex polygon'), (1991, 'computing the shortest path tree in a weak visibility polygon'), (1992, 'an optimal parallel algorithm for computing furthest neighbors in a tree'), (1992, 'parallel algorithms for all minimum link paths and link center problems'), (1992, 'sharing perspectives in distributed decision making'), (1993, 'characterizing and recognizing weak visibility polygons'), (1993, 'multi list ranking complexity and applications'), (1993, 'optimal crew pram algorithms for direct dominance problems'), (1993, 'parallel algorithms for rectilinear link distance problems'), (1994, 'a simple optimal parallel algorithm for reporting paths in a tree')) +keywords : frozenset({'combinatorics', 'knowledge management', 'mathematical optimization', 'algorithm', 'parallel computing', 'discrete mathematics'}) + +firstname : anil +lastname : maheshwari +middlename : kumar +year : 1996 +year_papertitle : ((1996, 'learning and information technology an experimental investigation of computer based representation to support reflective thinking'),) +keywords : frozenset({'psychology', 'experiments', 'higher education', 'management'}) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : hesford +middlename : w +year : 2011 +year_papertitle : ((2011, 'a social network analysis of the literature on management control'), (2012, 'the social structure of communication in major accounting research journals'), (2016, 'turnover and unit level financial performance an analysis of the costs and benefits of voluntary and involuntary turnover in unskilled jobs'), (2018, 'sunrise hotels an integrated managerial accounting teaching case')) +keywords : frozenset({'labour economics', 'public relations', 'data science', 'knowledge management'}) + +firstname : james +lastname : hesford +middlename : wesley +year : 1998 +year_papertitle : ((1998, 'determinants of the use of competitors accounting information by competitive intelligence professionals'),) +keywords : frozenset({'business administration', 'accounting', 'marketing'}) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : raynor +middlename : j +year : 1986 +year_papertitle : ((1986, 'automatic smoothing of regression functions in generalized linear models'), (1989, 'comparison of disposable diapers with fluff absorbent and fluff plus absorbent polymers effects on skin hydration skin ph and diaper dermatitis')) +keywords : frozenset({'dermatology', 'mathematical optimization', 'food science'}) + +firstname : william +lastname : raynor +middlename : judson +year : 1999 +year_papertitle : ((1999, 'the impact of nafta on employment the perception of business economics faculty in mexico canada and the united states'),) +keywords : frozenset({'international law', 'labor economics', 'international relations', 'management'}) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : francis +lastname : mediavilla +middlename : a mendez +year : 2009 +year_papertitle : ((2009, 'approximate queries on distributed data marts'), (2010, 'a bayesian method for query approximation'), (2011, 'information privacy implementation and perception of laws and corporate policies by ceos and managers'), (2012, 'approximation queries for building energy aware data warehouses on mobile ad hoc networks'), (2012, 'construction analysis of rainwater harvesting systems'), (2012, 'critical variables in the decision making process for amhs technology selection in semiconductor wafer size transitions exploratory study'), (2012, 'teaching multiple regression in elementary statistics'), (2014, 'what makes a top selling textbook comparing characteristics of ais textbooks'), (2016, 'an empirical investigation on ceo turnover in it firms and firm performance')) +keywords : frozenset({'mathematics education', 'data mining', 'computer network', 'multimedia', 'simulation', 'database', 'manufacturing engineering', 'internet privacy', 'environmental engineering', 'information retrieval', 'business administration', 'civil engineering'}) + +firstname : francis +lastname : mediavilla +middlename : a mendez +year : 2005 +year_papertitle : ((2005, 'using discrete multivariate mcmc bayesian methods for change detection and disclosure control'),) +keywords : frozenset({'computer science', 'statistics', 'management'}) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : walsh +middlename : igoe +year : 1994 +year_papertitle : ((1994, 'international constraints and domestic choices economic convergence and exchange rate policy in france and italy'), (1994, 'politics and exchange rates britain france italy and the negotiation of the european monetary system'), (1999, 'political bases of macroeconomic adjustment evidence from the italian experience'), (2000, 'european monetary integration and domestic politics'), (2000, 'european monetary integration domestic politics britain france and italy'), (2000, 'when do ideas matter explaining the successes and failures of thatcherite ideas'), (2001, 'national preferences and international institutions evidence from european monetary integration')) +keywords : frozenset({'macroeconomics', 'economy', 'public administration', 'international economics', 'economic system', 'economic policy', 'political economy'}) + +firstname : james +lastname : walsh +middlename : igoe +year : 1996 +year_papertitle : ((1996, 'global finance domestic politics exchange rate policymaking in britain france and italy'),) +keywords : frozenset({'finance', 'international law', 'international relations', 'political science'}) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : karen +lastname : bogart +middlename : a smith +year : 2013 +year_papertitle : ((2013, 'convergence of corporate governance and corporate social responsibility'), (2016, 'expanding the board s contribution to corporate social responsibility')) +keywords : frozenset({'public relations', 'accounting'}) + +firstname : karen +lastname : bogart +middlename : a smith +year : 2013 +year_papertitle : ((2013, 'understanding the influence of the board of directors on corporate social responsibility in us public companies that are recognized as csr leaders'),) +keywords : frozenset({'sustainability', 'organization theory', 'management'}) + +9/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : maria +lastname : comello +middlename : leonora g +year : 2002 +year_papertitle : ((2002, 'parent child communication perceived sanctions against drug use and youth drug involvement'), (2003, 'the community readiness model a complementary approach to social marketing'), (2005, 'using community readiness key informant assessments in a randomized group prevention trial impact of a participatory community media intervention'), (2006, 'development of an aspirational campaign to prevent youth substance use be under your own influence'), (2006, 'tobacco counteradvertisements aimed at bicultural mexican american youth the impact of language and theme'), (2009, 'flavoured cigarettes sensation seeking and adolescents perceptions of cigarette brands')) +keywords : frozenset({'psychiatry', 'knowledge management', 'social psychology', 'advertising', 'public relations', 'developmental psychology', 'medical education'}) + +firstname : maria +lastname : comello +middlename : leonora g +year : 2010 +year_papertitle : ((2010, 'activated self concept as a mechanism underlying persuasive message effects'),) +keywords : frozenset({'mass communications', 'health education', 'marketing'}) + +10/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : annemarie +lastname : zissu +middlename : None +year : 1989 +year_papertitle : ((1989, 'a causality effect between bid premium and outcome of tender offer'), (1989, 'the information content of post tender offer movement in the price of target shares'), (1990, 'choosing a discount point contract rate combination'), (1991, 'predicting the outcome of tender offers an endogeneity problem')) +keywords : frozenset({'monetary economics', 'microeconomics', 'financial economics', 'actuarial science', 'econometrics'}) + +firstname : annemarie +lastname : zissu +middlename : None +year : 1988 +year_papertitle : ((1988, 'the impact of management resistance on the outcome of hostile tender offers'),) +keywords : frozenset({'finance'}) + +10/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : usha +lastname : haley +middlename : c v +year : 1989 +year_papertitle : ((1989, 'cognitive trails in strategic decision making linking theories of personalities and cognitions'), (1991, 'corporate contributions as managerial masques reframing corporate contributions as strategies to influence society'), (1996, 'singapore incorporated reinterpreting singapore s business environments through a corporate metaphor')) +keywords : frozenset({'economy', 'social psychology', 'public relations', 'accounting', 'cognitive psychology'}) + +firstname : usha +lastname : haley +middlename : c v +year : 1990 +year_papertitle : ((1990, 'from catalysts to chameleons multinational firms as participants in political environments'),) +keywords : frozenset({'south african studies', 'international law', 'commerce business', 'management'}) + +11/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 51.02889768282572 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_chemistry_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_chemistry_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..818429d --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_chemistry_christoph_degree0_advisors_9015.log @@ -0,0 +1,806 @@ +Namespace(testing=False, verbose=1, field=['chemistry'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [185592680] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008944988250732422 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 548.935148537159 minutes + +Starting active labeling... +firstname : johannes +lastname : smid +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'state university of new york system'),) +all_us_institutions_year : ((1991, 'state university of new york system'),) + +firstname : johannes +lastname : smid +middlename : None +year_range : (1965, 2007) +main_us_institutions_year : ((1972, 'state university of new york system'), (1976, 'state university of new york college of environmental science and forestry'), (1977, 'state university of new york college of environmental science and forestry'), (1978, 'state university of new york college of environmental science and forestry'), (1979, 'state university of new york college of environmental science and forestry'), (1980, 'state university of new york college of environmental science and forestry'), (1981, 'state university of new york system'), (1982, 'state university of new york system'), (1983, 'state university of new york system'), (1984, 'state university of new york system'), (1985, 'state university of new york system'), (1986, 'state university of new york system'), (1987, 'state university of new york college of environmental science and forestry'), (1987, 'state university of new york system'), (1988, 'state university of new york college of environmental science and forestry'), (1988, 'state university of new york system'), (1989, 'state university of new york system'), (1990, 'state university of new york system'), (1991, 'state university of new york system'), (1992, 'state university of new york system'), (1993, 'state university of new york system'), (1994, 'state university of new york system'), (1995, 'state university of new york system'), (1996, 'state university of new york system'), (1997, 'state university of new york college of environmental science and forestry'), (1998, 'state university of new york system'), (2001, 'state university of new york system'), (2002, 'state university of new york system'), (2004, 'state university of new york college of environmental science and forestry'), (2006, 'state university of new york college of environmental science and forestry'), (2007, 'state university of new york college of environmental science and forestry')) +all_us_institutions_year : ((1972, 'state university of new york system'), (1976, 'state university of new york college of environmental science and forestry'), (1977, 'state university of new york college of environmental science and forestry'), (1978, 'state university of new york college of environmental science and forestry'), (1979, 'state university of new york college of environmental science and forestry'), (1980, 'state university of new york college of environmental science and forestry'), (1980, 'state university of new york system'), (1981, 'state university of new york system'), (1982, 'state university of new york system'), (1983, 'state university of new york system'), (1984, 'state university of new york system'), (1985, 'state university of new york college of environmental science and forestry'), (1985, 'state university of new york system'), (1986, 'state university of new york system'), (1987, 'state university of new york college of environmental science and forestry'), (1987, 'state university of new york system'), (1988, 'state university of new york college of environmental science and forestry'), (1988, 'state university of new york system'), (1989, 'state university of new york college of environmental science and forestry'), (1989, 'state university of new york system'), (1990, 'state university of new york system'), (1991, 'state university of new york system'), (1992, 'state university of new york system'), (1993, 'state university of new york system'), (1994, 'state university of new york system'), (1995, 'state university of new york system'), (1996, 'state university of new york system'), (1997, 'state university of new york college of environmental science and forestry'), (1998, 'state university of new york system'), (2001, 'state university of new york system'), (2002, 'state university of new york system'), (2004, 'state university of new york college of environmental science and forestry'), (2006, 'state university of new york college of environmental science and forestry'), (2007, 'state university of new york college of environmental science and forestry')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : ekkehard +lastname : sinn +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'western michigan university'),) +all_us_institutions_year : ((2012, 'western michigan university'),) + +firstname : ekkehard +lastname : sinn +middlename : None +year_range : (1970, 2021) +main_us_institutions_year : ((1972, 'university of virginia'), (1974, 'university of virginia'), (1975, 'university of virginia'), (1976, 'university of virginia'), (1977, 'university of virginia'), (1978, 'university of virginia'), (1979, 'university of virginia'), (1980, 'university of virginia'), (1981, 'university of virginia'), (1982, 'university of virginia'), (1983, 'university of virginia'), (1984, 'university of virginia'), (1985, 'university of virginia'), (1986, 'university of virginia'), (1987, 'university of virginia'), (1988, 'university of virginia'), (1989, 'university of virginia'), (1990, 'university of virginia'), (2001, 'missouri university of science and technology'), (2002, 'missouri university of science and technology'), (2003, 'missouri university of science and technology'), (2004, 'missouri university of science and technology'), (2005, 'missouri university of science and technology'), (2007, 'missouri university of science and technology'), (2008, 'harvard university'), (2008, 'missouri university of science and technology'), (2009, 'western michigan university'), (2010, 'western michigan university'), (2011, 'western michigan university'), (2012, 'western michigan university'), (2012, 'missouri university of science and technology'), (2013, 'western michigan university'), (2014, 'western michigan university'), (2015, 'western michigan university'), (2016, 'western michigan university'), (2017, 'western michigan university'), (2018, 'western michigan university'), (2020, 'western michigan university'), (2021, 'western michigan university')) +all_us_institutions_year : ((1972, 'university of virginia'), (1974, 'university of virginia'), (1975, 'university of virginia'), (1976, 'university of virginia'), (1977, 'university of virginia'), (1978, 'university of virginia'), (1979, 'university of virginia'), (1980, 'university of virginia'), (1981, 'university of virginia'), (1982, 'university of virginia'), (1983, 'university of virginia'), (1984, 'university of virginia'), (1985, 'university of virginia'), (1986, 'university of virginia'), (1987, 'university of virginia'), (1988, 'university of virginia'), (1989, 'university of virginia'), (1990, 'university of virginia'), (1995, 'university of virginia'), (1997, 'university of virginia'), (2001, 'missouri university of science and technology'), (2002, 'missouri university of science and technology'), (2002, 'university of missouri'), (2003, 'missouri university of science and technology'), (2004, 'missouri university of science and technology'), (2005, 'missouri university of science and technology'), (2007, 'missouri university of science and technology'), (2008, 'harvard university'), (2008, 'missouri university of science and technology'), (2009, 'western michigan university'), (2010, 'western michigan university'), (2011, 'missouri university of science and technology'), (2011, 'western michigan university'), (2012, 'missouri university of science and technology'), (2012, 'western michigan university'), (2013, 'western michigan university'), (2014, 'western michigan university'), (2015, 'western michigan university'), (2016, 'western michigan university'), (2017, 'western michigan university'), (2018, 'western michigan university'), (2020, 'western michigan university'), (2021, 'western michigan university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : rao +middlename : anandha +year_range : (1994,) +main_us_institutions_year : ((1994, 'cornell university'),) +all_us_institutions_year : ((1994, 'cornell university'),) + +firstname : m +lastname : rao +middlename : a +year_range : (1974, 2017) +main_us_institutions_year : ((1975, 'cornell university'), (1976, 'cornell university'), (1977, 'cornell university'), (1978, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1983, 'cornell university'), (1984, 'cornell university'), (1985, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (1999, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2016, 'cornell university')) +all_us_institutions_year : ((1975, 'cornell university'), (1976, 'cornell university'), (1977, 'cornell university'), (1978, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1983, 'cornell university'), (1984, 'cornell university'), (1985, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (1999, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2016, 'cornell university')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : shaw +middlename : frank +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of wisconsin milwaukee'),) +all_us_institutions_year : ((1993, 'university of wisconsin milwaukee'),) + +firstname : c +lastname : shaw +middlename : frank +year_range : (1984, 2016) +main_us_institutions_year : ((1986, 'university of wisconsin milwaukee'), (1988, 'university of wisconsin milwaukee'), (1990, 'university of wisconsin milwaukee'), (1993, 'university of wisconsin milwaukee'), (1994, 'university of wisconsin milwaukee'), (1995, 'university of wisconsin milwaukee'), (1996, 'university of wisconsin milwaukee'), (1998, 'university of wisconsin milwaukee'), (1999, 'university of wisconsin milwaukee'), (2001, 'eastern kentucky university'), (2002, 'eastern kentucky university'), (2004, 'university of wisconsin milwaukee')) +all_us_institutions_year : ((1986, 'university of wisconsin milwaukee'), (1988, 'university of wisconsin milwaukee'), (1990, 'university of wisconsin milwaukee'), (1993, 'university of wisconsin milwaukee'), (1994, 'university of wisconsin milwaukee'), (1995, 'university of wisconsin milwaukee'), (1996, 'university of wisconsin milwaukee'), (1998, 'university of wisconsin milwaukee'), (1999, 'university of wisconsin milwaukee'), (2001, 'eastern kentucky university'), (2002, 'eastern kentucky university'), (2004, 'university of wisconsin milwaukee')) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : williams +middlename : r +year_range : (1995,) +main_us_institutions_year : ((1995, 'indiana university'),) +all_us_institutions_year : ((1995, 'indiana university'),) + +firstname : david +lastname : williamson +middlename : a +year_range : (1998, 2000) +main_us_institutions_year : ((1999, 'university of kansas'), (2000, 'university of kansas')) +all_us_institutions_year : ((1999, 'university of kansas'), (2000, 'university of kansas')) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : joseph +lastname : rosen +middlename : d +year_range : (1991,) +main_us_institutions_year : ((1991, 'rutgers university'),) +all_us_institutions_year : ((1991, 'rutgers university'),) + +firstname : joseph +lastname : rose +middlename : v +year_range : (2020, 2021) +main_us_institutions_year : ((2020, 'southern methodist university'), (2021, 'southern methodist university')) +all_us_institutions_year : ((2020, 'southern methodist university'), (2021, 'southern methodist university')) + +4/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : joseph +lastname : francisco +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'purdue university'),) +all_us_institutions_year : ((2013, 'purdue university'),) + +firstname : joseph +lastname : francis +middlename : None +year_range : (1990, 2021) +main_us_institutions_year : ((1992, 'united states department of veterans affairs'), (1993, 'veterans health administration'), (1993, 'united states department of veterans affairs'), (1994, 'veterans health administration'), (1994, 'united states department of veterans affairs'), (1995, 'united states department of veterans affairs'), (1999, 'united states department of veterans affairs'), (2001, 'veterans health administration'), (2001, 'united states department of veterans affairs'), (2002, 'university of iowa'), (2003, 'university of iowa'), (2004, 'university of iowa'), (2005, 'louisiana state university'), (2006, 'louisiana state university'), (2006, 'united states department of veterans affairs'), (2007, 'louisiana state university'), (2008, 'louisiana state university'), (2009, 'louisiana state university'), (2010, 'louisiana state university'), (2011, 'louisiana state university'), (2012, 'louisiana state university'), (2013, 'louisiana state university'), (2014, 'louisiana state university'), (2015, 'louisiana state university'), (2016, 'louisiana state university'), (2017, 'louisiana state university'), (2018, 'veterans health administration'), (2018, 'louisiana state university'), (2019, 'louisiana state university'), (2020, 'louisiana state university'), (2021, 'louisiana state university')) +all_us_institutions_year : ((1992, 'united states department of veterans affairs'), (1993, 'united states department of veterans affairs'), (1993, 'veterans health administration'), (1994, 'united states department of veterans affairs'), (1994, 'veterans health administration'), (1995, 'united states department of veterans affairs'), (1999, 'united states department of veterans affairs'), (2001, 'united states department of veterans affairs'), (2001, 'veterans health administration'), (2002, 'university of iowa'), (2003, 'roy j and lucille a carver college of medicine'), (2003, 'university of iowa'), (2004, 'university of iowa'), (2005, 'louisiana state university'), (2005, 'roy j and lucille a carver college of medicine'), (2005, 'university of iowa'), (2006, 'louisiana state university'), (2006, 'united states department of veterans affairs'), (2007, 'louisiana state university'), (2007, 'united states department of veterans affairs'), (2008, 'louisiana state university'), (2008, 'lsu health sciences center new orleans'), (2008, 'united states department of veterans affairs'), (2009, 'louisiana state university'), (2009, 'university of iowa'), (2010, 'louisiana state university'), (2010, 'united states department of veterans affairs'), (2010, 'veterans health administration'), (2011, 'louisiana state university'), (2012, 'louisiana state university'), (2013, 'louisiana state university'), (2013, 'pennington biomedical research center'), (2014, 'louisiana state university'), (2014, 'veterans health administration'), (2015, 'louisiana state university'), (2016, 'louisiana state university'), (2016, 'veterans health administration'), (2017, 'louisiana state university'), (2017, 'veterans health administration'), (2018, 'louisiana state university'), (2018, 'united states department of veterans affairs'), (2018, 'veterans health administration'), (2019, 'louisiana state university'), (2019, 'united states department of veterans affairs'), (2019, 'veterans health administration'), (2020, 'louisiana state university'), (2020, 'united states department of veterans affairs'), (2021, 'louisiana state university')) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : maria +lastname : tamargo +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'city university of new york'),) +all_us_institutions_year : ((2014, 'city university of new york'),) + +firstname : maria +lastname : camargo +middlename : emilia +year_range : (1984, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2009, 'university of california santa cruz'), (2015, 'university of california santa cruz')) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : elke +middlename : h von +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of wisconsin madison'),) +all_us_institutions_year : ((1997, 'university of wisconsin madison'),) + +firstname : j +lastname : elbe +middlename : h von +year_range : (1968, 2006) +main_us_institutions_year : ((1968, 'university of wisconsin madison'), (1969, 'university of wisconsin madison'), (1970, 'university of wisconsin madison'), (1972, 'university of wisconsin madison'), (1975, 'university of wisconsin madison'), (1977, 'university of wisconsin madison'), (1978, 'university of wisconsin madison'), (1979, 'university of wisconsin madison'), (1980, 'university of wisconsin madison'), (1981, 'university of wisconsin madison'), (1982, 'university of wisconsin madison'), (1983, 'university of wisconsin madison'), (1985, 'university of wisconsin madison'), (1987, 'university of wisconsin madison'), (1988, 'university of wisconsin madison'), (1991, 'university of wisconsin madison'), (2006, 'university of wisconsin madison')) +all_us_institutions_year : ((1968, 'university of wisconsin madison'), (1969, 'university of wisconsin madison'), (1970, 'university of wisconsin madison'), (1972, 'university of wisconsin madison'), (1975, 'university of wisconsin madison'), (1977, 'university of wisconsin madison'), (1978, 'university of wisconsin madison'), (1979, 'university of wisconsin madison'), (1980, 'university of wisconsin madison'), (1981, 'university of wisconsin madison'), (1982, 'university of wisconsin madison'), (1983, 'university of wisconsin madison'), (1985, 'university of wisconsin madison'), (1987, 'university of wisconsin madison'), (1988, 'university of wisconsin madison'), (1991, 'university of wisconsin madison'), (2006, 'university of wisconsin madison')) + +4/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : charles +lastname : wilkins +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of arkansas'),) +all_us_institutions_year : ((2010, 'university of arkansas'),) + +firstname : c +lastname : wilkinson +middlename : c +year_range : (2009, 2020) +main_us_institutions_year : ((2009, 'american museum of natural history'), (2014, 'american museum of natural history'), (2016, 'american museum of natural history'), (2017, 'american museum of natural history'), (2020, 'american museum of natural history')) +all_us_institutions_year : ((2009, 'american museum of natural history'), (2014, 'american museum of natural history'), (2016, 'american museum of natural history'), (2017, 'american museum of natural history'), (2020, 'american museum of natural history')) + +5/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yang +lastname : yang +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of california los angeles'),) +all_us_institutions_year : ((2002, 'university of california los angeles'),) + +firstname : yang +lastname : pan +middlename : None +year_range : (1992, 1995) +main_us_institutions_year : ((1992, 'university of california san diego'),) +all_us_institutions_year : ((1992, 'university of california san diego'),) + +5/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : takeshi +lastname : oka +middlename : None +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of chicago'),) +all_us_institutions_year : ((2001, 'university of chicago'),) + +firstname : takeshi +lastname : ikeuchi +middlename : None +year_range : (1994, 2021) +main_us_institutions_year : ((2001, 'university of chicago'), (2003, 'university of chicago')) +all_us_institutions_year : ((2001, 'university of chicago'), (2002, 'university of chicago'), (2003, 'university of chicago')) + +5/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : geoffrey +lastname : blake +middlename : a +year_range : (2010,) +main_us_institutions_year : ((2010, 'california institute of technology'),) +all_us_institutions_year : ((2010, 'california institute of technology'),) + +firstname : e +lastname : blakely +middlename : a +year_range : (1984, 1992) +main_us_institutions_year : ((1984, 'university of california berkeley'), (1986, 'university of california berkeley'), (1989, 'university of california berkeley'), (1992, 'university of california berkeley')) +all_us_institutions_year : ((1984, 'university of california berkeley'), (1986, 'university of california berkeley'), (1989, 'university of california berkeley'), (1992, 'university of california berkeley')) + +5/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : barrufet +middlename : a +year_range : (1993,) +main_us_institutions_year : ((1993, 'texas a m university college station'),) +all_us_institutions_year : ((1993, 'texas a m university college station'),) + +firstname : martin +lastname : barr +middlename : None +year_range : (1953, 2006) +main_us_institutions_year : ((1953, 'university of the sciences'), (1954, 'university of the sciences'), (1955, 'university of the sciences'), (1956, 'university of the sciences'), (1957, 'university of the sciences'), (1958, 'university of the sciences'), (1960, 'university of the sciences'), (1963, 'university of the sciences'), (2006, 'university of the sciences')) +all_us_institutions_year : ((1953, 'university of the sciences'), (1954, 'university of the sciences'), (1955, 'university of the sciences'), (1956, 'university of the sciences'), (1957, 'university of the sciences'), (1958, 'university of the sciences'), (1960, 'university of the sciences'), (1963, 'university of the sciences'), (2006, 'university of the sciences')) + +5/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rui +lastname : liu +middlename : hai +year_range : (2007,) +main_us_institutions_year : ((2007, 'cornell university'),) +all_us_institutions_year : ((2007, 'cornell university'),) + +firstname : rui +lastname : li +middlename : None +year_range : (2004, 2021) +main_us_institutions_year : ((2004, 'chinese academy of sciences'), (2005, 'chinese academy of sciences'), (2006, 'chinese academy of sciences'), (2013, 'chinese academy of sciences')) +all_us_institutions_year : ((2004, 'chinese academy of sciences'), (2005, 'chinese academy of sciences'), (2006, 'chinese academy of sciences'), (2013, 'chinese academy of sciences'), (2019, 'chinese academy of sciences')) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : luis +lastname : echegoyen +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'clemson university'),) +all_us_institutions_year : ((2008, 'clemson university'),) + +firstname : luis +lastname : echegoyen +middlename : None +year_range : (2007, 2009) +main_us_institutions_year : ((2007, 'luna innovations'), (2008, 'luna innovations'), (2009, 'lawrence berkeley national laboratory')) +all_us_institutions_year : ((2007, 'luna innovations'), (2008, 'luna innovations'), (2009, 'lawrence berkeley national laboratory')) + +5/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : toney +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'stanford university'),) +all_us_institutions_year : ((2011, 'stanford university'),) + +firstname : michael +lastname : toney +middlename : d +year_range : (1987, 2019) +main_us_institutions_year : ((1989, 'university of california berkeley'), (1990, 'university of california berkeley'), (1991, 'university of california berkeley'), (1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1995, 'university of alaska fairbanks'), (1995, 'albert einstein college of medicine'), (1998, 'albert einstein college of medicine'), (1999, 'yeshiva university'), (2001, 'university of california davis'), (2003, 'university of wisconsin madison'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2013, 'university of california davis'), (2014, 'university of california davis'), (2015, 'university of california davis'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis')) +all_us_institutions_year : ((1989, 'university of california berkeley'), (1990, 'university of california berkeley'), (1991, 'university of california berkeley'), (1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1994, 'university of california berkeley'), (1995, 'albert einstein college of medicine'), (1995, 'university of alaska fairbanks'), (1998, 'albert einstein college of medicine'), (1998, 'yeshiva university'), (1999, 'university of california davis'), (1999, 'yeshiva university'), (2001, 'university of california davis'), (2003, 'university of wisconsin madison'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'central connecticut state university'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2008, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2013, 'university of california davis'), (2014, 'university of california davis'), (2015, 'university of california davis'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : hoffmann +middlename : r +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of north dakota'),) +all_us_institutions_year : ((1996, 'university of north dakota'),) + +firstname : mark +lastname : hoffmann +middlename : r +year_range : (1983, 2016) +main_us_institutions_year : ((1983, 'university of california berkeley'), (2016, 'university of california berkeley')) +all_us_institutions_year : ((1983, 'university of california berkeley'), (1986, 'university of california berkeley'), (2016, 'university of california berkeley')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mary +lastname : dubois +middlename : rakowski +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of colorado at boulder'),) +all_us_institutions_year : ((2002, 'university of colorado at boulder'),) + +firstname : mary +lastname : dubois +middlename : rakowski +year_range : (1986, 2001) +main_us_institutions_year : ((2001, 'national renewable energy laboratory'),) +all_us_institutions_year : ((2001, 'national renewable energy laboratory'),) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : g +lastname : ellison +middlename : barney +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of colorado at boulder'),) +all_us_institutions_year : ((1999, 'university of colorado at boulder'),) + +firstname : g +lastname : ellison +middlename : barney +year_range : (1993, 2010) +main_us_institutions_year : ((2008, 'national institute of standards and technology'), (2010, 'national institute of standards and technology')) +all_us_institutions_year : ((2008, 'national institute of standards and technology'), (2010, 'national institute of standards and technology')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : g +lastname : ellison +middlename : barney +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of colorado at boulder'),) +all_us_institutions_year : ((1997, 'university of colorado at boulder'),) + +firstname : g +lastname : ellison +middlename : barney +year_range : (1993, 2010) +main_us_institutions_year : ((2008, 'national institute of standards and technology'), (2010, 'national institute of standards and technology')) +all_us_institutions_year : ((2008, 'national institute of standards and technology'), (2010, 'national institute of standards and technology')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeffery +lastname : coffer +middlename : l +year_range : (1993,) +main_us_institutions_year : ((1993, 'texas christian university'),) +all_us_institutions_year : ((1993, 'texas christian university'),) + +firstname : jeffrey +lastname : coffer +middlename : l +year_range : (1994, 1997) +main_us_institutions_year : ((1994, 'texas christian university'), (1997, 'texas christian university')) +all_us_institutions_year : ((1994, 'texas christian university'), (1997, 'texas christian university'), (2002, 'texas christian university')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : guy +middlename : kip +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of california san francisco'),) +all_us_institutions_year : ((2003, 'university of california san francisco'),) + +firstname : r +lastname : guy +middlename : kip +year_range : (2007, 2016) +main_us_institutions_year : ((2007, 'st jude children s research hospital'), (2009, 'st jude children s research hospital'), (2011, 'st jude children s research hospital'), (2013, 'st jude children s research hospital'), (2016, 'st jude children s research hospital')) +all_us_institutions_year : ((2007, 'st jude children s research hospital'), (2009, 'st jude children s research hospital'), (2011, 'st jude children s research hospital'), (2013, 'st jude children s research hospital'), (2016, 'st jude children s research hospital')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : h +lastname : manning +middlename : charles +year_range : (2013,) +main_us_institutions_year : ((2013, 'vanderbilt university'),) +all_us_institutions_year : ((2013, 'vanderbilt university'),) + +firstname : suzanne +lastname : manning +middlename : None +year_range : (1988, 2018) +main_us_institutions_year : ((1988, 'vanderbilt university'), (1989, 'vanderbilt university'), (1991, 'vanderbilt university'), (1997, 'vanderbilt university medical center'), (1998, 'veterans health administration'), (1998, 'vanderbilt university medical center'), (2000, 'vanderbilt university medical center'), (2001, 'vanderbilt university medical center'), (2001, 'vanderbilt university'), (2005, 'vanderbilt university'), (2010, 'vanderbilt university'), (2011, 'vanderbilt university medical center'), (2015, 'vanderbilt university'), (2018, 'vanderbilt university')) +all_us_institutions_year : ((1988, 'vanderbilt university'), (1989, 'vanderbilt university'), (1991, 'vanderbilt university'), (1997, 'vanderbilt university medical center'), (1998, 'vanderbilt university medical center'), (1998, 'veterans health administration'), (2000, 'vanderbilt university medical center'), (2001, 'vanderbilt university'), (2001, 'vanderbilt university medical center'), (2005, 'vanderbilt university'), (2010, 'vanderbilt university'), (2011, 'vanderbilt university medical center'), (2015, 'vanderbilt university'), (2018, 'vanderbilt university')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : yates +middlename : t +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of pittsburgh'),) +all_us_institutions_year : ((2004, 'university of pittsburgh'),) + +firstname : john +lastname : yates +middlename : t +year_range : (1961, 2017) +main_us_institutions_year : ((2008, 'university of virginia'), (2009, 'university of virginia'), (2010, 'university of virginia'), (2011, 'university of virginia'), (2012, 'university of virginia'), (2013, 'university of virginia'), (2014, 'university of virginia'), (2015, 'university of virginia'), (2016, 'university of virginia')) +all_us_institutions_year : ((2008, 'university of pittsburgh'), (2008, 'university of virginia'), (2009, 'university of virginia'), (2010, 'university of virginia'), (2011, 'university of virginia'), (2012, 'university of virginia'), (2013, 'university of virginia'), (2014, 'university of virginia'), (2015, 'university of virginia'), (2016, 'university of virginia')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : zaworotko +middlename : j +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of south florida'),) +all_us_institutions_year : ((2014, 'university of south florida'),) + +firstname : michael +lastname : zaworotko +middlename : j +year_range : (1995, 1996) +main_us_institutions_year : ((1995, 'university of saint mary'),) +all_us_institutions_year : ((1995, 'university of saint mary'),) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : fredrick +lastname : milstein +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of california santa barbara'),) +all_us_institutions_year : ((2012, 'university of california santa barbara'),) + +firstname : frederick +lastname : milstein +middlename : None +year_range : (1964, 2009) +main_us_institutions_year : ((1970, 'university of california santa barbara'), (1971, 'university of california santa barbara'), (1973, 'university of california santa barbara'), (1974, 'university of california santa barbara'), (1977, 'university of california santa barbara'), (1978, 'university of california santa barbara'), (1979, 'university of california santa barbara'), (1980, 'university of california santa barbara'), (1981, 'university of california santa barbara'), (1983, 'university of california santa barbara'), (1985, 'university of california santa barbara'), (1986, 'university of california santa barbara'), (1987, 'university of california santa barbara'), (1988, 'university of california santa barbara'), (1989, 'university of california santa barbara'), (1992, 'university of california santa barbara'), (1994, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1996, 'university of california santa barbara'), (1998, 'university of california santa barbara'), (2000, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2009, 'university of california santa barbara')) +all_us_institutions_year : ((1970, 'university of california santa barbara'), (1971, 'university of california santa barbara'), (1973, 'university of california santa barbara'), (1974, 'university of california santa barbara'), (1977, 'university of california santa barbara'), (1978, 'university of california santa barbara'), (1979, 'university of california santa barbara'), (1980, 'university of california santa barbara'), (1981, 'university of california santa barbara'), (1983, 'university of california santa barbara'), (1985, 'university of california santa barbara'), (1986, 'university of california santa barbara'), (1987, 'university of california santa barbara'), (1988, 'university of california santa barbara'), (1989, 'university of california santa barbara'), (1992, 'university of california santa barbara'), (1994, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1996, 'university of california santa barbara'), (1998, 'university of california santa barbara'), (2000, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2009, 'university of california santa barbara')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stuart +lastname : schreiber +middlename : l +year_range : (1998,) +main_us_institutions_year : ((1998, 'harvard university'),) +all_us_institutions_year : ((1998, 'harvard university'),) + +firstname : stuart +lastname : schreiber +middlename : l +year_range : (1993, 1998) +main_us_institutions_year : ((1993, 'stanford university'), (1998, 'stanford university')) +all_us_institutions_year : ((1991, 'stanford university'), (1993, 'stanford university'), (1997, 'stanford university'), (1998, 'stanford university')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dennis +lastname : olson +middlename : g +year_range : (1996,) +main_us_institutions_year : ((1996, 'iowa state university'),) +all_us_institutions_year : ((1996, 'iowa state university'),) + +firstname : c +lastname : olson +middlename : g +year_range : (1973, 2008) +main_us_institutions_year : ((1973, 'iowa state university'), (1974, 'iowa state university'), (1975, 'iowa state university'), (1976, 'iowa state university'), (1977, 'iowa state university'), (1979, 'iowa state university'), (1980, 'iowa state university'), (1981, 'iowa state university'), (1983, 'iowa state university'), (1984, 'iowa state university'), (1985, 'iowa state university'), (1987, 'iowa state university'), (1989, 'iowa state university'), (1992, 'iowa state university'), (1993, 'iowa state university'), (1995, 'iowa state university'), (1996, 'iowa state university'), (1997, 'iowa state university'), (1998, 'iowa state university'), (1999, 'iowa state university'), (2000, 'iowa state university'), (2001, 'iowa state university'), (2002, 'iowa state university'), (2004, 'iowa state university'), (2006, 'iowa state university'), (2008, 'iowa state university')) +all_us_institutions_year : ((1973, 'iowa state university'), (1974, 'iowa state university'), (1975, 'iowa state university'), (1976, 'iowa state university'), (1977, 'iowa state university'), (1979, 'iowa state university'), (1980, 'iowa state university'), (1981, 'iowa state university'), (1983, 'iowa state university'), (1984, 'iowa state university'), (1985, 'iowa state university'), (1987, 'iowa state university'), (1989, 'iowa state university'), (1992, 'iowa state university'), (1993, 'iowa state university'), (1995, 'iowa state university'), (1996, 'iowa state university'), (1997, 'iowa state university'), (1998, 'iowa state university'), (1999, 'iowa state university'), (2000, 'iowa state university'), (2001, 'iowa state university'), (2002, 'iowa state university'), (2003, 'iowa state university'), (2004, 'iowa state university'), (2006, 'iowa state university'), (2008, 'iowa state university')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : frederick +lastname : lewis +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'northwestern university'),) +all_us_institutions_year : ((2006, 'northwestern university'),) + +firstname : b +lastname : lewis +middlename : g +year_range : (1986, 2011) +main_us_institutions_year : ((1986, 'northwestern university'), (1993, 'northwestern university'), (1995, 'northwestern university'), (1999, 'northwestern university'), (2011, 'northwestern university')) +all_us_institutions_year : ((1986, 'northwestern university'), (1993, 'northwestern university'), (1995, 'northwestern university'), (1999, 'northwestern university'), (2011, 'northwestern university')) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : miller +middlename : j dwayne +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of rochester'),) +all_us_institutions_year : ((1993, 'university of rochester'),) + +firstname : robert +lastname : miller +middlename : c +year_range : (1971, 2021) +main_us_institutions_year : ((1971, 'mayo clinic'), (1991, 'university of kentucky'), (1992, 'university of kentucky'), (1992, 'university of rochester'), (1997, 'mayo clinic'), (1998, 'mayo clinic'), (1999, 'mayo clinic'), (2002, 'north central cancer treatment group'), (2004, 'mayo clinic'), (2005, 'mayo clinic'), (2006, 'mayo clinic'), (2007, 'mayo clinic'), (2008, 'mayo clinic'), (2009, 'mayo clinic'), (2010, 'mayo clinic'), (2011, 'mayo clinic'), (2012, 'mayo clinic'), (2013, 'mayo clinic'), (2014, 'mayo clinic'), (2015, 'mayo clinic'), (2016, 'mayo clinic'), (2017, 'mayo clinic'), (2018, 'mayo clinic'), (2019, 'mayo clinic'), (2020, 'university of maryland baltimore'), (2021, 'mayo clinic')) +all_us_institutions_year : ((1971, 'mayo clinic'), (1991, 'university of kentucky'), (1992, 'university of kentucky'), (1992, 'university of rochester'), (1997, 'mayo clinic'), (1998, 'mayo clinic'), (1998, 'university of kentucky'), (1999, 'mayo clinic'), (2002, 'north central cancer treatment group'), (2004, 'mayo clinic'), (2005, 'mayo clinic'), (2005, 'university of rochester'), (2006, 'mayo clinic'), (2007, 'mayo clinic'), (2008, 'mayo clinic'), (2009, 'mayo clinic'), (2010, 'mayo clinic'), (2010, 'university of rochester'), (2011, 'mayo clinic'), (2011, 'university of kentucky'), (2011, 'university of rochester'), (2012, 'mayo clinic'), (2012, 'university of rochester'), (2013, 'mayo clinic'), (2014, 'mayo clinic'), (2014, 'university of rochester'), (2015, 'mayo clinic'), (2016, 'mayo clinic'), (2017, 'mayo clinic'), (2018, 'mayo clinic'), (2018, 'university of kentucky'), (2019, 'mayo clinic'), (2019, 'university of kentucky'), (2019, 'university of maryland baltimore'), (2020, 'university of kentucky'), (2020, 'university of maryland baltimore'), (2021, 'mayo clinic'), (2021, 'university of maryland baltimore')) + +8/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : moore +middlename : bradley +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of california berkeley'),) +all_us_institutions_year : ((1993, 'university of california berkeley'),) + +firstname : c +lastname : moore +middlename : bradley +year_range : (1978, 1994) +main_us_institutions_year : ((1978, 'lawrence berkeley national laboratory'), (1984, 'lawrence berkeley national laboratory'), (1994, 'lawrence berkeley national laboratory')) +all_us_institutions_year : ((1978, 'lawrence berkeley national laboratory'), (1984, 'lawrence berkeley national laboratory'), (1994, 'lawrence berkeley national laboratory')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : arnold +lastname : rheingold +middlename : l +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of delaware'),) +all_us_institutions_year : ((1997, 'university of delaware'),) + +firstname : arnold +lastname : rheingold +middlename : l +year_range : (1997, 1999) +main_us_institutions_year : ((1997, 'wayne state university'), (1999, 'wayne state university')) +all_us_institutions_year : ((1997, 'wayne state university'), (1999, 'wayne state university')) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : poul +lastname : hansen +middlename : m t +year_range : (1996,) +main_us_institutions_year : ((1996, 'ohio state university'),) +all_us_institutions_year : ((1996, 'ohio state university'),) + +firstname : p +lastname : hansen +middlename : m t +year_range : (1970, 1989) +main_us_institutions_year : ((1970, 'ohio agricultural research and development center'), (1972, 'ohio agricultural research and development center'), (1974, 'ohio agricultural research and development center'), (1976, 'ohio agricultural research and development center'), (1978, 'ohio agricultural research and development center'), (1982, 'ohio agricultural research and development center'), (1989, 'ohio agricultural research and development center')) +all_us_institutions_year : ((1970, 'ohio agricultural research and development center'), (1972, 'ohio agricultural research and development center'), (1974, 'ohio agricultural research and development center'), (1976, 'ohio agricultural research and development center'), (1978, 'ohio agricultural research and development center'), (1982, 'ohio agricultural research and development center'), (1989, 'ohio agricultural research and development center')) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kenneth +lastname : raymond +middlename : n +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of california berkeley'),) +all_us_institutions_year : ((1996, 'university of california berkeley'),) + +firstname : kenneth +lastname : raymond +middlename : n +year_range : (1982, 2001) +main_us_institutions_year : ((2001, 'environmental molecular sciences laboratory'),) +all_us_institutions_year : ((2001, 'environmental molecular sciences laboratory'),) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : mccusker +middlename : k +year_range : (2007,) +main_us_institutions_year : ((2007, 'michigan state university'),) +all_us_institutions_year : ((2007, 'michigan state university'),) + +firstname : james +lastname : mccusker +middlename : k +year_range : (1989, 1991) +main_us_institutions_year : ((1989, 'university of illinois at urbana champaign'), (1991, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1989, 'university of illinois at urbana champaign'), (1991, 'university of illinois at urbana champaign')) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : b +lastname : bergdahl +middlename : mikael +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of california san diego and san diego state university'),) +all_us_institutions_year : ((2005, 'university of california san diego and san diego state university'),) + +firstname : b +lastname : bergdahl +middlename : mikael +year_range : (2004, 2020) +main_us_institutions_year : ((2013, 'san diego state university'), (2016, 'san diego state university'), (2020, 'san diego state university')) +all_us_institutions_year : ((2013, 'san diego state university'), (2016, 'san diego state university'), (2020, 'san diego state university')) + +9/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : cahill +middlename : g +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2015, 'university of illinois at urbana champaign'),) + +firstname : david +lastname : cahill +middlename : None +year_range : (1988, 1990) +main_us_institutions_year : ((1990, 'university of california san francisco'),) +all_us_institutions_year : ((1990, 'university of california san francisco'),) + +10/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : bitterwolf +middlename : e +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of idaho'),) +all_us_institutions_year : ((2010, 'university of idaho'),) + +firstname : thomas +lastname : bitterwolf +middlename : e +year_range : (1977, 1991) +main_us_institutions_year : ((1977, 'united states naval academy'), (1981, 'united states naval academy'), (1983, 'united states naval academy'), (1984, 'united states naval academy'), (1986, 'united states naval academy'), (1987, 'united states naval academy'), (1988, 'united states naval academy'), (1989, 'united states naval academy'), (1990, 'united states naval academy')) +all_us_institutions_year : ((1977, 'united states naval academy'), (1981, 'united states naval academy'), (1983, 'united states naval academy'), (1984, 'united states naval academy'), (1985, 'united states naval academy'), (1986, 'united states naval academy'), (1987, 'united states naval academy'), (1988, 'united states naval academy'), (1989, 'united states naval academy'), (1990, 'united states naval academy')) + +10/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 765.9695044438045 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_computer science_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_computer science_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..aa35e58 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_computer science_christoph_degree0_advisors_9015.log @@ -0,0 +1,755 @@ +Namespace(testing=False, verbose=1, field=['computer science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [41008148] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0006288409233093262 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 330.5035513957342 minutes + +Starting active labeling... +firstname : bongki +lastname : moon +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of arizona'),) +all_us_institutions_year : ((2007, 'university of arizona'),) + +firstname : bongki +lastname : moon +middlename : None +year_range : (1994, 2018) +main_us_institutions_year : ((1994, 'university of maryland college park'), (1995, 'university of maryland college park'), (1996, 'university of maryland college park'), (1997, 'university of maryland college park'), (1998, 'university of maryland college park'), (1999, 'university of arizona'), (2000, 'university of arizona'), (2001, 'university of arizona'), (2002, 'university of arizona'), (2003, 'university of arizona'), (2004, 'university of arizona'), (2005, 'university of arizona'), (2006, 'university of arizona'), (2007, 'university of arizona'), (2008, 'university of arizona'), (2009, 'university of arizona'), (2010, 'university of arizona'), (2011, 'university of arizona'), (2012, 'university of arizona')) +all_us_institutions_year : ((1994, 'university of maryland college park'), (1995, 'university of maryland college park'), (1996, 'university of maryland college park'), (1997, 'university of maryland college park'), (1998, 'university of maryland college park'), (1999, 'university of arizona'), (2000, 'university of arizona'), (2001, 'university of arizona'), (2002, 'university of arizona'), (2003, 'university of arizona'), (2004, 'university of arizona'), (2005, 'university of arizona'), (2006, 'university of arizona'), (2007, 'university of arizona'), (2008, 'university of arizona'), (2009, 'university of arizona'), (2010, 'university of arizona'), (2011, 'university of arizona'), (2012, 'university of arizona')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : alin +lastname : dobra +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of florida'),) +all_us_institutions_year : ((2009, 'university of florida'),) + +firstname : alin +lastname : dobra +middlename : None +year_range : (2001, 2019) +main_us_institutions_year : ((2002, 'cornell university'), (2003, 'cornell university'), (2005, 'university of florida'), (2006, 'university of florida'), (2007, 'university of florida'), (2008, 'university of florida'), (2009, 'university of florida'), (2010, 'university of florida'), (2012, 'university of florida'), (2013, 'university of florida'), (2014, 'university of florida'), (2015, 'university of florida'), (2017, 'university of florida'), (2018, 'university of florida'), (2019, 'university of florida')) +all_us_institutions_year : ((2002, 'cornell university'), (2003, 'cornell university'), (2005, 'university of florida'), (2006, 'university of florida'), (2007, 'university of florida'), (2008, 'university of florida'), (2009, 'university of florida'), (2010, 'university of florida'), (2012, 'university of florida'), (2013, 'university of florida'), (2014, 'university of florida'), (2015, 'university of florida'), (2017, 'university of florida'), (2018, 'university of florida'), (2019, 'university of florida')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alberto +lastname : sangiovannivincentelli +middlename : l +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of california berkeley'),) +all_us_institutions_year : ((1998, 'university of california berkeley'),) + +firstname : alberto +lastname : garcia +middlename : None +year_range : (1988, 2020) +main_us_institutions_year : ((1989, 'university of california berkeley'), (1990, 'university of california berkeley'), (1992, 'lawrence berkeley national laboratory'), (1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1995, 'parc')) +all_us_institutions_year : ((1989, 'university of california berkeley'), (1990, 'lawrence berkeley national laboratory'), (1990, 'university of california berkeley'), (1992, 'lawrence berkeley national laboratory'), (1992, 'university of california berkeley'), (1993, 'lawrence berkeley national laboratory'), (1993, 'university of california berkeley'), (1995, 'parc')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : sarin +middlename : c +year_range : (1996,) +main_us_institutions_year : ((1996, 'virginia tech'),) +all_us_institutions_year : ((1996, 'virginia tech'),) + +firstname : s +lastname : nagendra +middlename : None +year_range : (1991, 1994) +main_us_institutions_year : ((1991, 'virginia tech'), (1992, 'virginia tech'), (1994, 'virginia tech')) +all_us_institutions_year : ((1991, 'virginia tech'), (1992, 'virginia tech'), (1993, 'virginia tech'), (1994, 'virginia tech'), (1995, 'virginia tech')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jose +lastname : martinez +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'cornell university'),) +all_us_institutions_year : ((2014, 'cornell university'),) + +firstname : jose +lastname : martinezlorenzo +middlename : angel +year_range : (2006, 2021) +main_us_institutions_year : ((2007, 'northeastern university'), (2008, 'northeastern university'), (2009, 'northeastern university'), (2010, 'northeastern university'), (2011, 'northeastern university'), (2012, 'northeastern university'), (2013, 'northeastern university'), (2014, 'northeastern university'), (2015, 'northeastern university'), (2016, 'northeastern university'), (2017, 'northeastern university'), (2018, 'northeastern university'), (2019, 'northeastern university'), (2020, 'northeastern university'), (2021, 'northeastern university')) +all_us_institutions_year : ((2007, 'northeastern university'), (2008, 'northeastern university'), (2009, 'northeastern university'), (2010, 'northeastern university'), (2011, 'northeastern university'), (2012, 'northeastern university'), (2013, 'northeastern university'), (2014, 'northeastern university'), (2015, 'northeastern university'), (2016, 'northeastern university'), (2017, 'northeastern university'), (2018, 'northeastern university'), (2019, 'northeastern university'), (2020, 'northeastern university'), (2021, 'northeastern university')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hector +lastname : garciamolina +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'stanford university'),) +all_us_institutions_year : ((1999, 'stanford university'),) + +firstname : hector +lastname : garcia +middlename : garcia +year_range : (2006, 2021) +main_us_institutions_year : ((2016, 'medstar washington hospital center'), (2019, 'medstar washington hospital center'), (2020, 'medstar washington hospital center')) +all_us_institutions_year : ((2016, 'medstar washington hospital center'), (2019, 'medstar washington hospital center'), (2020, 'medstar washington hospital center')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : juan +lastname : alonso +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'stanford university'),) +all_us_institutions_year : ((2011, 'stanford university'),) + +firstname : juan +lastname : alonsolarraga +middlename : octavio +year_range : (2008, 2021) +main_us_institutions_year : ((2017, 'yahoo'),) +all_us_institutions_year : ((2017, 'yahoo'),) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : z +lastname : ozsoyoglu +middlename : meral +year_range : (1994,) +main_us_institutions_year : ((1994, 'case western reserve university'),) +all_us_institutions_year : ((1994, 'case western reserve university'),) + +firstname : z +lastname : ozsoyoǧlu +middlename : meral +year_range : (1998, 2011) +main_us_institutions_year : ((1998, 'case western reserve university'), (2004, 'case western reserve university'), (2011, 'case western reserve university')) +all_us_institutions_year : ((1998, 'case western reserve university'), (2004, 'case western reserve university'), (2011, 'case western reserve university')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : harrison +middlename : michael +year_range : (1997,) +main_us_institutions_year : ((1997, 'stanford university'),) +all_us_institutions_year : ((1997, 'stanford university'),) + +firstname : j +lastname : harris +middlename : r +year_range : (1991, 2021) +main_us_institutions_year : ((2002, 'university of maryland college park'), (2003, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'lawrence livermore national laboratory'), (2007, 'lawrence livermore national laboratory'), (2008, 'lawrence livermore national laboratory'), (2009, 'lawrence livermore national laboratory'), (2011, 'naval postgraduate school'), (2014, 'air force research laboratory'), (2015, 'united states department of the navy'), (2016, 'air force research laboratory'), (2017, 'air force research laboratory'), (2018, 'air force research laboratory'), (2019, 'air force research laboratory'), (2020, 'air force research laboratory'), (2021, 'air force research laboratory')) +all_us_institutions_year : ((2001, 'university of maryland college park'), (2002, 'united states department of the navy'), (2002, 'university of maryland college park'), (2003, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'lawrence livermore national laboratory'), (2007, 'lawrence livermore national laboratory'), (2008, 'lawrence livermore national laboratory'), (2009, 'lawrence livermore national laboratory'), (2011, 'naval postgraduate school'), (2012, 'lawrence livermore national laboratory'), (2014, 'air force research laboratory'), (2014, 'lawrence livermore national laboratory'), (2015, 'united states department of the navy'), (2016, 'air force research laboratory'), (2016, 'united states department of the navy'), (2017, 'air force research laboratory'), (2017, 'lawrence livermore national laboratory'), (2017, 'united states department of the navy'), (2018, 'air force research laboratory'), (2019, 'air force research laboratory'), (2020, 'air force research laboratory'), (2021, 'air force research laboratory')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : doug +lastname : jacobson +middlename : w +year_range : (1999,) +main_us_institutions_year : ((1999, 'iowa state university'),) +all_us_institutions_year : ((1999, 'iowa state university'),) + +firstname : doug +lastname : jacobsen +middlename : w +year_range : (2011, 2014) +main_us_institutions_year : ((2011, 'florida state university'), (2013, 'los alamos national laboratory'), (2014, 'los alamos national laboratory')) +all_us_institutions_year : ((2011, 'florida state university'), (2013, 'los alamos national laboratory'), (2014, 'los alamos national laboratory')) + +2/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dennis +lastname : lin +middlename : k j +year_range : (2008,) +main_us_institutions_year : ((2008, 'pennsylvania state university'),) +all_us_institutions_year : ((2008, 'pennsylvania state university'),) + +firstname : j +lastname : li +middlename : k j +year_range : (1979, 2020) +main_us_institutions_year : ((1979, 'university of pennsylvania'), (1980, 'university of pennsylvania'), (1983, 'rutgers university'), (1984, 'rutgers university'), (1985, 'rutgers university'), (1986, 'rutgers university'), (1988, 'rutgers university'), (1989, 'rutgers university'), (1990, 'rutgers university'), (1991, 'rutgers university'), (1992, 'rutgers university'), (1993, 'rutgers university'), (1994, 'rutgers university'), (1995, 'rutgers university'), (1996, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2012, 'rutgers university'), (2013, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university'), (2018, 'rutgers university'), (2019, 'rutgers university'), (2020, 'rutgers university')) +all_us_institutions_year : ((1977, 'university of pennsylvania'), (1979, 'university of pennsylvania'), (1980, 'university of pennsylvania'), (1983, 'rutgers university'), (1984, 'rutgers university'), (1985, 'rutgers university'), (1986, 'rutgers university'), (1988, 'rutgers university'), (1989, 'rutgers university'), (1990, 'rutgers university'), (1991, 'rutgers university'), (1992, 'rutgers university'), (1993, 'rutgers university'), (1994, 'rutgers university'), (1995, 'rutgers university'), (1996, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2004, 'university of pennsylvania'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2012, 'rutgers university'), (2013, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2016, 'university of pennsylvania'), (2017, 'rutgers university'), (2018, 'rutgers university'), (2018, 'university of pennsylvania'), (2019, 'rutgers university'), (2020, 'rutgers university')) + +2/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rajan +lastname : batta +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'university at buffalo'),) +all_us_institutions_year : ((1991, 'university at buffalo'),) + +firstname : francine +lastname : battaglia +middlename : None +year_range : (1993, 2021) +main_us_institutions_year : ((1997, 'pennsylvania state university'), (1998, 'national institute of standards and technology'), (1998, 'pennsylvania state university'), (1999, 'iowa state university'), (2004, 'iowa state university'), (2006, 'iowa state university'), (2007, 'iowa state university'), (2008, 'iowa state university'), (2009, 'virginia tech'), (2011, 'virginia tech'), (2012, 'virginia tech'), (2013, 'virginia tech'), (2014, 'virginia tech'), (2015, 'virginia tech'), (2016, 'virginia tech'), (2017, 'virginia tech'), (2018, 'university at buffalo'), (2019, 'university at buffalo'), (2020, 'university at buffalo'), (2021, 'state university of new york system')) +all_us_institutions_year : ((1996, 'pennsylvania state university'), (1997, 'pennsylvania state university'), (1998, 'national institute of standards and technology'), (1998, 'pennsylvania state university'), (1999, 'iowa state university'), (2003, 'iowa state university'), (2004, 'iowa state university'), (2005, 'iowa state university'), (2006, 'iowa state university'), (2006, 'virginia tech'), (2007, 'iowa state university'), (2007, 'virginia tech'), (2008, 'iowa state university'), (2008, 'virginia tech'), (2009, 'virginia tech'), (2010, 'virginia tech'), (2011, 'virginia tech'), (2012, 'virginia tech'), (2013, 'virginia tech'), (2014, 'virginia tech'), (2015, 'virginia tech'), (2016, 'virginia tech'), (2017, 'university at buffalo'), (2017, 'virginia tech'), (2018, 'university at buffalo'), (2018, 'virginia tech'), (2019, 'state university of new york system'), (2019, 'university at buffalo'), (2020, 'state university of new york system'), (2020, 'university at buffalo'), (2021, 'state university of new york system')) + +2/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : daniela +lastname : rus +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2007, 'massachusetts institute of technology'),) + +firstname : daniel +lastname : rush +middlename : None +year_range : (2011, 2020) +main_us_institutions_year : ((2011, 'university of michigan'), (2012, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of northern colorado'), (2017, 'university of northern colorado'), (2018, 'boise state university'), (2019, 'boise state university'), (2020, 'boise state university')) +all_us_institutions_year : ((2011, 'university of michigan'), (2012, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of northern colorado'), (2017, 'boise state university'), (2017, 'university of northern colorado'), (2018, 'boise state university'), (2019, 'boise state university'), (2020, 'boise state university')) + +2/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lie +lastname : tang +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'iowa state university'),) +all_us_institutions_year : ((2013, 'iowa state university'),) + +firstname : li +lastname : tang +middlename : None +year_range : (2011, 2014) +main_us_institutions_year : ((2014, 'florida international university'),) +all_us_institutions_year : ((2012, 'florida international university'), (2014, 'florida international university')) + +2/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : xu +middlename : jim +year_range : (2010,) +main_us_institutions_year : ((2010, 'georgia institute of technology'),) +all_us_institutions_year : ((2010, 'georgia institute of technology'),) + +firstname : junming +lastname : xu +middlename : None +year_range : (1980, 2019) +main_us_institutions_year : None +all_us_institutions_year : ((2012, 'chinese academy of sciences'),) + +2/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : xi +lastname : zhang +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'texas a m university college station'),) +all_us_institutions_year : ((2010, 'texas a m university college station'),) + +firstname : xi +lastname : zhang +middlename : None +year_range : (2015, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2018, 'electric power research institute'),) + +2/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : joshua +lastname : stuart +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of california santa cruz'),) +all_us_institutions_year : ((2009, 'university of california santa cruz'),) + +firstname : josh +lastname : stuart +middlename : None +year_range : (2003, 2021) +main_us_institutions_year : ((2005, 'university of california santa cruz'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2015, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz'), (2021, 'university of california santa cruz')) +all_us_institutions_year : ((2005, 'university of california santa cruz'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2015, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz'), (2021, 'university of california santa cruz')) + +2/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : karsten +lastname : schwan +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'georgia institute of technology'),) +all_us_institutions_year : ((1992, 'georgia institute of technology'),) + +firstname : karsten +lastname : schwan +middlename : None +year_range : (2002, 2014) +main_us_institutions_year : ((2002, 'georgia institute of technology college of computing'), (2014, 'georgia institute of technology college of computing')) +all_us_institutions_year : ((2002, 'georgia institute of technology college of computing'), (2014, 'georgia institute of technology college of computing')) + +3/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ling +lastname : wang +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'nova southeastern university'),) +all_us_institutions_year : ((2013, 'nova southeastern university'),) + +firstname : ling +lastname : wang +middlename : None +year_range : (2014, 2018) +main_us_institutions_year : ((2014, 'syracuse university'), (2014, 'northeast normal university')) +all_us_institutions_year : ((2014, 'northeast normal university'), (2014, 'syracuse university')) + +3/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : srikanth +lastname : krishnamurthy +middlename : v +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of california riverside'),) +all_us_institutions_year : ((2012, 'university of california riverside'),) + +firstname : s +lastname : krishnamurthy +middlename : v +year_range : (1998, 2002) +main_us_institutions_year : ((1998, 'university of california san diego'), (1998, 'hrl laboratories'), (2000, 'university of california san diego'), (2001, 'university of california san diego'), (2002, 'university of california san diego')) +all_us_institutions_year : ((1998, 'hrl laboratories'), (1998, 'university of california san diego'), (2000, 'university of california san diego'), (2001, 'university of california san diego'), (2002, 'university of california san diego')) + +3/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : arditi +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'illinois institute of technology'),) +all_us_institutions_year : ((2008, 'illinois institute of technology'),) + +firstname : david +lastname : arditi +middlename : None +year_range : (1983, 2021) +main_us_institutions_year : ((1983, 'illinois institute of technology'), (1984, 'illinois institute of technology'), (1985, 'illinois institute of technology'), (1989, 'illinois institute of technology'), (1991, 'illinois institute of technology'), (1995, 'illinois institute of technology'), (1996, 'illinois institute of technology'), (1997, 'illinois institute of technology'), (1998, 'illinois institute of technology'), (1999, 'illinois institute of technology'), (2000, 'illinois institute of technology'), (2001, 'illinois institute of technology'), (2002, 'illinois institute of technology'), (2003, 'illinois institute of technology'), (2004, 'illinois institute of technology'), (2005, 'illinois institute of technology'), (2006, 'illinois institute of technology'), (2007, 'illinois institute of technology'), (2008, 'illinois institute of technology'), (2009, 'illinois institute of technology'), (2010, 'illinois institute of technology'), (2011, 'illinois institute of technology'), (2012, 'illinois institute of technology'), (2013, 'illinois institute of technology'), (2014, 'illinois institute of technology'), (2015, 'illinois institute of technology'), (2016, 'illinois institute of technology'), (2017, 'illinois institute of technology'), (2018, 'illinois institute of technology'), (2019, 'illinois institute of technology'), (2020, 'illinois institute of technology'), (2021, 'illinois institute of technology')) +all_us_institutions_year : ((1983, 'illinois institute of technology'), (1984, 'illinois institute of technology'), (1985, 'illinois institute of technology'), (1989, 'illinois institute of technology'), (1991, 'illinois institute of technology'), (1995, 'illinois institute of technology'), (1996, 'illinois institute of technology'), (1997, 'illinois institute of technology'), (1998, 'illinois institute of technology'), (1999, 'illinois institute of technology'), (2000, 'illinois institute of technology'), (2001, 'illinois institute of technology'), (2002, 'illinois institute of technology'), (2003, 'illinois institute of technology'), (2004, 'illinois institute of technology'), (2005, 'illinois institute of technology'), (2006, 'illinois institute of technology'), (2006, 'southern illinois university edwardsville'), (2007, 'illinois institute of technology'), (2008, 'illinois institute of technology'), (2009, 'illinois institute of technology'), (2010, 'illinois institute of technology'), (2011, 'illinois institute of technology'), (2012, 'illinois institute of technology'), (2013, 'illinois institute of technology'), (2014, 'illinois institute of technology'), (2015, 'illinois institute of technology'), (2016, 'illinois institute of technology'), (2017, 'illinois institute of technology'), (2018, 'illinois institute of technology'), (2019, 'illinois institute of technology'), (2020, 'illinois institute of technology'), (2021, 'illinois institute of technology')) + +3/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ravi +lastname : pendse +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'wichita state university'),) +all_us_institutions_year : ((2003, 'wichita state university'),) + +firstname : ravi +lastname : pendse +middlename : None +year_range : (1996, 2017) +main_us_institutions_year : ((1996, 'wichita state university'), (1997, 'wichita state university'), (1998, 'wichita state university'), (1999, 'wichita state university'), (2002, 'wichita state university'), (2003, 'wichita state university'), (2004, 'wichita state university'), (2005, 'wichita state university'), (2006, 'wichita state university'), (2007, 'wichita state university'), (2008, 'wichita state university'), (2009, 'wichita state university'), (2010, 'wichita state university'), (2011, 'wichita state university'), (2012, 'wichita state university'), (2013, 'wichita state university'), (2014, 'wichita state university'), (2015, 'wichita state university'), (2017, 'brown university')) +all_us_institutions_year : ((1996, 'wichita state university'), (1997, 'wichita state university'), (1998, 'wichita state university'), (1999, 'wichita state university'), (2002, 'wichita state university'), (2003, 'wichita state university'), (2004, 'wichita state university'), (2005, 'wichita state university'), (2006, 'wichita state university'), (2007, 'wichita state university'), (2008, 'wichita state university'), (2009, 'wichita state university'), (2010, 'wichita state university'), (2011, 'wichita state university'), (2012, 'wichita state university'), (2013, 'wichita state university'), (2014, 'wichita state university'), (2015, 'wichita state university'), (2017, 'brown university')) + +4/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : iyer +middlename : purushothaman +year_range : (1993,) +main_us_institutions_year : ((1993, 'pennsylvania state university'),) +all_us_institutions_year : ((1993, 'pennsylvania state university'),) + +firstname : s +lastname : iyer +middlename : purushothaman +year_range : (1993, 2007) +main_us_institutions_year : ((1994, 'north carolina state university'), (1995, 'north carolina state university'), (1996, 'north carolina state university'), (1997, 'north carolina state university'), (1999, 'north carolina state university'), (2000, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2005, 'north carolina state university'), (2006, 'north carolina state university'), (2007, 'north carolina state university')) +all_us_institutions_year : ((1994, 'north carolina state university'), (1995, 'north carolina state university'), (1996, 'north carolina state university'), (1997, 'north carolina state university'), (1999, 'north carolina state university'), (2000, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2005, 'north carolina state university'), (2006, 'north carolina state university'), (2007, 'north carolina state university')) + +5/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : garcia +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'vanderbilt university'),) +all_us_institutions_year : ((1995, 'vanderbilt university'),) + +firstname : ephrahim +lastname : garcia +middlename : None +year_range : (1992, 2018) +main_us_institutions_year : ((1992, 'vanderbilt university'), (1995, 'vanderbilt university'), (1996, 'vanderbilt university'), (1997, 'vanderbilt university'), (1998, 'vanderbilt university'), (1999, 'vanderbilt university'), (2001, 'vanderbilt university'), (2002, 'cornell university'), (2002, 'vanderbilt university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university')) +all_us_institutions_year : ((1992, 'vanderbilt university'), (1993, 'vanderbilt university'), (1994, 'vanderbilt university'), (1995, 'vanderbilt university'), (1996, 'vanderbilt university'), (1997, 'vanderbilt university'), (1998, 'vanderbilt university'), (1999, 'vanderbilt university'), (2001, 'vanderbilt university'), (2002, 'cornell university'), (2002, 'vanderbilt university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2011, 'united states department of homeland security'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university')) + +5/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : heinrich +lastname : bulthoff +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'brown university'),) +all_us_institutions_year : ((1995, 'brown university'),) + +firstname : h +lastname : bulthoff +middlename : h +year_range : (1979, 2021) +main_us_institutions_year : ((1989, 'brown university'), (1991, 'brown university'), (1992, 'brown university')) +all_us_institutions_year : ((1989, 'brown university'), (1989, 'massachusetts institute of technology'), (1991, 'brown university'), (1992, 'brown university'), (1994, 'massachusetts institute of technology')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nicholas +lastname : schork +middlename : j +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of california san diego'),) +all_us_institutions_year : ((2006, 'university of california san diego'),) + +firstname : nicholas +lastname : schork +middlename : j +year_range : (2019, 2020) +main_us_institutions_year : ((2019, 'city of hope national medical center'), (2020, 'city of hope national medical center'), (2020, 'translational genomics research institute')) +all_us_institutions_year : ((2019, 'city of hope national medical center'), (2020, 'city of hope national medical center'), (2020, 'translational genomics research institute')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ahmed +lastname : elamawy +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'louisiana state university and agricultural mechanical college'),) +all_us_institutions_year : ((1997, 'louisiana state university and agricultural mechanical college'),) + +firstname : a +lastname : elamawy +middlename : None +year_range : (1991, 1993) +main_us_institutions_year : ((1991, 'louisiana state university'), (1992, 'louisiana state university'), (1993, 'louisiana state university')) +all_us_institutions_year : ((1991, 'louisiana state university'), (1992, 'louisiana state university'), (1993, 'louisiana state university')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : vijayalakshmi +lastname : atluri +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'rutgers university'),) +all_us_institutions_year : ((2007, 'rutgers university'),) + +firstname : vijayalakshmi +lastname : atluri +middlename : None +year_range : (1992, 1993) +main_us_institutions_year : ((1992, 'george mason university'), (1993, 'george mason university')) +all_us_institutions_year : ((1992, 'george mason university'), (1993, 'george mason university'), (1994, 'george mason university'), (1996, 'rutgers university'), (1997, 'rutgers university')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kristofer +lastname : pister +middlename : s j +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of california berkeley'),) +all_us_institutions_year : ((2003, 'university of california berkeley'),) + +firstname : k +lastname : pister +middlename : s j +year_range : (1994, 2001) +main_us_institutions_year : ((1994, 'university of california los angeles'), (1995, 'university of california los angeles'), (1996, 'university of california los angeles')) +all_us_institutions_year : ((1994, 'university of california los angeles'), (1995, 'university of california los angeles'), (1996, 'university of california los angeles'), (1997, 'university of california los angeles')) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : appelbe +middlename : f +year_range : (1990,) +main_us_institutions_year : ((1990, 'georgia institute of technology'),) +all_us_institutions_year : ((1990, 'georgia institute of technology'),) + +firstname : bill +lastname : appelbe +middlename : None +year_range : (1987, 1996) +main_us_institutions_year : ((1987, 'georgia institute of technology'), (1989, 'georgia institute of technology'), (1990, 'georgia institute of technology'), (1991, 'georgia institute of technology'), (1993, 'georgia institute of technology'), (1995, 'georgia institute of technology'), (1996, 'georgia institute of technology')) +all_us_institutions_year : ((1987, 'georgia institute of technology'), (1989, 'georgia institute of technology'), (1990, 'georgia institute of technology'), (1991, 'georgia institute of technology'), (1993, 'georgia institute of technology'), (1995, 'georgia institute of technology'), (1996, 'georgia institute of technology')) + +9/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : gropp +middlename : d +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2015, 'university of illinois at urbana champaign'),) + +firstname : bill +lastname : gropp +middlename : None +year_range : (2009, 2011) +main_us_institutions_year : ((2009, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of tennessee')) +all_us_institutions_year : ((2009, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of tennessee'), (2015, 'university of illinois at urbana champaign')) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : carol +lastname : buell +middlename : robin +year_range : (2009,) +main_us_institutions_year : ((2009, 'iowa state university'),) +all_us_institutions_year : ((2009, 'iowa state university'),) + +firstname : c +lastname : buell +middlename : robin +year_range : (1998, 2021) +main_us_institutions_year : ((1998, 'louisiana state university'), (2001, 'j craig venter institute'), (2002, 'j craig venter institute'), (2003, 'j craig venter institute'), (2005, 'j craig venter institute'), (2006, 'tigerlogic'), (2007, 'j craig venter institute'), (2008, 'michigan state university'), (2009, 'michigan state university'), (2010, 'michigan state university'), (2011, 'michigan state university'), (2012, 'michigan state university'), (2013, 'michigan state university'), (2014, 'michigan state university'), (2015, 'michigan state university'), (2016, 'michigan state university'), (2017, 'michigan state university'), (2018, 'michigan state university'), (2019, 'michigan state university'), (2020, 'michigan state university'), (2021, 'michigan state university')) +all_us_institutions_year : ((1998, 'louisiana state university'), (2001, 'j craig venter institute'), (2002, 'j craig venter institute'), (2002, 'tigerlogic'), (2003, 'j craig venter institute'), (2005, 'j craig venter institute'), (2005, 'tigerlogic'), (2006, 'j craig venter institute'), (2006, 'research medical center'), (2006, 'tigerlogic'), (2007, 'j craig venter institute'), (2007, 'michigan state university'), (2007, 'research medical center'), (2007, 'tigerlogic'), (2008, 'j craig venter institute'), (2008, 'michigan state university'), (2008, 'tigerlogic'), (2009, 'j craig venter institute'), (2009, 'michigan state university'), (2010, 'michigan state university'), (2011, 'colorado state university'), (2011, 'great lakes bioenergy research center'), (2011, 'michigan state university'), (2012, 'great lakes bioenergy research center'), (2012, 'michigan state university'), (2013, 'great lakes bioenergy research center'), (2013, 'michigan state university'), (2014, 'great lakes bioenergy research center'), (2014, 'michigan state university'), (2015, 'great lakes bioenergy research center'), (2015, 'michigan state university'), (2016, 'great lakes bioenergy research center'), (2016, 'michigan state university'), (2017, 'great lakes bioenergy research center'), (2017, 'michigan state university'), (2018, 'great lakes bioenergy research center'), (2018, 'michigan state university'), (2019, 'great lakes bioenergy research center'), (2019, 'michigan state university'), (2020, 'michigan state university'), (2021, 'michigan state university')) + +11/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dingzhu +lastname : du +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of texas at dallas'),) +all_us_institutions_year : ((2013, 'university of texas at dallas'),) + +firstname : jing +lastname : du +middlename : None +year_range : (2011, 2021) +main_us_institutions_year : ((2011, 'michigan state university'), (2012, 'michigan state university'), (2014, 'university of texas at san antonio'), (2015, 'texas a m university'), (2015, 'university of texas at san antonio'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2019, 'university of florida'), (2020, 'university of florida'), (2021, 'university of florida')) +all_us_institutions_year : ((2010, 'michigan state university'), (2011, 'michigan state university'), (2012, 'michigan state university'), (2014, 'university of texas at san antonio'), (2015, 'texas a m university'), (2015, 'university of texas at san antonio'), (2016, 'texas a m university'), (2016, 'university of texas at san antonio'), (2017, 'texas a m university'), (2018, 'michigan state university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2019, 'university of florida'), (2020, 'university of florida'), (2021, 'university of florida')) + +12/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gaurav +lastname : sharma +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of rochester'),) +all_us_institutions_year : ((2011, 'university of rochester'),) + +firstname : gaurav +lastname : sharma +middlename : None +year_range : (2016, 2019) +main_us_institutions_year : ((2016, 'ohio state university'), (2019, 'ohio state university')) +all_us_institutions_year : ((2016, 'ohio state university'), (2019, 'ohio state university')) + +12/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : h +lastname : seung +middlename : sebastian +year_range : (2009,) +main_us_institutions_year : ((2009, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2009, 'massachusetts institute of technology'),) + +firstname : h +lastname : seung +middlename : sebastian +year_range : (2003, 2004) +main_us_institutions_year : ((2003, 'new york university'), (2004, 'howard hughes medical institute')) +all_us_institutions_year : ((2003, 'new york university'), (2004, 'howard hughes medical institute')) + +12/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jaideep +lastname : srivastava +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of minnesota'),) +all_us_institutions_year : ((2002, 'university of minnesota'),) + +firstname : s +lastname : srivastava +middlename : None +year_range : (2006, 2008) +main_us_institutions_year : ((2006, 'university of minnesota'), (2007, 'university of minnesota'), (2008, 'university of minnesota')) +all_us_institutions_year : ((2006, 'university of minnesota'), (2007, 'university of minnesota'), (2008, 'university of minnesota')) + +12/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1001.6107290387154 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_economics_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_economics_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..9f186b1 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_economics_christoph_degree0_advisors_9015.log @@ -0,0 +1,874 @@ +Namespace(testing=False, verbose=1, field=['economics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [162324750] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008942127227783203 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 16.200123103459678 minutes + +Starting active labeling... +firstname : michael +lastname : moore +middlename : r +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of michigan'),) +all_us_institutions_year : ((2003, 'university of michigan'),) + +firstname : michael +lastname : moore +middlename : l +year_range : (1974, 2020) +main_us_institutions_year : ((1974, 'michigan state university'), (1975, 'michigan state university'), (1978, 'michigan state university'), (1987, 'michigan state university'), (1992, 'michigan state university'), (1994, 'michigan state university'), (1995, 'michigan state university'), (2008, 'michigan state university'), (2014, 'michigan state university'), (2020, 'michigan state university')) +all_us_institutions_year : ((1974, 'michigan state university'), (1975, 'michigan state university'), (1978, 'michigan state university'), (1987, 'michigan state university'), (1992, 'michigan state university'), (1994, 'michigan state university'), (1995, 'michigan state university'), (2008, 'michigan state university'), (2014, 'michigan state university'), (2020, 'michigan state university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : m +lastname : khan +middlename : ali +year_range : (2010,) +main_us_institutions_year : ((2010, 'johns hopkins university'),) +all_us_institutions_year : ((2010, 'johns hopkins university'),) + +firstname : m +lastname : khan +middlename : m k +year_range : (1987, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'centre college'),) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : margo +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'boston university'),) +all_us_institutions_year : ((2011, 'boston university'),) + +firstname : robert +lastname : margo +middlename : a +year_range : (1982, 2021) +main_us_institutions_year : ((1982, 'university of pennsylvania'), (1983, 'university of pennsylvania'), (1984, 'national bureau of economic research'), (1986, 'national bureau of economic research'), (1989, 'vanderbilt university'), (1990, 'vanderbilt university'), (1990, 'national bureau of economic research'), (1991, 'vanderbilt university'), (1991, 'national bureau of economic research'), (1992, 'vanderbilt university'), (1992, 'national bureau of economic research'), (1993, 'vanderbilt university'), (1994, 'national bureau of economic research'), (1995, 'national bureau of economic research'), (1996, 'vanderbilt university'), (1997, 'vanderbilt university'), (1998, 'national bureau of economic research'), (1999, 'national bureau of economic research'), (2000, 'national bureau of economic research'), (2001, 'national bureau of economic research'), (2002, 'vanderbilt university'), (2003, 'vanderbilt university'), (2004, 'vanderbilt university'), (2004, 'national bureau of economic research'), (2005, 'boston university'), (2006, 'boston university'), (2007, 'national bureau of economic research'), (2008, 'boston university'), (2009, 'boston university'), (2010, 'boston university'), (2011, 'boston university'), (2013, 'national bureau of economic research'), (2016, 'national bureau of economic research'), (2018, 'boston university'), (2019, 'boston university'), (2019, 'national bureau of economic research'), (2021, 'boston university'), (2021, 'national bureau of economic research')) +all_us_institutions_year : ((1982, 'university of pennsylvania'), (1983, 'boston university'), (1983, 'national bureau of economic research'), (1983, 'university of pennsylvania'), (1984, 'national bureau of economic research'), (1985, 'boston university'), (1985, 'national bureau of economic research'), (1986, 'national bureau of economic research'), (1987, 'boston university'), (1987, 'national bureau of economic research'), (1988, 'boston university'), (1988, 'national bureau of economic research'), (1989, 'boston university'), (1989, 'national bureau of economic research'), (1989, 'vanderbilt university'), (1990, 'boston university'), (1990, 'national bureau of economic research'), (1990, 'vanderbilt university'), (1991, 'boston university'), (1991, 'national bureau of economic research'), (1991, 'vanderbilt university'), (1992, 'boston university'), (1992, 'national bureau of economic research'), (1992, 'vanderbilt university'), (1993, 'boston university'), (1993, 'national bureau of economic research'), (1993, 'vanderbilt university'), (1994, 'boston university'), (1994, 'national bureau of economic research'), (1995, 'boston university'), (1995, 'national bureau of economic research'), (1996, 'boston university'), (1996, 'national bureau of economic research'), (1996, 'vanderbilt university'), (1997, 'boston university'), (1997, 'national bureau of economic research'), (1997, 'vanderbilt university'), (1998, 'boston university'), (1998, 'national bureau of economic research'), (1999, 'boston university'), (1999, 'national bureau of economic research'), (2000, 'boston university'), (2000, 'national bureau of economic research'), (2000, 'vanderbilt university'), (2001, 'boston university'), (2001, 'national bureau of economic research'), (2002, 'boston university'), (2002, 'national bureau of economic research'), (2002, 'vanderbilt university'), (2003, 'boston university'), (2003, 'national bureau of economic research'), (2003, 'vanderbilt university'), (2004, 'boston university'), (2004, 'national bureau of economic research'), (2004, 'vanderbilt university'), (2005, 'boston university'), (2005, 'national bureau of economic research'), (2006, 'boston university'), (2006, 'national bureau of economic research'), (2007, 'boston university'), (2007, 'national bureau of economic research'), (2008, 'boston university'), (2008, 'national bureau of economic research'), (2009, 'boston university'), (2009, 'national bureau of economic research'), (2010, 'boston university'), (2011, 'boston university'), (2011, 'national bureau of economic research'), (2013, 'boston university'), (2013, 'national bureau of economic research'), (2014, 'boston university'), (2014, 'national bureau of economic research'), (2016, 'boston university'), (2016, 'national bureau of economic research'), (2017, 'boston university'), (2017, 'national bureau of economic research'), (2018, 'boston university'), (2019, 'boston university'), (2019, 'national bureau of economic research'), (2020, 'boston university'), (2021, 'boston university'), (2021, 'national bureau of economic research')) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pingsun +lastname : leung +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of hawaii at manoa'),) +all_us_institutions_year : ((1997, 'university of hawaii at manoa'),) + +firstname : pingsun +lastname : leung +middlename : None +year_range : (1977, 2020) +main_us_institutions_year : ((1977, 'university of hawaii'), (1982, 'university of hawaii'), (1984, 'university of hawaii'), (1985, 'university of hawaii'), (1986, 'university of hawaii'), (1988, 'university of hawaii'), (1989, 'university of hawaii'), (1990, 'university of hawaii'), (1992, 'university of hawaii'), (1993, 'university of hawaii'), (1994, 'university of hawaii at manoa'), (1995, 'university of hawaii at manoa'), (1996, 'university of hawaii at manoa'), (1997, 'university of hawaii at manoa'), (1998, 'university of hawaii at manoa'), (1999, 'university of hawaii at manoa'), (2000, 'university of hawaii at manoa'), (2001, 'university of hawaii at manoa'), (2003, 'university of hawaii'), (2004, 'university of hawaii at manoa'), (2006, 'university of hawaii at manoa'), (2007, 'university of hawaii at manoa'), (2009, 'university of hawaii at manoa'), (2010, 'university of hawaii at manoa'), (2011, 'university of hawaii at manoa'), (2012, 'university of hawaii at manoa'), (2013, 'university of hawaii at manoa'), (2014, 'university of hawaii'), (2014, 'university of hawaii at manoa'), (2015, 'university of hawaii at manoa'), (2016, 'university of hawaii at manoa'), (2017, 'university of hawaii at manoa'), (2019, 'university of hawaii at manoa'), (2020, 'university of hawaii at manoa')) +all_us_institutions_year : ((1977, 'university of hawaii'), (1982, 'university of hawaii'), (1984, 'university of hawaii'), (1985, 'university of hawaii'), (1986, 'university of hawaii'), (1988, 'university of hawaii'), (1989, 'university of hawaii'), (1990, 'university of hawaii'), (1992, 'university of hawaii'), (1993, 'university of hawaii'), (1994, 'university of hawaii at manoa'), (1995, 'university of hawaii at manoa'), (1996, 'university of hawaii at manoa'), (1997, 'university of hawaii'), (1997, 'university of hawaii at manoa'), (1998, 'university of hawaii at manoa'), (1999, 'university of hawaii'), (1999, 'university of hawaii at manoa'), (2000, 'university of hawaii at manoa'), (2001, 'joint institute for marine and atmospheric research'), (2001, 'university of hawaii'), (2001, 'university of hawaii at manoa'), (2002, 'university of hawaii'), (2003, 'university of hawaii'), (2003, 'university of hawaii at manoa'), (2004, 'university of hawaii at manoa'), (2005, 'university of hawaii at manoa'), (2006, 'university of hawaii'), (2006, 'university of hawaii at manoa'), (2007, 'university of hawaii at manoa'), (2008, 'university of hawaii at manoa'), (2009, 'university of hawaii at manoa'), (2010, 'university of hawaii at manoa'), (2011, 'university of hawaii'), (2011, 'university of hawaii at manoa'), (2012, 'university of hawaii'), (2012, 'university of hawaii at manoa'), (2013, 'university of hawaii at manoa'), (2014, 'university of hawaii'), (2014, 'university of hawaii at manoa'), (2015, 'university of hawaii'), (2015, 'university of hawaii at manoa'), (2016, 'university of hawaii at manoa'), (2017, 'university of hawaii at manoa'), (2019, 'university of hawaii at manoa'), (2020, 'university of hawaii at manoa')) + +1/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : caroline +lastname : carline +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of minnesota'),) +all_us_institutions_year : ((2011, 'university of minnesota'),) + +firstname : caroline +lastname : carlin +middlename : s +year_range : (2004, 2021) +main_us_institutions_year : ((2005, 'university of minnesota'), (2006, 'university of minnesota'), (2012, 'university of minnesota'), (2014, 'university of minnesota'), (2018, 'university of minnesota'), (2019, 'university of minnesota'), (2020, 'university of minnesota'), (2021, 'university of minnesota')) +all_us_institutions_year : ((2005, 'university of minnesota'), (2006, 'university of minnesota'), (2007, 'university of minnesota'), (2012, 'university of minnesota'), (2014, 'university of minnesota'), (2018, 'university of minnesota'), (2019, 'university of minnesota'), (2020, 'university of minnesota'), (2021, 'university of minnesota')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marsha +lastname : goldfarb +middlename : g +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of maryland baltimore county'),) +all_us_institutions_year : ((1999, 'university of maryland baltimore county'),) + +firstname : marsha +lastname : gold +middlename : None +year_range : (1979, 2018) +main_us_institutions_year : ((1993, 'mathematica policy research'), (1994, 'mathematica policy research'), (1995, 'mathematica policy research'), (1996, 'mathematica policy research'), (1997, 'mathematica policy research'), (1998, 'mathematica policy research'), (1999, 'mathematica policy research'), (2000, 'mathematica policy research'), (2001, 'mathematica policy research'), (2002, 'mathematica policy research'), (2003, 'mathematica policy research'), (2005, 'mathematica policy research'), (2006, 'mathematica policy research'), (2008, 'mathematica policy research'), (2009, 'mathematica policy research'), (2012, 'mathematica policy research'), (2016, 'mathematica policy research'), (2017, 'mathematica policy research'), (2018, 'mathematica policy research')) +all_us_institutions_year : ((1993, 'mathematica policy research'), (1994, 'mathematica policy research'), (1995, 'mathematica policy research'), (1996, 'mathematica policy research'), (1997, 'mathematica policy research'), (1998, 'mathematica policy research'), (1999, 'mathematica policy research'), (2000, 'mathematica policy research'), (2001, 'mathematica policy research'), (2002, 'mathematica policy research'), (2003, 'mathematica policy research'), (2004, 'mathematica policy research'), (2005, 'mathematica policy research'), (2006, 'mathematica policy research'), (2008, 'mathematica policy research'), (2009, 'mathematica policy research'), (2012, 'mathematica policy research'), (2016, 'mathematica policy research'), (2017, 'mathematica policy research'), (2018, 'mathematica policy research')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paul +lastname : rothstein +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'washington university in st louis'),) +all_us_institutions_year : ((2008, 'washington university in st louis'),) + +firstname : paul +lastname : roth +middlename : a +year_range : (1978, 2020) +main_us_institutions_year : ((1980, 'university of missouri st louis'), (1983, 'university of missouri st louis'), (1984, 'university of missouri st louis'), (1988, 'university of missouri st louis'), (1990, 'university of missouri'), (1990, 'university of missouri st louis'), (1991, 'university of missouri st louis'), (1992, 'university of missouri st louis'), (1994, 'university of missouri st louis'), (1995, 'university of missouri st louis'), (1996, 'university of missouri st louis'), (1999, 'university of missouri st louis'), (2002, 'university of missouri st louis'), (2003, 'university of missouri st louis'), (2007, 'university of california santa cruz'), (2008, 'university of california santa cruz'), (2009, 'university of california santa cruz'), (2011, 'university of california santa cruz'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz')) +all_us_institutions_year : ((1980, 'university of missouri st louis'), (1983, 'university of missouri st louis'), (1984, 'university of missouri st louis'), (1988, 'university of missouri st louis'), (1990, 'university of missouri'), (1990, 'university of missouri st louis'), (1991, 'university of missouri st louis'), (1992, 'university of missouri st louis'), (1994, 'university of missouri st louis'), (1995, 'university of missouri st louis'), (1996, 'university of missouri'), (1996, 'university of missouri st louis'), (1999, 'university of missouri st louis'), (2002, 'university of missouri st louis'), (2003, 'university of missouri'), (2003, 'university of missouri st louis'), (2007, 'university of california santa cruz'), (2008, 'university of california santa cruz'), (2009, 'university of california santa cruz'), (2010, 'university of california santa cruz'), (2011, 'university of california santa cruz'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz')) + +3/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : laurence +lastname : katlikoff +middlename : j +year_range : (2011,) +main_us_institutions_year : ((2011, 'boston university'),) +all_us_institutions_year : ((2011, 'boston university'),) + +firstname : laurence +lastname : kotlikoff +middlename : j +year_range : (1975, 2021) +main_us_institutions_year : ((1979, 'university of california los angeles'), (1979, 'national bureau of economic research'), (1985, 'national bureau of economic research'), (1987, 'national bureau of economic research'), (1988, 'boston university'), (1991, 'boston university'), (1992, 'national bureau of economic research'), (1993, 'national bureau of economic research'), (1994, 'boston university'), (1995, 'boston university'), (1999, 'boston university'), (2000, 'boston university'), (2001, 'boston university'), (2001, 'national bureau of economic research'), (2002, 'boston university'), (2003, 'boston university'), (2007, 'boston university'), (2007, 'national bureau of economic research'), (2008, 'boston university'), (2009, 'boston university'), (2010, 'boston university'), (2012, 'boston university'), (2013, 'national bureau of economic research'), (2016, 'boston university'), (2019, 'boston university'), (2019, 'economic policy institute'), (2019, 'national bureau of economic research'), (2021, 'national bureau of economic research')) +all_us_institutions_year : ((1979, 'boston university'), (1979, 'economic policy institute'), (1979, 'national bureau of economic research'), (1979, 'university of california los angeles'), (1980, 'boston university'), (1980, 'economic policy institute'), (1980, 'national bureau of economic research'), (1981, 'boston university'), (1981, 'economic policy institute'), (1981, 'national bureau of economic research'), (1982, 'boston university'), (1982, 'economic policy institute'), (1982, 'national bureau of economic research'), (1983, 'boston university'), (1983, 'economic policy institute'), (1983, 'national bureau of economic research'), (1984, 'boston university'), (1984, 'economic policy institute'), (1984, 'national bureau of economic research'), (1985, 'boston university'), (1985, 'economic policy institute'), (1985, 'national bureau of economic research'), (1986, 'boston university'), (1986, 'economic policy institute'), (1986, 'national bureau of economic research'), (1987, 'boston university'), (1987, 'economic policy institute'), (1987, 'national bureau of economic research'), (1988, 'boston university'), (1988, 'economic policy institute'), (1988, 'national bureau of economic research'), (1989, 'boston university'), (1989, 'economic policy institute'), (1989, 'national bureau of economic research'), (1990, 'boston university'), (1990, 'economic policy institute'), (1990, 'national bureau of economic research'), (1991, 'boston university'), (1991, 'economic policy institute'), (1991, 'national bureau of economic research'), (1992, 'boston university'), (1992, 'economic policy institute'), (1992, 'national bureau of economic research'), (1993, 'national bureau of economic research'), (1994, 'boston university'), (1994, 'national bureau of economic research'), (1995, 'boston university'), (1995, 'economic policy institute'), (1995, 'national bureau of economic research'), (1996, 'boston university'), (1996, 'economic policy institute'), (1996, 'national bureau of economic research'), (1997, 'boston university'), (1997, 'economic policy institute'), (1997, 'national bureau of economic research'), (1998, 'boston university'), (1998, 'economic policy institute'), (1998, 'national bureau of economic research'), (1999, 'boston university'), (1999, 'economic policy institute'), (1999, 'national bureau of economic research'), (2000, 'boston university'), (2001, 'boston university'), (2001, 'economic policy institute'), (2001, 'national bureau of economic research'), (2002, 'boston university'), (2002, 'economic policy institute'), (2002, 'national bureau of economic research'), (2003, 'boston university'), (2003, 'economic policy institute'), (2003, 'national bureau of economic research'), (2004, 'boston university'), (2004, 'economic policy institute'), (2004, 'national bureau of economic research'), (2005, 'boston university'), (2005, 'economic policy institute'), (2005, 'national bureau of economic research'), (2006, 'boston university'), (2006, 'economic policy institute'), (2006, 'national bureau of economic research'), (2007, 'boston university'), (2007, 'national bureau of economic research'), (2008, 'boston university'), (2008, 'economic policy institute'), (2008, 'national bureau of economic research'), (2009, 'boston university'), (2009, 'economic policy institute'), (2009, 'national bureau of economic research'), (2010, 'boston university'), (2010, 'economic policy institute'), (2010, 'national bureau of economic research'), (2012, 'boston university'), (2012, 'economic policy institute'), (2012, 'national bureau of economic research'), (2013, 'boston university'), (2013, 'economic policy institute'), (2013, 'national bureau of economic research'), (2015, 'boston university'), (2015, 'economic policy institute'), (2015, 'national bureau of economic research'), (2016, 'boston university'), (2016, 'economic policy institute'), (2016, 'national bureau of economic research'), (2017, 'boston university'), (2017, 'economic policy institute'), (2017, 'national bureau of economic research'), (2018, 'boston university'), (2018, 'economic policy institute'), (2018, 'national bureau of economic research'), (2019, 'boston university'), (2019, 'economic policy institute'), (2019, 'national bureau of economic research'), (2020, 'boston university'), (2020, 'economic policy institute'), (2020, 'national bureau of economic research'), (2021, 'boston university'), (2021, 'economic policy institute'), (2021, 'national bureau of economic research')) + +3/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gina +lastname : connor +middlename : colarelli o +year_range : (2010,) +main_us_institutions_year : ((2010, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((2010, 'rensselaer polytechnic institute'),) + +firstname : gina +lastname : oconnor +middlename : colarelli +year_range : (1994, 2021) +main_us_institutions_year : ((1994, 'rensselaer polytechnic institute'), (1995, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1997, 'rensselaer polytechnic institute'), (2000, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2002, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2004, 'rensselaer polytechnic institute'), (2005, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2020, 'babson college'), (2021, 'babson college')) +all_us_institutions_year : ((1994, 'rensselaer polytechnic institute'), (1995, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1997, 'rensselaer polytechnic institute'), (2000, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2002, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2004, 'rensselaer polytechnic institute'), (2005, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2011, 'babson college'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2013, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2015, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2020, 'babson college'), (2021, 'babson college')) + +4/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : villasboas +middlename : miguel +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of california berkeley'),) +all_us_institutions_year : ((2015, 'university of california berkeley'),) + +firstname : juan +lastname : villa +middlename : miguel +year_range : (2014, 2019) +main_us_institutions_year : ((2016, 'center for global development'),) +all_us_institutions_year : ((2016, 'center for global development'),) + +5/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : villasboas +middlename : miguel +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of california berkeley'),) +all_us_institutions_year : ((2006, 'university of california berkeley'),) + +firstname : juan +lastname : villa +middlename : miguel +year_range : (2014, 2019) +main_us_institutions_year : ((2016, 'center for global development'),) +all_us_institutions_year : ((2016, 'center for global development'),) + +5/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elizabeth +lastname : powers +middlename : t +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) + +firstname : elizabeth +lastname : hsiaowecksler +middlename : t +year_range : (2003, 2021) +main_us_institutions_year : ((2003, 'boston university'), (2005, 'university of illinois at urbana champaign'), (2007, 'university of illinois at urbana champaign'), (2008, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'university of illinois at urbana champaign'), (2013, 'university of illinois at urbana champaign'), (2014, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign'), (2016, 'university of illinois at urbana champaign'), (2017, 'university of illinois at urbana champaign'), (2018, 'university of illinois at urbana champaign'), (2019, 'university of illinois at urbana champaign'), (2020, 'university of illinois at urbana champaign'), (2021, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((2003, 'boston university'), (2005, 'university of illinois at urbana champaign'), (2007, 'university of illinois at urbana champaign'), (2008, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'university of illinois at urbana champaign'), (2013, 'university of illinois at urbana champaign'), (2014, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign'), (2016, 'university of illinois at urbana champaign'), (2017, 'university of illinois at urbana champaign'), (2018, 'university of illinois at urbana champaign'), (2019, 'university of illinois at urbana champaign'), (2020, 'university of illinois at urbana champaign'), (2021, 'university of illinois at urbana champaign')) + +5/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : chernew +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of michigan'),) +all_us_institutions_year : ((1998, 'university of michigan'),) + +firstname : michael +lastname : charney +middlename : w +year_range : (1995, 2019) +main_us_institutions_year : ((1995, 'university of michigan'), (1998, 'university of michigan')) +all_us_institutions_year : ((1995, 'university of michigan'), (1998, 'university of michigan')) + +5/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : anderson +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'boston college'),) +all_us_institutions_year : ((1999, 'boston college'),) + +firstname : james +lastname : anderson +middlename : l +year_range : (1973, 2021) +main_us_institutions_year : ((1985, 'university of rhode island'), (1986, 'university of rhode island'), (1991, 'university of rhode island'), (1993, 'university of rhode island'), (1995, 'university of rhode island'), (1996, 'university of rhode island'), (1997, 'university of rhode island'), (1998, 'university of rhode island'), (1999, 'university of rhode island'), (2000, 'university of rhode island'), (2001, 'university of rhode island'), (2002, 'university of rhode island'), (2005, 'university of rhode island'), (2006, 'university of rhode island'), (2009, 'university of rhode island'), (2010, 'university of rhode island'), (2013, 'university of rhode island'), (2015, 'university of florida'), (2017, 'university of florida'), (2018, 'university of florida'), (2019, 'university of florida'), (2020, 'university of florida'), (2021, 'university of florida')) +all_us_institutions_year : ((1985, 'university of rhode island'), (1986, 'university of rhode island'), (1991, 'university of rhode island'), (1993, 'university of rhode island'), (1995, 'university of rhode island'), (1996, 'university of rhode island'), (1997, 'university of rhode island'), (1998, 'university of rhode island'), (1999, 'university of rhode island'), (2000, 'university of rhode island'), (2001, 'university of rhode island'), (2002, 'university of rhode island'), (2005, 'university of rhode island'), (2006, 'university of rhode island'), (2008, 'university of rhode island'), (2009, 'university of rhode island'), (2010, 'university of rhode island'), (2012, 'university of rhode island'), (2013, 'university of rhode island'), (2015, 'university of florida'), (2017, 'university of florida'), (2018, 'university of florida'), (2019, 'university of florida'), (2020, 'university of florida'), (2021, 'university of florida')) + +5/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : harrington +middlename : w +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of washington'),) +all_us_institutions_year : ((2001, 'university of washington'),) + +firstname : james +lastname : harrington +middlename : None +year_range : (2018, 2019) +main_us_institutions_year : ((2018, 'allen institute for brain science'), (2019, 'allen institute for brain science')) +all_us_institutions_year : ((2017, 'allen institute for brain science'), (2018, 'allen institute for brain science'), (2019, 'allen institute for brain science')) + +5/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : goldberg +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of new hampshire main campus'),) +all_us_institutions_year : ((2014, 'university of new hampshire main campus'),) + +firstname : michael +lastname : goldberg +middlename : j +year_range : (1974, 2021) +main_us_institutions_year : ((2000, 'widener university'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley')) +all_us_institutions_year : ((2000, 'widener university'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley'), (2002, 'university of delaware'), (2005, 'university of delaware'), (2008, 'university of delaware'), (2010, 'university of delaware'), (2014, 'university of delaware')) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kevin +lastname : lancaster +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'columbia university'),) +all_us_institutions_year : ((1991, 'columbia university'),) + +firstname : kelvin +lastname : lancaster +middlename : None +year_range : (1955, 1991) +main_us_institutions_year : ((1977, 'columbia university'), (1980, 'columbia university'), (1981, 'columbia university'), (1990, 'columbia university'), (1991, 'columbia university')) +all_us_institutions_year : ((1977, 'columbia university'), (1980, 'columbia university'), (1981, 'columbia university'), (1990, 'columbia university'), (1991, 'columbia university'), (1998, 'columbia university')) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jon +lastname : sutinen +middlename : g +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of rhode island'),) +all_us_institutions_year : ((1992, 'university of rhode island'),) + +firstname : jon +lastname : sutinen +middlename : g +year_range : (1984, 2007) +main_us_institutions_year : ((2007, 'university of wisconsin madison'),) +all_us_institutions_year : ((2007, 'university of wisconsin madison'),) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : romer +middlename : h +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of california berkeley'),) +all_us_institutions_year : ((1997, 'university of california berkeley'),) + +firstname : adam +lastname : romero +middlename : None +year_range : (2012, 2020) +main_us_institutions_year : ((2012, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of washington'), (2018, 'university of washington')) +all_us_institutions_year : ((2012, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of washington'), (2018, 'university of washington'), (2019, 'university of washington'), (2021, 'university of washington')) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nick +lastname : bloom +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'stanford university'),) +all_us_institutions_year : ((2015, 'stanford university'),) + +firstname : laura +lastname : bloomfield +middlename : s p +year_range : (2016, 2021) +main_us_institutions_year : ((2016, 'stanford university'), (2017, 'stanford university'), (2018, 'stanford university'), (2020, 'stanford university'), (2021, 'stanford university')) +all_us_institutions_year : ((2016, 'stanford university'), (2017, 'stanford university'), (2018, 'stanford university'), (2020, 'stanford university'), (2021, 'stanford university')) + +6/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : lucas +middlename : e b +year_range : (2011,) +main_us_institutions_year : ((2011, 'boston university'),) +all_us_institutions_year : ((2011, 'boston university'),) + +firstname : robert +lastname : lucas +middlename : e b +year_range : (2002, 2010) +main_us_institutions_year : ((2010, 'world bank'),) +all_us_institutions_year : ((2010, 'world bank'), (2015, 'world bank')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pham +lastname : van +middlename : hoang +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of missouri columbia'),) +all_us_institutions_year : ((2005, 'university of missouri columbia'),) + +firstname : pham +lastname : van +middlename : hoang +year_range : (1999, 2019) +main_us_institutions_year : ((2009, 'baylor university'), (2010, 'texas a m university'), (2010, 'baylor university'), (2012, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) +all_us_institutions_year : ((2009, 'baylor university'), (2010, 'baylor university'), (2010, 'texas a m university'), (2012, 'baylor university'), (2014, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pham +lastname : van +middlename : hoang +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of missouri columbia'),) +all_us_institutions_year : ((2003, 'university of missouri columbia'),) + +firstname : pham +lastname : van +middlename : hoang +year_range : (1999, 2019) +main_us_institutions_year : ((2009, 'baylor university'), (2010, 'texas a m university'), (2010, 'baylor university'), (2012, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) +all_us_institutions_year : ((2009, 'baylor university'), (2010, 'baylor university'), (2010, 'texas a m university'), (2012, 'baylor university'), (2014, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pham +lastname : van +middlename : hoang +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of missouri columbia'),) +all_us_institutions_year : ((2002, 'university of missouri columbia'),) + +firstname : pham +lastname : van +middlename : hoang +year_range : (1999, 2019) +main_us_institutions_year : ((2009, 'baylor university'), (2010, 'texas a m university'), (2010, 'baylor university'), (2012, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) +all_us_institutions_year : ((2009, 'baylor university'), (2010, 'baylor university'), (2010, 'texas a m university'), (2012, 'baylor university'), (2014, 'baylor university'), (2017, 'baylor university'), (2019, 'baylor university')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : sweeney +middlename : j +year_range : (1996,) +main_us_institutions_year : ((1996, 'georgetown university'),) +all_us_institutions_year : ((1996, 'georgetown university'),) + +firstname : richard +lastname : sweeney +middlename : j +year_range : (1983, 1997) +main_us_institutions_year : ((1983, 'claremont mckenna college'), (1986, 'claremont mckenna college'), (1988, 'claremont mckenna college')) +all_us_institutions_year : ((1983, 'claremont mckenna college'), (1986, 'claremont mckenna college'), (1988, 'claremont mckenna college')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : l +lastname : hatch +middlename : upton +year_range : (1998,) +main_us_institutions_year : ((1998, 'auburn university'),) +all_us_institutions_year : ((1998, 'auburn university'),) + +firstname : l +lastname : hatch +middlename : upton +year_range : (1990, 1998) +main_us_institutions_year : ((1990, 'agricultural applied economics association'), (1994, 'agricultural applied economics association'), (1998, 'agricultural applied economics association')) +all_us_institutions_year : ((1990, 'agricultural applied economics association'), (1994, 'agricultural applied economics association'), (1998, 'agricultural applied economics association')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : l +lastname : hatch +middlename : upton +year_range : (1995,) +main_us_institutions_year : ((1995, 'auburn university'),) +all_us_institutions_year : ((1995, 'auburn university'),) + +firstname : l +lastname : hatch +middlename : upton +year_range : (1990, 1998) +main_us_institutions_year : ((1990, 'agricultural applied economics association'), (1994, 'agricultural applied economics association'), (1998, 'agricultural applied economics association')) +all_us_institutions_year : ((1990, 'agricultural applied economics association'), (1994, 'agricultural applied economics association'), (1998, 'agricultural applied economics association')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : innes +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of arizona'),) +all_us_institutions_year : ((2005, 'university of arizona'),) + +firstname : robert +lastname : innes +middlename : d +year_range : (1989, 1992) +main_us_institutions_year : ((1989, 'university of california davis'), (1990, 'university of california davis'), (1992, 'university of california davis')) +all_us_institutions_year : ((1989, 'university of california davis'), (1990, 'university of california davis'), (1992, 'university of california davis')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : innes +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of arizona'),) +all_us_institutions_year : ((2007, 'university of arizona'),) + +firstname : robert +lastname : innes +middlename : d +year_range : (1989, 1992) +main_us_institutions_year : ((1989, 'university of california davis'), (1990, 'university of california davis'), (1992, 'university of california davis')) +all_us_institutions_year : ((1989, 'university of california davis'), (1990, 'university of california davis'), (1992, 'university of california davis')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : brodzinski +middlename : d +year_range : (1997,) +main_us_institutions_year : ((1997, 'nova southeastern university'),) +all_us_institutions_year : ((1997, 'nova southeastern university'),) + +firstname : james +lastname : brodzinski +middlename : d +year_range : (1989, 2016) +main_us_institutions_year : ((1989, 'kennesaw state university'), (1990, 'salisbury university'), (1990, 'kennesaw state university'), (1991, 'salisbury university'), (1992, 'california state university san bernardino'), (1994, 'xavier university'), (2011, 'salisbury university'), (2013, 'valparaiso university')) +all_us_institutions_year : ((1989, 'kennesaw state university'), (1990, 'kennesaw state university'), (1990, 'salisbury university'), (1991, 'salisbury university'), (1992, 'california state university san bernardino'), (1994, 'xavier university'), (2011, 'salisbury university'), (2013, 'valparaiso university')) + +6/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : martin +lastname : gruber +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'new york university graduate school of business administration'),) +all_us_institutions_year : ((1996, 'new york university graduate school of business administration'),) + +firstname : martin +lastname : gruber +middlename : j +year_range : (1966, 2020) +main_us_institutions_year : ((1966, 'new york university'), (1972, 'new york university'), (1974, 'new york university'), (1976, 'new york university'), (1977, 'new york university'), (1978, 'new york university'), (1981, 'new york university'), (1982, 'new york university'), (1983, 'new york university'), (1984, 'new york university'), (1987, 'new york university'), (1988, 'new york university'), (1989, 'new york university'), (1991, 'new york university'), (1992, 'new york university'), (1993, 'new york university'), (1996, 'new york university'), (1997, 'new york university'), (1999, 'new york university'), (2001, 'new york university'), (2002, 'new york university'), (2003, 'new york university'), (2004, 'new york university'), (2005, 'new york university'), (2006, 'new york university'), (2007, 'new york university'), (2010, 'new york university'), (2011, 'new york university'), (2012, 'new york university'), (2013, 'new york university'), (2014, 'new york university'), (2015, 'new york university'), (2018, 'new york university'), (2019, 'new york university'), (2020, 'new york university')) +all_us_institutions_year : ((1966, 'new york university'), (1972, 'new york university'), (1974, 'international institute of minnesota'), (1974, 'new york university'), (1976, 'new york university'), (1977, 'new york university'), (1978, 'new york university'), (1981, 'new york university'), (1982, 'new york university'), (1983, 'new york university'), (1984, 'new york university'), (1987, 'new york university'), (1988, 'new york university'), (1989, 'new york university'), (1991, 'new york university'), (1992, 'new york university'), (1993, 'new york university'), (1994, 'new york university'), (1995, 'new york university'), (1996, 'new york university'), (1997, 'new york university'), (1998, 'new york university'), (1999, 'new york university'), (2000, 'new york university'), (2001, 'new york university'), (2002, 'new york university'), (2003, 'new york university'), (2004, 'new york university'), (2005, 'new york university'), (2006, 'new york university'), (2007, 'new york university'), (2009, 'new york university'), (2010, 'new york university'), (2011, 'new york university'), (2012, 'new york university'), (2013, 'new york university'), (2014, 'new york university'), (2015, 'new york university'), (2017, 'new york university'), (2018, 'new york university'), (2019, 'new york university'), (2020, 'new york university')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : a +lastname : rourke +middlename : desmond o +year_range : (1991,) +main_us_institutions_year : ((1991, 'washington state university'),) +all_us_institutions_year : ((1991, 'washington state university'),) + +firstname : a +lastname : orourke +middlename : desmond +year_range : (1973, 1999) +main_us_institutions_year : ((1973, 'washington state university'), (1980, 'washington state university'), (1981, 'washington state university'), (1990, 'washington state university'), (1992, 'washington state university'), (1994, 'washington state university'), (1999, 'washington state university')) +all_us_institutions_year : ((1973, 'washington state university'), (1980, 'washington state university'), (1981, 'washington state university'), (1990, 'washington state university'), (1992, 'washington state university'), (1994, 'washington state university'), (1999, 'washington state university')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : heilbroner +middlename : l +year_range : (1996,) +main_us_institutions_year : ((1996, 'new school for social research'),) +all_us_institutions_year : ((1996, 'new school for social research'),) + +firstname : robert +lastname : heilbroner +middlename : l +year_range : (1953, 2007) +main_us_institutions_year : ((1968, 'the new school'), (1975, 'the new school'), (1988, 'the new school'), (1993, 'the new school'), (1995, 'the new school'), (1996, 'the new school'), (1998, 'the new school')) +all_us_institutions_year : ((1968, 'the new school'), (1975, 'the new school'), (1988, 'the new school'), (1993, 'the new school'), (1995, 'the new school'), (1996, 'the new school'), (1998, 'the new school')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : katsinas +middlename : g +year_range : (1991,) +main_us_institutions_year : ((1991, 'oklahoma state university'),) +all_us_institutions_year : ((1991, 'oklahoma state university'),) + +firstname : stephen +lastname : katsinas +middlename : g +year_range : (1989, 2020) +main_us_institutions_year : ((1999, 'university of toledo'), (2003, 'university of north texas'), (2005, 'university of alabama'), (2006, 'university of alabama'), (2007, 'university of alabama'), (2008, 'university of alabama'), (2009, 'university of alabama'), (2010, 'university of alabama'), (2011, 'university of alabama'), (2012, 'university of alabama'), (2014, 'university of alabama'), (2015, 'university of alabama'), (2017, 'university of alabama'), (2018, 'university of alabama'), (2019, 'university of alabama'), (2020, 'university of alabama')) +all_us_institutions_year : ((1999, 'university of toledo'), (2003, 'university of north texas'), (2005, 'university of alabama'), (2006, 'university of alabama'), (2007, 'university of alabama'), (2008, 'university of alabama'), (2009, 'university of alabama'), (2010, 'university of alabama'), (2011, 'university of alabama'), (2012, 'university of alabama'), (2014, 'university of alabama'), (2015, 'university of alabama'), (2017, 'university of alabama'), (2018, 'university of alabama'), (2019, 'university of alabama'), (2020, 'university of alabama')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paul +lastname : rubin +middlename : h +year_range : (2005,) +main_us_institutions_year : ((2005, 'emory university'),) +all_us_institutions_year : ((2005, 'emory university'),) + +firstname : paul +lastname : rubin +middlename : h +year_range : (1982, 1985) +main_us_institutions_year : ((1984, 'federal trade commission'), (1985, 'federal trade commission')) +all_us_institutions_year : ((1984, 'federal trade commission'), (1985, 'federal trade commission')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : gould +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'walden university'),) +all_us_institutions_year : ((2011, 'walden university'),) + +firstname : david +lastname : gould +middlename : m +year_range : (1995, 2020) +main_us_institutions_year : ((1996, 'federal reserve bank of dallas'), (1996, 'federal reserve system'), (1997, 'federal reserve bank of dallas'), (1998, 'federal reserve bank of dallas'), (2013, 'world bank'), (2016, 'world bank'), (2017, 'world bank')) +all_us_institutions_year : ((1994, 'federal reserve bank of dallas'), (1996, 'federal reserve bank of dallas'), (1996, 'federal reserve system'), (1997, 'federal reserve bank of dallas'), (1998, 'federal reserve bank of dallas'), (2013, 'world bank'), (2016, 'world bank'), (2017, 'world bank')) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : buccola +middlename : t +year_range : (2005,) +main_us_institutions_year : ((2005, 'oregon state university'),) +all_us_institutions_year : ((2005, 'oregon state university'),) + +firstname : steven +lastname : buccola +middlename : t +year_range : (1977, 2020) +main_us_institutions_year : ((1979, 'virginia tech'), (1980, 'virginia tech'), (1981, 'oregon state university'), (1982, 'oregon state university'), (1983, 'oregon state university'), (1984, 'oregon state university'), (1985, 'oregon state university'), (1986, 'oregon state university'), (1987, 'oregon state university'), (1988, 'oregon state university'), (1991, 'oregon state university'), (1994, 'oregon state university'), (1995, 'oregon state university'), (1996, 'oregon state university'), (1997, 'oregon state university'), (1999, 'oregon state university'), (2000, 'oregon state university'), (2002, 'oregon state university'), (2003, 'agricultural applied economics association'), (2003, 'oregon state university'), (2004, 'oregon state university'), (2005, 'oregon state university'), (2006, 'oregon state university'), (2007, 'oregon state university'), (2008, 'oregon state university'), (2009, 'oregon state university'), (2010, 'oregon state university'), (2012, 'oregon state university'), (2013, 'oregon state university'), (2020, 'oregon state university')) +all_us_institutions_year : ((1979, 'virginia tech'), (1980, 'virginia tech'), (1981, 'oregon state university'), (1982, 'oregon state university'), (1983, 'oregon state university'), (1984, 'oregon state university'), (1985, 'oregon state university'), (1986, 'oregon state university'), (1987, 'oregon state university'), (1988, 'oregon state university'), (1991, 'oregon state university'), (1994, 'oregon state university'), (1995, 'oregon state university'), (1996, 'oregon state university'), (1997, 'oregon state university'), (1999, 'oregon state university'), (2000, 'oregon state university'), (2002, 'oregon state university'), (2003, 'agricultural applied economics association'), (2003, 'oregon state university'), (2004, 'oregon state university'), (2005, 'oregon state university'), (2006, 'oregon state university'), (2007, 'oregon state university'), (2008, 'oregon state university'), (2009, 'oregon state university'), (2009, 'portland state university'), (2010, 'oregon state university'), (2010, 'portland state university'), (2012, 'oregon state university'), (2013, 'oregon state university'), (2018, 'oregon state university'), (2020, 'oregon state university')) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : glynn +lastname : lunney +middlename : s +year_range : (2009,) +main_us_institutions_year : ((2009, 'tulane university'),) +all_us_institutions_year : ((2009, 'tulane university'),) + +firstname : glynn +lastname : lunney +middlename : s +year_range : (1999, 2020) +main_us_institutions_year : ((1999, 'texas a m university'), (2014, 'texas a m university'), (2018, 'texas a m university')) +all_us_institutions_year : ((1999, 'texas a m university'), (2000, 'texas a m university'), (2001, 'texas a m university'), (2002, 'texas a m university'), (2003, 'texas a m university'), (2012, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2016, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) + +11/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeremy +lastname : jackson +middlename : bc +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of california san diego'),) +all_us_institutions_year : ((2011, 'university of california san diego'),) + +firstname : jeremy +lastname : jackson +middlename : b c +year_range : (1968, 2020) +main_us_institutions_year : ((1972, 'johns hopkins university'), (1975, 'university of baltimore'), (1979, 'johns hopkins university'), (1980, 'johns hopkins university'), (1981, 'johns hopkins university'), (1982, 'johns hopkins university'), (1984, 'johns hopkins university'), (2000, 'university of california san diego'), (2001, 'university of california san diego'), (2004, 'university of california san diego'), (2005, 'university of california san diego'), (2005, 'scripps institution of oceanography'), (2007, 'scripps institution of oceanography'), (2008, 'university of california san diego'), (2009, 'university of california san diego'), (2010, 'university of california san diego'), (2011, 'scripps institution of oceanography'), (2012, 'university of california san diego'), (2012, 'scripps institution of oceanography'), (2013, 'smithsonian institution'), (2014, 'university of california san diego'), (2015, 'university of california san diego'), (2016, 'national museum of natural history'), (2016, 'scripps institution of oceanography'), (2017, 'university of california san diego'), (2018, 'university of california san diego'), (2018, 'national museum of natural history')) +all_us_institutions_year : ((1972, 'johns hopkins university'), (1975, 'university of baltimore'), (1979, 'johns hopkins university'), (1980, 'johns hopkins university'), (1981, 'johns hopkins university'), (1982, 'johns hopkins university'), (1984, 'johns hopkins university'), (1992, 'smithsonian institution'), (2000, 'university of california san diego'), (2001, 'scripps institution of oceanography'), (2001, 'university of california san diego'), (2002, 'scripps institution of oceanography'), (2002, 'university of california san diego'), (2003, 'scripps institution of oceanography'), (2003, 'university of california san diego'), (2004, 'university of california san diego'), (2005, 'scripps institution of oceanography'), (2005, 'university of california san diego'), (2006, 'scripps institution of oceanography'), (2006, 'university of california san diego'), (2007, 'scripps institution of oceanography'), (2007, 'university of california san diego'), (2008, 'scripps institution of oceanography'), (2008, 'university of california san diego'), (2009, 'university of california san diego'), (2010, 'scripps institution of oceanography'), (2010, 'university of california san diego'), (2011, 'scripps institution of oceanography'), (2012, 'scripps institution of oceanography'), (2012, 'university of california san diego'), (2013, 'smithsonian institution'), (2014, 'national museum of natural history'), (2014, 'university of california san diego'), (2015, 'national museum of natural history'), (2015, 'smithsonian institution'), (2015, 'university of california san diego'), (2016, 'national museum of natural history'), (2016, 'scripps institution of oceanography'), (2017, 'national museum of natural history'), (2017, 'university of california san diego'), (2018, 'national museum of natural history'), (2018, 'university of california san diego')) + +11/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wim +lastname : vijverberg +middlename : pm +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of texas at dallas'),) +all_us_institutions_year : ((2008, 'university of texas at dallas'),) + +firstname : wim +lastname : vijverberg +middlename : p m +year_range : (1980, 2021) +main_us_institutions_year : ((1980, 'university of pittsburgh'), (1991, 'university of texas at dallas'), (1993, 'university of texas at dallas'), (1994, 'university of texas at dallas'), (1994, 'east carolina university'), (1995, 'university of texas at dallas'), (1996, 'university of texas at dallas'), (1997, 'university of texas at dallas'), (2000, 'university of texas at dallas'), (2003, 'university of texas at dallas'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2008, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'the graduate center cuny'), (2010, 'university of texas at dallas'), (2011, 'city university of new york'), (2016, 'the graduate center cuny'), (2018, 'city university of new york'), (2021, 'the graduate center cuny')) +all_us_institutions_year : ((1980, 'university of pittsburgh'), (1991, 'university of texas at dallas'), (1992, 'university of texas at dallas'), (1993, 'university of texas at dallas'), (1994, 'east carolina university'), (1994, 'university of texas at dallas'), (1995, 'university of texas at dallas'), (1996, 'university of texas at dallas'), (1997, 'university of texas at dallas'), (1999, 'university of texas at dallas'), (2000, 'the graduate center cuny'), (2000, 'university of texas at dallas'), (2001, 'the graduate center cuny'), (2002, 'the graduate center cuny'), (2003, 'university of texas at dallas'), (2004, 'the graduate center cuny'), (2004, 'university of texas at dallas'), (2005, 'the graduate center cuny'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2008, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'the graduate center cuny'), (2010, 'university of texas at dallas'), (2011, 'city university of new york'), (2011, 'the graduate center cuny'), (2012, 'the graduate center cuny'), (2015, 'the graduate center cuny'), (2016, 'the graduate center cuny'), (2018, 'city university of new york'), (2021, 'the graduate center cuny')) + +12/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : joseph +lastname : harrington +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'johns hopkins university'),) +all_us_institutions_year : ((2009, 'johns hopkins university'),) + +firstname : jr +lastname : harrington +middlename : joseph e +year_range : (1992, 2011) +main_us_institutions_year : ((1992, 'johns hopkins university'), (2011, 'johns hopkins university')) +all_us_institutions_year : ((1992, 'johns hopkins university'), (2009, 'johns hopkins university'), (2011, 'johns hopkins university')) + +13/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dennis +lastname : weisman +middlename : l +year_range : (2005,) +main_us_institutions_year : ((2005, 'kansas state university'),) +all_us_institutions_year : ((2005, 'kansas state university'),) + +firstname : dennis +lastname : weisman +middlename : l +year_range : (1986, 1987) +main_us_institutions_year : ((1986, 'southwestern bell'), (1987, 'southwestern bell')) +all_us_institutions_year : ((1986, 'southwestern bell'), (1987, 'southwestern bell')) + +14/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peter +lastname : gordon +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of southern california'),) +all_us_institutions_year : ((1996, 'university of southern california'),) + +firstname : peter +lastname : gordon +middlename : None +year_range : (1996, 2020) +main_us_institutions_year : ((2003, 'exxonmobil'), (2005, 'exxonmobil'), (2006, 'exxonmobil'), (2007, 'exxonmobil'), (2009, 'exxonmobil'), (2010, 'exxonmobil'), (2016, 'exxonmobil'), (2017, 'exxonmobil'), (2019, 'exxonmobil'), (2020, 'exxonmobil')) +all_us_institutions_year : ((2003, 'exxonmobil'), (2005, 'exxonmobil'), (2006, 'exxonmobil'), (2007, 'exxonmobil'), (2009, 'exxonmobil'), (2010, 'exxonmobil'), (2012, 'exxonmobil'), (2014, 'university of southern california'), (2015, 'exxonmobil'), (2016, 'exxonmobil'), (2017, 'exxonmobil'), (2019, 'exxonmobil'), (2020, 'exxonmobil')) + +14/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 759.2459566990534 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_engineering_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_engineering_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..d38fe80 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_engineering_christoph_degree0_advisors_9015.log @@ -0,0 +1,875 @@ +Namespace(testing=False, verbose=1, field=['engineering'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [127413603] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008928656578063964 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 189.19803936878841 minutes + +Starting active labeling... +firstname : kaushik +lastname : roy +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'purdue university'),) +all_us_institutions_year : ((2006, 'purdue university'),) + +firstname : kaushik +lastname : roy +middlename : None +year_range : (2002, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2019, 'purdue university'),) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : reginald +lastname : tan +middlename : b h +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) + +firstname : reginald +lastname : tan +middlename : b h +year_range : (1986, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2006, 'university of illinois at urbana champaign'), (2007, 'university of illinois at urbana champaign'), (2008, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pamela +lastname : mccauleybell +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of central florida'),) +all_us_institutions_year : ((2002, 'university of central florida'),) + +firstname : pamela +lastname : mccauley +middlename : None +year_range : (2015, 2021) +main_us_institutions_year : ((2015, 'university of central florida'), (2016, 'university of central florida'), (2017, 'university of central florida'), (2018, 'university of central florida'), (2021, 'university of central florida')) +all_us_institutions_year : ((2015, 'university of central florida'), (2016, 'university of central florida'), (2017, 'university of central florida'), (2018, 'university of central florida'), (2021, 'university of central florida')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : berrien +lastname : moore +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of new hampshire main campus'),) +all_us_institutions_year : ((1996, 'university of new hampshire main campus'),) + +firstname : b +lastname : moor +middlename : de +year_range : (1985, 2020) +main_us_institutions_year : None +all_us_institutions_year : ((2005, 'university of michigan'),) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : brooke +middlename : a +year_range : (1992,) +main_us_institutions_year : ((1992, 'georgia institute of technology'),) +all_us_institutions_year : ((1992, 'georgia institute of technology'),) + +firstname : anthony +lastname : brookes +middlename : j +year_range : (1994, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2013, 'university of california san francisco'),) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : bronner +middlename : eric +year_range : (1993,) +main_us_institutions_year : ((1993, 'rutgers university'),) +all_us_institutions_year : ((1993, 'rutgers university'),) + +firstname : stephen +lastname : brown +middlename : h m +year_range : (1984, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2020, 'university of minnesota'),) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : selvana +middlename : panneer +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of arkansas'),) +all_us_institutions_year : ((2011, 'university of arkansas'),) + +firstname : r +lastname : selvam +middlename : panneer +year_range : (1990, 2020) +main_us_institutions_year : ((1990, 'university of arkansas'), (1992, 'university of arkansas'), (1993, 'university of arkansas'), (1996, 'university of arkansas'), (1997, 'university of arkansas'), (1998, 'university of arkansas'), (2000, 'university of arkansas'), (2002, 'university of arkansas'), (2004, 'university of arkansas'), (2005, 'university of arkansas'), (2006, 'university of arkansas'), (2007, 'university of arkansas'), (2009, 'university of arkansas'), (2013, 'university of arkansas'), (2014, 'university of arkansas'), (2015, 'university of arkansas'), (2016, 'university of arkansas'), (2019, 'university of arkansas'), (2020, 'university of arkansas')) +all_us_institutions_year : ((1990, 'university of arkansas'), (1992, 'university of arkansas'), (1993, 'university of arkansas'), (1996, 'university of arkansas'), (1997, 'university of arkansas'), (1998, 'university of arkansas'), (2000, 'university of arkansas'), (2002, 'university of arkansas'), (2004, 'university of arkansas'), (2005, 'university of arkansas'), (2006, 'university of arkansas'), (2007, 'university of arkansas'), (2008, 'university of arkansas'), (2009, 'university of arkansas'), (2010, 'university of arkansas'), (2011, 'university of arkansas'), (2013, 'university of arkansas'), (2014, 'university of arkansas'), (2015, 'university of arkansas'), (2016, 'university of arkansas'), (2018, 'university of arkansas'), (2019, 'university of arkansas'), (2020, 'university of arkansas'), (2021, 'university of arkansas')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : xiaobo +lastname : hu +middlename : sharon +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of notre dame'),) +all_us_institutions_year : ((2007, 'university of notre dame'),) + +firstname : x +lastname : huo +middlename : sharon +year_range : (2005, 2008) +main_us_institutions_year : ((2005, 'tennessee department of transportation'), (2005, 'tennessee technological university'), (2006, 'tennessee technological university'), (2008, 'tennessee technological university')) +all_us_institutions_year : ((2005, 'tennessee department of transportation'), (2005, 'tennessee technological university'), (2006, 'tennessee department of transportation'), (2006, 'tennessee technological university'), (2008, 'federal highway administration'), (2008, 'tennessee technological university')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : varim +middlename : nazmul +year_range : (2005,) +main_us_institutions_year : ((2005, 'colorado state university'),) +all_us_institutions_year : ((2005, 'colorado state university'),) + +firstname : m +lastname : karim +middlename : nazmul +year_range : (1981, 2020) +main_us_institutions_year : ((1981, 'colorado state university'), (1982, 'colorado state university'), (1983, 'colorado state university'), (1987, 'colorado state university'), (1988, 'colorado state university'), (1989, 'colorado state university'), (1990, 'colorado state university'), (1992, 'colorado state university'), (1994, 'colorado state university'), (1996, 'colorado state university'), (1997, 'colorado state university'), (1998, 'colorado state university'), (1999, 'colorado state university'), (2001, 'colorado state university'), (2002, 'colorado state university'), (2003, 'colorado state university'), (2004, 'colorado state university'), (2005, 'texas tech university'), (2006, 'texas tech university'), (2007, 'texas tech university'), (2008, 'texas tech university'), (2009, 'texas tech university'), (2011, 'texas tech university'), (2012, 'texas tech university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2015, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) +all_us_institutions_year : ((1981, 'colorado state university'), (1982, 'colorado state university'), (1983, 'colorado state university'), (1987, 'colorado state university'), (1988, 'colorado state university'), (1989, 'colorado state university'), (1990, 'colorado state university'), (1992, 'colorado state university'), (1994, 'colorado state university'), (1996, 'colorado state university'), (1997, 'colorado state university'), (1998, 'colorado state university'), (1999, 'colorado state university'), (2001, 'colorado state university'), (2002, 'colorado state university'), (2003, 'colorado state university'), (2004, 'colorado state university'), (2005, 'texas tech university'), (2006, 'texas tech university'), (2007, 'texas tech university'), (2008, 'colorado state university'), (2008, 'texas tech university'), (2009, 'texas tech university'), (2010, 'texas tech university'), (2011, 'texas tech university'), (2012, 'texas tech university'), (2013, 'texas a m university'), (2013, 'texas tech university'), (2014, 'texas a m university'), (2015, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) + +2/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hao +lastname : yan +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'arizona state university'),) +all_us_institutions_year : ((2014, 'arizona state university'),) + +firstname : hao +lastname : wang +middlename : None +year_range : (1999, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'argonne national laboratory'), (2016, 'argonne national laboratory'), (2017, 'argonne national laboratory'), (2019, 'argonne national laboratory'), (2020, 'argonne national laboratory'), (2021, 'argonne national laboratory')) + +3/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : albert +lastname : yee +middlename : f +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of michigan'),) +all_us_institutions_year : ((2000, 'university of michigan'),) + +firstname : albert +lastname : lee +middlename : None +year_range : (1997, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'indiana university'), (2011, 'indiana university'), (2011, 'new york university'), (2012, 'johns hopkins university'), (2012, 'johns hopkins university school of medicine'), (2013, 'indiana university'), (2013, 'johns hopkins university school of medicine'), (2014, 'indiana university'), (2014, 'johns hopkins university school of medicine'), (2015, 'indiana university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university school of medicine'), (2018, 'indiana university'), (2020, 'indiana university'), (2021, 'johns hopkins university school of medicine')) + +3/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : linda +lastname : katehi +middlename : p b +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of michigan'),) +all_us_institutions_year : ((2005, 'university of michigan'),) + +firstname : l +lastname : katchi +middlename : p b +year_range : (1995, 2002) +main_us_institutions_year : ((1995, 'university of michigan'), (2002, 'university of michigan')) +all_us_institutions_year : ((1995, 'university of michigan'), (2002, 'university of michigan')) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gabriel +lastname : lopez +middlename : p +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of new mexico'),) +all_us_institutions_year : ((2004, 'university of new mexico'),) + +firstname : gabriel +lastname : lopezberestein +middlename : None +year_range : (1983, 1997) +main_us_institutions_year : ((1983, 'university of texas system'), (1984, 'university of texas at austin'), (1985, 'university of texas md anderson cancer center'), (1985, 'university of texas system'), (1986, 'university of texas system'), (1988, 'university of texas at austin'), (1995, 'university of texas md anderson cancer center')) +all_us_institutions_year : ((1983, 'university of texas system'), (1984, 'university of texas at austin'), (1984, 'university of texas system'), (1985, 'university of texas md anderson cancer center'), (1985, 'university of texas system'), (1986, 'university of texas system'), (1987, 'university of texas system'), (1988, 'university of texas at austin'), (1988, 'university of texas system'), (1989, 'university of texas system'), (1991, 'university of texas system'), (1992, 'university of texas system'), (1995, 'university of texas md anderson cancer center'), (1996, 'university of texas system'), (1997, 'university of texas system'), (2001, 'university of texas system'), (2003, 'university of texas system'), (2006, 'university of texas system'), (2007, 'university of texas system'), (2008, 'university of texas system'), (2012, 'university of texas system'), (2016, 'university of texas system')) + +3/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chidchanok +lastname : lursinsap +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of southwestern louisiana'),) +all_us_institutions_year : ((1995, 'university of southwestern louisiana'),) + +firstname : c +lastname : lursinsap +middlename : None +year_range : (1987, 1989) +main_us_institutions_year : ((1987, 'sewanee the university of the south'), (1989, 'sewanee the university of the south')) +all_us_institutions_year : ((1987, 'sewanee the university of the south'), (1989, 'sewanee the university of the south')) + +3/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ali +lastname : koc +middlename : bulent +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of missouri columbia'),) +all_us_institutions_year : ((2014, 'university of missouri columbia'),) + +firstname : a +lastname : koc +middlename : bulent +year_range : (2007, 2020) +main_us_institutions_year : ((2020, 'clemson university'),) +all_us_institutions_year : ((2020, 'clemson university'),) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : tranter +middlename : h +year_range : (2003,) +main_us_institutions_year : ((2003, 'virginia tech'),) +all_us_institutions_year : ((2003, 'virginia tech'),) + +firstname : w +lastname : tranter +middlename : h +year_range : (1986, 2013) +main_us_institutions_year : ((1986, 'missouri university of science and technology'), (1991, 'missouri university of science and technology'), (1996, 'missouri university of science and technology')) +all_us_institutions_year : ((1986, 'missouri university of science and technology'), (1991, 'missouri university of science and technology'), (1996, 'missouri university of science and technology')) + +4/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ws +lastname : ho +middlename : winston +year_range : (2010,) +main_us_institutions_year : ((2010, 'ohio state university'),) +all_us_institutions_year : ((2010, 'ohio state university'),) + +firstname : w +lastname : ho +middlename : s winston +year_range : (1972, 2021) +main_us_institutions_year : ((1983, 'exxonmobil'), (1986, 'exxonmobil'), (1987, 'exxonmobil'), (1988, 'exxonmobil'), (1994, 'exxonmobil'), (1995, 'exxonmobil'), (2001, 'university of kentucky'), (2003, 'ohio state university'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2008, 'ohio state university'), (2009, 'ohio state university'), (2010, 'ohio state university'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2013, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'ohio state university'), (2020, 'ohio state university'), (2021, 'ohio state university')) +all_us_institutions_year : ((1982, 'exxonmobil'), (1983, 'exxonmobil'), (1984, 'exxonmobil'), (1986, 'exxonmobil'), (1987, 'exxonmobil'), (1988, 'exxonmobil'), (1989, 'exxonmobil'), (1990, 'exxonmobil'), (1991, 'exxonmobil'), (1992, 'exxonmobil'), (1993, 'exxonmobil'), (1994, 'exxonmobil'), (1995, 'exxonmobil'), (1996, 'exxonmobil'), (1998, 'exxonmobil'), (1999, 'exxonmobil'), (2001, 'university of kentucky'), (2002, 'university of kentucky'), (2003, 'ohio state university'), (2004, 'ohio state university'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2008, 'ohio state university'), (2008, 'ohio university'), (2009, 'ohio state university'), (2010, 'ohio state university'), (2011, 'momentive'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2013, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'ohio state university'), (2020, 'ohio state university'), (2021, 'ohio state university')) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yong +lastname : liu +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'polytechnic institute of new york university'),) +all_us_institutions_year : ((2013, 'polytechnic institute of new york university'),) + +firstname : yonggang +lastname : liu +middlename : None +year_range : (2009, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2011, 'university of michigan'),) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : sherman +middlename : h +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of minnesota'),) +all_us_institutions_year : ((2005, 'university of minnesota'),) + +firstname : david +lastname : sherman +middlename : james +year_range : (1991, 2018) +main_us_institutions_year : ((1991, 'university of chicago'),) +all_us_institutions_year : ((1991, 'university of chicago'), (1994, 'university of chicago')) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ali +lastname : nayfeh +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'virginia tech'),) +all_us_institutions_year : ((2003, 'virginia tech'),) + +firstname : a +lastname : nayfeh +middlename : h +year_range : (1973, 2013) +main_us_institutions_year : ((1973, 'virginia tech'), (1974, 'virginia tech'), (1975, 'virginia tech'), (1976, 'virginia tech'), (1977, 'virginia tech'), (1978, 'virginia tech'), (1979, 'virginia tech'), (1980, 'virginia tech'), (1981, 'virginia tech'), (1982, 'virginia tech'), (1983, 'virginia tech'), (1984, 'virginia tech'), (1985, 'virginia tech'), (1986, 'virginia tech'), (1987, 'virginia tech'), (1988, 'virginia tech'), (1989, 'virginia tech'), (1990, 'virginia tech'), (1991, 'virginia tech'), (1992, 'virginia tech'), (1993, 'virginia tech'), (1994, 'virginia tech'), (1995, 'virginia tech'), (1996, 'virginia tech'), (1997, 'virginia tech'), (1998, 'virginia tech'), (1999, 'virginia tech'), (2000, 'virginia tech'), (2001, 'virginia tech'), (2002, 'virginia tech'), (2003, 'virginia tech'), (2004, 'virginia tech'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2009, 'virginia tech'), (2010, 'virginia tech'), (2012, 'virginia tech'), (2013, 'virginia tech')) +all_us_institutions_year : ((1973, 'virginia tech'), (1974, 'virginia tech'), (1975, 'virginia tech'), (1976, 'virginia tech'), (1977, 'virginia tech'), (1978, 'virginia tech'), (1979, 'virginia tech'), (1980, 'virginia tech'), (1981, 'virginia tech'), (1982, 'virginia tech'), (1983, 'virginia tech'), (1984, 'virginia tech'), (1985, 'virginia tech'), (1986, 'virginia tech'), (1987, 'virginia tech'), (1988, 'virginia tech'), (1989, 'virginia tech'), (1990, 'virginia tech'), (1991, 'virginia tech'), (1992, 'virginia tech'), (1993, 'virginia tech'), (1994, 'virginia tech'), (1995, 'virginia tech'), (1996, 'virginia tech'), (1997, 'virginia tech'), (1998, 'virginia tech'), (1999, 'virginia tech'), (2000, 'virginia tech'), (2001, 'virginia tech'), (2002, 'virginia tech'), (2003, 'virginia tech'), (2004, 'virginia tech'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2007, 'virginia tech'), (2008, 'virginia tech'), (2009, 'virginia tech'), (2010, 'virginia tech'), (2012, 'virginia tech'), (2013, 'virginia tech')) + +6/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : so +middlename : m c +year_range : (2003,) +main_us_institutions_year : ((2003, 'polytechnic university'),) +all_us_institutions_year : ((2003, 'polytechnic university'),) + +firstname : joao +lastname : sousa +middlename : m c +year_range : (1994, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'massachusetts institute of technology'), (2012, 'massachusetts institute of technology')) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ronald +lastname : so +middlename : m c +year_range : (1993,) +main_us_institutions_year : ((1993, 'arizona state university'),) +all_us_institutions_year : ((1993, 'arizona state university'),) + +firstname : joao +lastname : sousa +middlename : m c +year_range : (1994, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'massachusetts institute of technology'), (2012, 'massachusetts institute of technology')) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : anderson +middlename : v +year_range : (2012,) +main_us_institutions_year : ((2012, 'georgia institute of technology'),) +all_us_institutions_year : ((2012, 'georgia institute of technology'),) + +firstname : david +lastname : anderson +middlename : None +year_range : (1990, 2021) +main_us_institutions_year : ((2005, 'franklin w olin college of engineering'),) +all_us_institutions_year : ((2005, 'franklin w olin college of engineering'),) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : harley +lastname : cudney +middlename : h +year_range : (1996,) +main_us_institutions_year : ((1996, 'virginia tech'),) +all_us_institutions_year : ((1996, 'virginia tech'),) + +firstname : harley +lastname : cudney +middlename : None +year_range : (2007, 2019) +main_us_institutions_year : ((2016, 'engineer research and development center'), (2017, 'engineer research and development center'), (2018, 'engineer research and development center'), (2019, 'engineer research and development center')) +all_us_institutions_year : ((2012, 'engineer research and development center'), (2016, 'engineer research and development center'), (2017, 'engineer research and development center'), (2018, 'engineer research and development center'), (2019, 'engineer research and development center')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gerald +lastname : heydt +middlename : t +year_range : (2006,) +main_us_institutions_year : ((2006, 'arizona state university'),) +all_us_institutions_year : ((2006, 'arizona state university'),) + +firstname : g +lastname : heydt +middlename : t +year_range : (1989, 1990) +main_us_institutions_year : ((1989, 'purdue university'), (1990, 'purdue university')) +all_us_institutions_year : ((1989, 'purdue university'), (1990, 'purdue university')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : johnson +middlename : r +year_range : (2000,) +main_us_institutions_year : ((2000, 'cornell university'),) +all_us_institutions_year : ((2000, 'cornell university'),) + +firstname : c +lastname : johnson +middlename : anderson +year_range : (1979, 2020) +main_us_institutions_year : ((1979, 'university of southern california'), (1982, 'university of southern california'), (1983, 'university of southern california'), (1984, 'university of southern california'), (1986, 'university of southern california'), (1987, 'university of southern california'), (1988, 'university of southern california'), (1989, 'university of southern california'), (1990, 'university of southern california'), (1994, 'university of southern california'), (1995, 'university of southern california'), (1996, 'university of southern california'), (1997, 'university of southern california'), (1999, 'university of southern california'), (2000, 'university of southern california'), (2001, 'university of southern california'), (2002, 'university of southern california'), (2003, 'university of southern california'), (2004, 'university of southern california'), (2005, 'university of southern california'), (2006, 'university of southern california'), (2007, 'university of southern california'), (2008, 'university of southern california'), (2009, 'university of southern california'), (2010, 'claremont graduate university'), (2011, 'claremont graduate university'), (2012, 'claremont graduate university'), (2013, 'claremont graduate university'), (2015, 'claremont graduate university'), (2017, 'claremont graduate university'), (2020, 'claremont graduate university')) +all_us_institutions_year : ((1979, 'university of southern california'), (1982, 'university of southern california'), (1983, 'university of southern california'), (1984, 'university of southern california'), (1986, 'university of southern california'), (1987, 'national institutes of health'), (1987, 'university of southern california'), (1988, 'university of southern california'), (1989, 'university of illinois at chicago'), (1989, 'university of southern california'), (1990, 'university of southern california'), (1994, 'university of southern california'), (1995, 'university of southern california'), (1996, 'university of southern california'), (1997, 'university of southern california'), (1999, 'university of southern california'), (2000, 'university of southern california'), (2001, 'university of southern california'), (2002, 'university of southern california'), (2003, 'university of southern california'), (2004, 'university of southern california'), (2005, 'university of southern california'), (2006, 'university of southern california'), (2007, 'university of southern california'), (2008, 'university of southern california'), (2009, 'university of southern california'), (2010, 'claremont graduate university'), (2011, 'claremont graduate university'), (2012, 'claremont graduate university'), (2013, 'claremont graduate university'), (2015, 'claremont graduate university'), (2017, 'claremont graduate university'), (2020, 'claremont graduate university')) + +7/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : pillai +middlename : unnikrishna +year_range : (1992,) +main_us_institutions_year : ((1992, 'polytechnic university'),) +all_us_institutions_year : ((1992, 'polytechnic university'),) + +firstname : s +lastname : pillai +middlename : unnikrishna +year_range : (1989, 2014) +main_us_institutions_year : ((1989, 'new york university'), (1990, 'new york university'), (1994, 'new york university'), (1995, 'new york university'), (2004, 'new york university'), (2006, 'new york university'), (2009, 'new york university'), (2010, 'new york university'), (2011, 'new york university'), (2012, 'new york university'), (2014, 'new york university')) +all_us_institutions_year : ((1989, 'new york university'), (1990, 'new york university'), (1993, 'new york university'), (1994, 'new york university'), (1995, 'new york university'), (2004, 'new york university'), (2006, 'new york university'), (2009, 'new york university'), (2010, 'new york university'), (2011, 'new york university'), (2012, 'new york university'), (2014, 'new york university')) + +7/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gustavious +lastname : williams +middlename : p +year_range : (2012,) +main_us_institutions_year : ((2012, 'brigham young university'),) +all_us_institutions_year : ((2012, 'brigham young university'),) + +firstname : gus +lastname : williams +middlename : None +year_range : (2009, 2013) +main_us_institutions_year : ((2009, 'brigham young university'), (2011, 'brigham young university'), (2012, 'brigham young university'), (2013, 'brigham young university')) +all_us_institutions_year : ((2009, 'brigham young university'), (2011, 'brigham young university'), (2012, 'brigham young university'), (2013, 'brigham young university')) + +7/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : raj +lastname : mutharasan +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'drexel university'),) +all_us_institutions_year : ((2010, 'drexel university'),) + +firstname : rajakkannu +lastname : mutharasan +middlename : None +year_range : (1990, 1994) +main_us_institutions_year : ((1990, 'drexel university'), (1992, 'drexel university'), (1993, 'drexel university'), (1994, 'drexel university')) +all_us_institutions_year : ((1990, 'drexel university'), (1992, 'drexel university'), (1993, 'drexel university'), (1994, 'drexel university')) + +8/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : williams +middlename : l +year_range : (2010,) +main_us_institutions_year : ((2010, 'ohio university'),) +all_us_institutions_year : ((2010, 'ohio university'),) + +firstname : r +lastname : williams +middlename : l +year_range : (1987, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((1988, 'university of california davis'), (2011, 'vision sciences inc')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : adamczyk +middlename : j +year_range : (2009,) +main_us_institutions_year : ((2009, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2009, 'massachusetts institute of technology'),) + +firstname : john +lastname : adamczyk +middlename : j +year_range : (1978, 2008) +main_us_institutions_year : ((1978, 'glenn research center'), (1985, 'glenn research center'), (1986, 'glenn research center'), (1989, 'glenn research center'), (1990, 'glenn research center'), (1992, 'glenn research center'), (1993, 'glenn research center'), (1996, 'glenn research center'), (1997, 'glenn research center'), (1999, 'glenn research center'), (2000, 'glenn research center'), (2002, 'glenn research center'), (2005, 'glenn research center'), (2008, 'glenn research center')) +all_us_institutions_year : ((1978, 'glenn research center'), (1985, 'glenn research center'), (1986, 'glenn research center'), (1987, 'glenn research center'), (1989, 'glenn research center'), (1990, 'glenn research center'), (1991, 'glenn research center'), (1992, 'glenn research center'), (1993, 'glenn research center'), (1994, 'glenn research center'), (1995, 'glenn research center'), (1996, 'glenn research center'), (1997, 'glenn research center'), (1998, 'glenn research center'), (1999, 'glenn research center'), (2000, 'glenn research center'), (2001, 'glenn research center'), (2002, 'glenn research center'), (2004, 'glenn research center'), (2005, 'glenn research center'), (2006, 'glenn research center'), (2007, 'glenn research center'), (2008, 'glenn research center'), (2012, 'glenn research center')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : frank +lastname : lucia +middlename : c de +year_range : (1995,) +main_us_institutions_year : ((1995, 'duke university'),) +all_us_institutions_year : ((1995, 'duke university'),) + +firstname : frank +lastname : lucia +middlename : c de +year_range : (2003, 2020) +main_us_institutions_year : ((2003, 'research triangle park'), (2003, 'united states army research laboratory'), (2005, 'united states army research laboratory'), (2006, 'united states army research laboratory'), (2007, 'united states army research laboratory'), (2008, 'united states army research laboratory'), (2009, 'united states army research laboratory'), (2010, 'united states army research laboratory'), (2011, 'united states army research laboratory'), (2012, 'united states army research laboratory'), (2013, 'united states army research laboratory'), (2014, 'united states army research laboratory'), (2015, 'united states army research laboratory'), (2017, 'united states army research laboratory'), (2020, 'united states army research laboratory')) +all_us_institutions_year : ((2003, 'research triangle park'), (2003, 'united states army research laboratory'), (2004, 'united states army research laboratory'), (2005, 'united states army research laboratory'), (2006, 'united states army research laboratory'), (2007, 'united states army research laboratory'), (2008, 'united states army research laboratory'), (2009, 'united states army research laboratory'), (2010, 'united states army research laboratory'), (2011, 'united states army research laboratory'), (2012, 'united states army research laboratory'), (2013, 'united states army research laboratory'), (2014, 'united states army research laboratory'), (2015, 'united states army research laboratory'), (2017, 'united states army research laboratory'), (2019, 'united states army research laboratory'), (2020, 'united states army research laboratory')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : walton +middlename : michael +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of texas at austin'),) +all_us_institutions_year : ((1996, 'university of texas at austin'),) + +firstname : c +lastname : walton +middlename : w +year_range : (1978, 1992) +main_us_institutions_year : ((1978, 'texas a m university'), (1982, 'texas a m university'), (1987, 'university of south carolina')) +all_us_institutions_year : ((1978, 'texas a m university'), (1982, 'texas a m university'), (1987, 'university of south carolina')) + +8/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : wu +middlename : felix +year_range : (1999,) +main_us_institutions_year : ((1999, 'north carolina state university'),) +all_us_institutions_year : ((1999, 'north carolina state university'),) + +firstname : s +lastname : wu +middlename : felix +year_range : (2013, 2019) +main_us_institutions_year : ((2013, 'university of california davis'), (2015, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis')) +all_us_institutions_year : ((2013, 'university of california davis'), (2015, 'university of california davis'), (2017, 'university of california'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis')) + +8/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : zanten +middlename : h van +year_range : (2001,) +main_us_institutions_year : ((2001, 'johns hopkins university'),) +all_us_institutions_year : ((2001, 'johns hopkins university'),) + +firstname : john +lastname : zanten +middlename : h van +year_range : (1994, 2017) +main_us_institutions_year : ((2000, 'north carolina state university'), (2001, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2006, 'north carolina state university'), (2007, 'north carolina state university'), (2009, 'north carolina state university'), (2010, 'north carolina state university'), (2015, 'north carolina state university'), (2016, 'north carolina state university'), (2017, 'north carolina state university')) +all_us_institutions_year : ((2000, 'north carolina state university'), (2001, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2006, 'north carolina state university'), (2007, 'north carolina state university'), (2009, 'north carolina state university'), (2010, 'north carolina state university'), (2015, 'north carolina state university'), (2016, 'north carolina state university'), (2017, 'north carolina state university')) + +8/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nh +lastname : wang +middlename : linda +year_range : (1994,) +main_us_institutions_year : ((1994, 'purdue university'),) +all_us_institutions_year : ((1994, 'purdue university'),) + +firstname : nienhwa +lastname : wang +middlename : linda +year_range : (1985, 2021) +main_us_institutions_year : ((1985, 'purdue university'), (1997, 'purdue university'), (1999, 'purdue university'), (2003, 'purdue university'), (2004, 'purdue university'), (2005, 'purdue university'), (2006, 'purdue university'), (2008, 'purdue university'), (2009, 'purdue university'), (2010, 'purdue university'), (2013, 'purdue university'), (2014, 'purdue university'), (2015, 'purdue university'), (2016, 'purdue university'), (2017, 'purdue university'), (2018, 'purdue university'), (2019, 'purdue university'), (2020, 'purdue university'), (2021, 'purdue university')) +all_us_institutions_year : ((1985, 'purdue university'), (1997, 'purdue university'), (1999, 'purdue university'), (2003, 'purdue university'), (2004, 'purdue university'), (2005, 'purdue university'), (2006, 'purdue university'), (2008, 'purdue university'), (2009, 'purdue university'), (2010, 'purdue university'), (2013, 'purdue university'), (2014, 'purdue university'), (2015, 'purdue university'), (2016, 'purdue university'), (2017, 'purdue university'), (2018, 'purdue university'), (2019, 'purdue university'), (2020, 'purdue university'), (2021, 'purdue university')) + +8/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : didem +lastname : ozevin +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of illinois chicago'),) +all_us_institutions_year : ((2014, 'university of illinois chicago'),) + +firstname : didem +lastname : ozevin +middlename : None +year_range : (2003, 2021) +main_us_institutions_year : ((2005, 'lehigh university'), (2006, 'princeton university'), (2008, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2011, 'university of illinois at chicago'), (2012, 'university of illinois at chicago'), (2013, 'university of illinois at chicago'), (2014, 'university of illinois at chicago'), (2015, 'university of illinois at chicago'), (2016, 'university of illinois at chicago'), (2017, 'university of illinois at chicago'), (2018, 'university of illinois at chicago'), (2019, 'university of illinois at chicago'), (2020, 'university of illinois at chicago'), (2021, 'university of illinois at chicago')) +all_us_institutions_year : ((2003, 'lehigh university'), (2004, 'lehigh university'), (2005, 'lehigh university'), (2006, 'lehigh university'), (2006, 'princeton university'), (2008, 'university of illinois at chicago'), (2009, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2011, 'university of illinois at chicago'), (2012, 'university of illinois at chicago'), (2012, 'university of illinois at urbana champaign'), (2013, 'university of illinois at chicago'), (2014, 'university of illinois at chicago'), (2014, 'university of illinois at urbana champaign'), (2015, 'university of illinois at chicago'), (2015, 'university of illinois at urbana champaign'), (2016, 'university of illinois at chicago'), (2016, 'university of illinois at urbana champaign'), (2017, 'university of illinois at chicago'), (2018, 'university of illinois at chicago'), (2019, 'university of illinois at chicago'), (2020, 'rush university medical center'), (2020, 'university of illinois at chicago'), (2021, 'university of illinois at chicago')) + +9/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sizhao +lastname : qin +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of texas at austin'),) +all_us_institutions_year : ((2008, 'university of texas at austin'),) + +firstname : s +lastname : qin +middlename : joe +year_range : (1996, 2020) +main_us_institutions_year : ((1996, 'university of texas at austin'), (1997, 'university of texas at austin'), (1998, 'university of texas at austin'), (1999, 'university of texas at austin'), (2000, 'university of texas at austin'), (2001, 'university of texas at austin'), (2002, 'university of texas at austin'), (2003, 'university of texas at austin'), (2004, 'university of texas at austin'), (2005, 'university of texas at austin'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of southern california'), (2009, 'university of southern california'), (2010, 'university of southern california'), (2011, 'university of southern california'), (2012, 'university of southern california'), (2013, 'university of southern california'), (2014, 'university of southern california'), (2015, 'university of southern california'), (2016, 'university of southern california'), (2017, 'university of southern california'), (2018, 'university of southern california'), (2019, 'university of southern california')) +all_us_institutions_year : ((1996, 'university of texas at austin'), (1997, 'university of texas at austin'), (1998, 'university of texas at austin'), (1999, 'university of texas at austin'), (2000, 'university of texas at austin'), (2001, 'university of texas at austin'), (2002, 'university of texas at austin'), (2003, 'university of texas at austin'), (2004, 'advanced micro devices'), (2004, 'university of texas at austin'), (2005, 'university of texas at austin'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of southern california'), (2008, 'university of texas at austin'), (2009, 'university of southern california'), (2009, 'university of texas at austin'), (2010, 'university of southern california'), (2011, 'university of southern california'), (2012, 'university of southern california'), (2012, 'university of texas at austin'), (2013, 'university of southern california'), (2014, 'university of southern california'), (2015, 'university of southern california'), (2016, 'university of southern california'), (2017, 'university of southern california'), (2018, 'university of southern california'), (2019, 'university of southern california'), (2020, 'university of southern california')) + +10/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : walsh +middlename : j +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of south florida'),) +all_us_institutions_year : ((1991, 'university of south florida'),) + +firstname : j +lastname : walsh +middlename : j +year_range : (1983, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2001, 'analysis group'), (2003, 'analysis group'), (2012, 'schlumberger'), (2015, 'analysis group'), (2016, 'analysis group'), (2017, 'analysis group'), (2020, 'analysis group')) + +11/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : green +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2014, 'massachusetts institute of technology'),) + +firstname : j +lastname : green +middlename : b +year_range : (1984, 1988) +main_us_institutions_year : ((1988, 'massachusetts institute of technology'),) +all_us_institutions_year : ((1988, 'massachusetts institute of technology'),) + +11/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : shihchun +lastname : chang +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'george mason university'),) +all_us_institutions_year : ((2002, 'george mason university'),) + +firstname : chihchun +lastname : chang +middlename : None +year_range : (2007, 2008) +main_us_institutions_year : ((2007, 'george washington university'), (2008, 'george washington university')) +all_us_institutions_year : ((2007, 'george washington university'), (2008, 'george washington university')) + +11/10 positive, 21/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : anderson +middlename : m m +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of florida'),) +all_us_institutions_year : ((1998, 'university of florida'),) + +firstname : john +lastname : anderson +middlename : m m +year_range : (2008, 2019) +main_us_institutions_year : ((2008, 'howard university'), (2013, 'howard university'), (2014, 'howard university'), (2015, 'howard university'), (2016, 'howard university'), (2017, 'howard university'), (2018, 'howard university'), (2019, 'howard university')) +all_us_institutions_year : ((2008, 'howard university'), (2012, 'howard university'), (2013, 'howard university'), (2014, 'howard university'), (2015, 'howard university'), (2016, 'howard university'), (2017, 'howard university'), (2018, 'howard university'), (2019, 'howard university')) + +11/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : g +lastname : peterson +middlename : p bud +year_range : (2006,) +main_us_institutions_year : ((2006, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((2006, 'rensselaer polytechnic institute'),) + +firstname : g +lastname : peterson +middlename : p bud +year_range : (2007, 2017) +main_us_institutions_year : ((2007, 'university of colorado boulder'), (2010, 'georgia institute of technology'), (2014, 'georgia institute of technology'), (2017, 'georgia institute of technology')) +all_us_institutions_year : ((1994, 'georgia institute of technology'), (2007, 'university of colorado boulder'), (2010, 'georgia institute of technology'), (2014, 'georgia institute of technology'), (2017, 'georgia institute of technology')) + +11/10 positive, 23/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1655.348048945268 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_environmental science_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_environmental science_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..9678b4c --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_environmental science_christoph_degree0_advisors_9015.log @@ -0,0 +1,959 @@ +Namespace(testing=False, verbose=1, field=['environmental science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [39432304] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0005051374435424804 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 129.65330580472946 minutes + +Starting active labeling... +firstname : j +lastname : boyd +middlename : p +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of michigan'),) +all_us_institutions_year : ((1996, 'university of michigan'),) + +firstname : carol +lastname : boyd +middlename : j +year_range : (1983, 2021) +main_us_institutions_year : ((1990, 'university of michigan'), (1993, 'university of michigan'), (1994, 'university of michigan'), (1995, 'university of washington'), (1996, 'university of michigan'), (1997, 'university of michigan'), (1998, 'university of michigan'), (1999, 'university of michigan'), (2000, 'university of michigan'), (2001, 'university of michigan'), (2002, 'university of michigan'), (2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) +all_us_institutions_year : ((1990, 'university of michigan'), (1993, 'university of michigan'), (1994, 'university of michigan'), (1995, 'university of washington'), (1996, 'university of michigan'), (1997, 'university of michigan'), (1998, 'university of michigan'), (1999, 'university of michigan'), (2000, 'university of michigan'), (2001, 'university of michigan'), (2002, 'university of michigan'), (2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : david +lastname : lewis +middlename : t +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of nebraska lincoln'),) +all_us_institutions_year : ((1992, 'university of nebraska lincoln'),) + +firstname : a +lastname : lewis +middlename : j +year_range : (1977, 2016) +main_us_institutions_year : ((1977, 'university of nebraska lincoln'), (1980, 'university of nebraska lincoln'), (1981, 'university of nebraska lincoln'), (1982, 'university of nebraska lincoln'), (1983, 'university of nebraska lincoln'), (1984, 'university of nebraska lincoln'), (1985, 'university of nebraska lincoln'), (1986, 'university of nebraska lincoln'), (1987, 'university of nebraska lincoln'), (1989, 'university of nebraska lincoln'), (1990, 'university of nebraska lincoln'), (1991, 'university of nebraska lincoln'), (1992, 'university of nebraska lincoln'), (1993, 'university of nebraska lincoln'), (1994, 'university of nebraska lincoln'), (1995, 'university of nebraska lincoln'), (1997, 'university of nebraska lincoln'), (1998, 'university of nebraska lincoln'), (1999, 'university of nebraska lincoln'), (2000, 'university of nebraska lincoln'), (2001, 'university of nebraska lincoln'), (2002, 'university of nebraska lincoln'), (2003, 'university of nebraska lincoln'), (2004, 'university of nebraska lincoln'), (2010, 'university of nebraska lincoln'), (2013, 'university of nebraska lincoln'), (2016, 'university of nebraska lincoln')) +all_us_institutions_year : ((1977, 'university of nebraska lincoln'), (1980, 'university of nebraska lincoln'), (1981, 'university of nebraska lincoln'), (1982, 'university of nebraska lincoln'), (1983, 'university of nebraska lincoln'), (1984, 'university of nebraska lincoln'), (1985, 'university of nebraska lincoln'), (1986, 'university of nebraska lincoln'), (1987, 'university of nebraska lincoln'), (1989, 'university of nebraska lincoln'), (1990, 'university of nebraska lincoln'), (1991, 'university of nebraska lincoln'), (1992, 'university of nebraska lincoln'), (1993, 'university of nebraska lincoln'), (1994, 'university of nebraska lincoln'), (1995, 'university of nebraska lincoln'), (1997, 'university of nebraska lincoln'), (1998, 'university of nebraska lincoln'), (1999, 'university of nebraska lincoln'), (2000, 'university of nebraska lincoln'), (2001, 'university of nebraska lincoln'), (2002, 'university of nebraska lincoln'), (2003, 'university of nebraska lincoln'), (2004, 'university of nebraska lincoln'), (2010, 'university of nebraska lincoln'), (2013, 'university of nebraska lincoln'), (2016, 'university of nebraska lincoln')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jane +lastname : hill +middlename : h +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of arizona'),) +all_us_institutions_year : ((1995, 'university of arizona'),) + +firstname : r +lastname : hill +middlename : e +year_range : (1995, 2012) +main_us_institutions_year : ((1996, 'university of arizona'), (1997, 'university of arizona'), (1998, 'university of arizona'), (2000, 'university of arizona'), (2001, 'university of arizona'), (2003, 'university of arizona'), (2006, 'university of arizona'), (2009, 'university of arizona'), (2012, 'university of arizona')) +all_us_institutions_year : ((1996, 'university of arizona'), (1997, 'university of arizona'), (1998, 'university of arizona'), (2000, 'university of arizona'), (2001, 'university of arizona'), (2003, 'university of arizona'), (2006, 'university of arizona'), (2009, 'university of arizona'), (2012, 'university of arizona')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : upmanu +lastname : lall +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'utah state university'),) +all_us_institutions_year : ((1995, 'utah state university'),) + +firstname : upmanu +lastname : lall +middlename : None +year_range : (1981, 2021) +main_us_institutions_year : ((1981, 'university of texas at austin'), (1987, 'temple university'), (1987, 'university of utah'), (1988, 'university of utah'), (1989, 'utah state university'), (1991, 'utah state university'), (1993, 'utah state university'), (1995, 'utah state university'), (1996, 'utah state university'), (1997, 'utah state university'), (1998, 'utah state university'), (1999, 'utah state university'), (2000, 'utah state university'), (2002, 'utah state university'), (2003, 'columbia university'), (2004, 'columbia university'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'columbia university'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) +all_us_institutions_year : ((1981, 'university of texas at austin'), (1987, 'temple university'), (1987, 'university of utah'), (1988, 'university of utah'), (1989, 'utah state university'), (1990, 'pennsylvania state university'), (1991, 'utah state university'), (1993, 'utah state university'), (1994, 'utah state university'), (1995, 'utah state university'), (1996, 'utah state university'), (1997, 'utah state university'), (1998, 'utah state university'), (1999, 'utah state university'), (2000, 'utah state university'), (2002, 'utah state university'), (2003, 'columbia university'), (2003, 'lamont doherty earth observatory'), (2004, 'columbia university'), (2004, 'lamont doherty earth observatory'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'columbia university'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : agnew +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of california los angeles'),) +all_us_institutions_year : ((2003, 'university of california los angeles'),) + +firstname : john +lastname : agnew +middlename : None +year_range : (1978, 2021) +main_us_institutions_year : ((1978, 'syracuse university'), (1979, 'syracuse university'), (1981, 'syracuse university'), (1982, 'syracuse university'), (1983, 'syracuse university'), (1984, 'syracuse university'), (1985, 'syracuse university'), (1987, 'syracuse university'), (1988, 'syracuse university'), (1989, 'syracuse university'), (1990, 'syracuse university'), (1991, 'syracuse university'), (1992, 'syracuse university'), (1993, 'syracuse university'), (1994, 'syracuse university'), (1995, 'syracuse university'), (1996, 'syracuse university'), (1997, 'syracuse university'), (1997, 'university of california los angeles'), (1998, 'university of california los angeles'), (1999, 'university of california los angeles'), (2000, 'university of california los angeles'), (2001, 'university of california los angeles'), (2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'university of california los angeles'), (2007, 'university of california los angeles'), (2008, 'university of california los angeles'), (2009, 'university of california los angeles'), (2010, 'university of california los angeles'), (2011, 'university of california los angeles'), (2013, 'university of california los angeles'), (2014, 'university of california los angeles'), (2015, 'university of california los angeles'), (2016, 'university of california los angeles'), (2017, 'university of california los angeles'), (2018, 'university of california los angeles'), (2019, 'university of california los angeles'), (2020, 'university of california los angeles')) +all_us_institutions_year : ((1978, 'syracuse university'), (1979, 'syracuse university'), (1981, 'syracuse university'), (1982, 'syracuse university'), (1983, 'syracuse university'), (1984, 'syracuse university'), (1985, 'syracuse university'), (1987, 'syracuse university'), (1988, 'syracuse university'), (1989, 'syracuse university'), (1990, 'syracuse university'), (1991, 'syracuse university'), (1992, 'syracuse university'), (1993, 'syracuse university'), (1994, 'syracuse university'), (1995, 'syracuse university'), (1996, 'syracuse university'), (1996, 'university of california los angeles'), (1997, 'syracuse university'), (1997, 'university of california los angeles'), (1998, 'university of california los angeles'), (1999, 'university of california los angeles'), (2000, 'university of california los angeles'), (2001, 'university of california los angeles'), (2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'university of california los angeles'), (2007, 'university of california los angeles'), (2008, 'university of california'), (2008, 'university of california los angeles'), (2009, 'university of california los angeles'), (2010, 'university of california los angeles'), (2011, 'university of california los angeles'), (2012, 'university of california los angeles'), (2013, 'university of california'), (2013, 'university of california los angeles'), (2014, 'university of california'), (2014, 'university of california los angeles'), (2015, 'university of california'), (2015, 'university of california los angeles'), (2016, 'university of california'), (2016, 'university of california los angeles'), (2017, 'university of california los angeles'), (2018, 'university of california los angeles'), (2019, 'university of california los angeles'), (2020, 'university of california los angeles'), (2021, 'university of california los angeles')) + +1/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ray +lastname : williamson +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'george washington university'),) +all_us_institutions_year : ((2006, 'george washington university'),) + +firstname : ray +lastname : william +middlename : d +year_range : (1990, 2006) +main_us_institutions_year : ((1993, 'oregon state university'), (1999, 'oregon state university'), (2002, 'oregon state university')) +all_us_institutions_year : ((1993, 'oregon state university'), (1999, 'oregon state university'), (2002, 'oregon state university')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : stevenson +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of massachusetts boston'),) +all_us_institutions_year : ((2005, 'university of massachusetts boston'),) + +firstname : robert +lastname : stevens +middlename : g +year_range : (2002, 2016) +main_us_institutions_year : ((2002, 'washington state university'), (2005, 'washington state university'), (2006, 'washington state university'), (2012, 'washington state university'), (2016, 'washington state university')) +all_us_institutions_year : ((2002, 'washington state university'), (2005, 'washington state university'), (2006, 'washington state university'), (2012, 'washington state university'), (2016, 'washington state university')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : walker +middlename : todd +year_range : (2014,) +main_us_institutions_year : ((2014, 'cornell university'),) +all_us_institutions_year : ((2014, 'cornell university'),) + +firstname : m +lastname : walter +middlename : todd +year_range : (1999, 2021) +main_us_institutions_year : ((1999, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) +all_us_institutions_year : ((1999, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2005, 'ithaca college'), (2005, 'washington state university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2018, 'ithaca college'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) + +2/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ramani +lastname : narayan +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'michigan state university'),) +all_us_institutions_year : ((1997, 'michigan state university'),) + +firstname : ram +lastname : narayanan +middlename : m +year_range : (1986, 2021) +main_us_institutions_year : ((1986, 'university of massachusetts amherst'), (1988, 'university of massachusetts amherst'), (1990, 'university of nebraska lincoln'), (1992, 'university of nebraska lincoln'), (1993, 'university of nebraska lincoln'), (1995, 'university of nebraska lincoln'), (1996, 'university of nebraska lincoln'), (1997, 'university of nebraska lincoln'), (1998, 'university of nebraska lincoln'), (1999, 'university of nebraska lincoln'), (2000, 'university of nebraska lincoln'), (2001, 'university of nebraska lincoln'), (2002, 'university of nebraska lincoln'), (2003, 'university of nebraska lincoln'), (2004, 'pennsylvania state university'), (2005, 'pennsylvania state university'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'pennsylvania state university'), (2021, 'pennsylvania state university')) +all_us_institutions_year : ((1986, 'university of massachusetts amherst'), (1988, 'university of massachusetts amherst'), (1990, 'university of nebraska lincoln'), (1992, 'university of nebraska lincoln'), (1993, 'university of nebraska lincoln'), (1995, 'university of nebraska lincoln'), (1996, 'university of nebraska lincoln'), (1997, 'university of nebraska lincoln'), (1998, 'university of nebraska lincoln'), (1999, 'university of nebraska lincoln'), (2000, 'university of nebraska lincoln'), (2001, 'university of nebraska lincoln'), (2002, 'university of nebraska lincoln'), (2003, 'university of kansas'), (2003, 'university of nebraska lincoln'), (2004, 'pennsylvania state university'), (2004, 'university of nebraska lincoln'), (2005, 'pennsylvania state university'), (2005, 'university of nebraska lincoln'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'air force research laboratory'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'pennsylvania state university'), (2020, 'united states department of state'), (2021, 'pennsylvania state university')) + +3/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : miguel +lastname : marino +middlename : a +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of california davis'),) +all_us_institutions_year : ((2013, 'university of california davis'),) + +firstname : m +lastname : moreno +middlename : a +year_range : (1985, 1989) +main_us_institutions_year : ((1985, 'university of california los angeles'),) +all_us_institutions_year : ((1985, 'university of california los angeles'),) + +3/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : stern +middlename : i +year_range : (2007,) +main_us_institutions_year : ((2007, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((2007, 'rensselaer polytechnic institute'),) + +firstname : david +lastname : sternberg +middlename : None +year_range : (2008, 2019) +main_us_institutions_year : ((2015, 'iowa department of natural resources'),) +all_us_institutions_year : ((2015, 'iowa department of natural resources'),) + +3/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hongbin +lastname : zhan +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'texas a m university college station'),) +all_us_institutions_year : ((2015, 'texas a m university college station'),) + +firstname : hongbin +lastname : zhang +middlename : None +year_range : (2009, 2021) +main_us_institutions_year : ((2009, 'idaho national laboratory'), (2013, 'idaho national laboratory'), (2014, 'idaho national laboratory'), (2015, 'idaho national laboratory'), (2016, 'idaho national laboratory'), (2017, 'idaho national laboratory'), (2018, 'idaho national laboratory'), (2019, 'idaho national laboratory'), (2020, 'idaho national laboratory'), (2021, 'idaho national laboratory')) +all_us_institutions_year : ((2008, 'idaho national laboratory'), (2009, 'idaho national laboratory'), (2010, 'idaho national laboratory'), (2011, 'idaho national laboratory'), (2012, 'idaho national laboratory'), (2013, 'idaho national laboratory'), (2014, 'idaho national laboratory'), (2015, 'idaho national laboratory'), (2016, 'idaho national laboratory'), (2017, 'idaho national laboratory'), (2018, 'idaho national laboratory'), (2019, 'idaho national laboratory'), (2020, 'idaho national laboratory'), (2021, 'idaho national laboratory')) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : smith +middlename : c +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of rhode island'),) +all_us_institutions_year : ((2003, 'university of rhode island'),) + +firstname : david +lastname : smith +middlename : None +year_range : (1862, 2021) +main_us_institutions_year : ((1983, 'ibm'), (1986, 'ibm'), (1991, 'ibm'), (1992, 'andrews university'), (1992, 'ibm'), (1993, 'ibm'), (1993, 'university of florida'), (2019, 'ibm')) +all_us_institutions_year : ((1980, 'ibm'), (1981, 'ibm'), (1982, 'ibm'), (1983, 'ibm'), (1984, 'ibm'), (1985, 'ibm'), (1986, 'ibm'), (1987, 'ibm'), (1988, 'ibm'), (1990, 'ibm'), (1991, 'ibm'), (1992, 'andrews university'), (1992, 'ibm'), (1993, 'ibm'), (1993, 'university of florida'), (1995, 'pennsylvania state university'), (1997, 'university of florida'), (1999, 'goddard space flight center'), (1999, 'ibm'), (1999, 'university of maryland college park'), (2001, 'university of maryland college park'), (2002, 'university of maryland college park'), (2002, 'vanderbilt university'), (2003, 'university of maryland college park'), (2006, 'university of maryland college park'), (2011, 'allegheny general hospital'), (2014, 'zoetis'), (2017, 'broad institute'), (2018, 'ibm'), (2019, 'ibm'), (2020, 'ibm')) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : patrick +lastname : sullivan +middlename : j +year_range : (2013,) +main_us_institutions_year : ((2013, 'cornell university'),) +all_us_institutions_year : ((2013, 'cornell university'),) + +firstname : d +lastname : sullivan +middlename : j +year_range : (1977, 2020) +main_us_institutions_year : None +all_us_institutions_year : ((2003, 'vaughn college of aeronautics and technology'), (2014, 'vaughn college of aeronautics and technology')) + +3/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : adams +middlename : eric +year_range : (1995,) +main_us_institutions_year : ((1995, 'massachusetts institute of technology'),) +all_us_institutions_year : ((1995, 'massachusetts institute of technology'),) + +firstname : e +lastname : adams +middlename : r +year_range : (2006, 2017) +main_us_institutions_year : ((2007, 'massachusetts institute of technology'), (2008, 'massachusetts institute of technology'), (2010, 'massachusetts institute of technology'), (2011, 'smithsonian astrophysical observatory')) +all_us_institutions_year : ((2007, 'massachusetts institute of technology'), (2008, 'massachusetts institute of technology'), (2010, 'massachusetts institute of technology'), (2011, 'smithsonian astrophysical observatory')) + +3/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : birl +lastname : lowery +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of wisconsin madison'),) +all_us_institutions_year : ((1990, 'university of wisconsin madison'),) + +firstname : b +lastname : lowery +middlename : None +year_range : (1984, 2017) +main_us_institutions_year : ((1986, 'university of wisconsin madison'), (1987, 'university of wisconsin madison'), (1988, 'university of wisconsin madison'), (1989, 'university of wisconsin madison'), (1990, 'university of wisconsin madison'), (1991, 'university of wisconsin madison'), (1992, 'university of wisconsin madison'), (1993, 'university of arkansas'), (1994, 'university of wisconsin madison'), (1995, 'university of wisconsin madison'), (1996, 'university of wisconsin madison'), (1997, 'university of wisconsin madison'), (1998, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2001, 'university of wisconsin madison'), (2003, 'university of wisconsin madison'), (2004, 'university of wisconsin madison'), (2005, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison')) +all_us_institutions_year : ((1986, 'university of wisconsin madison'), (1987, 'university of wisconsin madison'), (1988, 'university of wisconsin madison'), (1989, 'university of wisconsin madison'), (1990, 'university of wisconsin madison'), (1991, 'university of wisconsin madison'), (1992, 'university of wisconsin madison'), (1993, 'university of arkansas'), (1994, 'university of wisconsin madison'), (1995, 'university of wisconsin madison'), (1996, 'university of wisconsin madison'), (1997, 'university of wisconsin madison'), (1998, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2001, 'university of wisconsin madison'), (2003, 'university of wisconsin madison'), (2004, 'university of wisconsin madison'), (2005, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison')) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : samuel +lastname : mozley +middlename : c +year_range : (1992,) +main_us_institutions_year : ((1992, 'north carolina state university'),) +all_us_institutions_year : ((1992, 'north carolina state university'),) + +firstname : samuel +lastname : mozley +middlename : c +year_range : (1977, 1989) +main_us_institutions_year : ((1977, 'university of michigan'), (1978, 'university of michigan'), (1985, 'university of michigan')) +all_us_institutions_year : ((1977, 'university of michigan'), (1978, 'university of michigan'), (1985, 'university of michigan')) + +4/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeffrey +lastname : welker +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'colorado state university'),) +all_us_institutions_year : ((2005, 'colorado state university'),) + +firstname : jeffery +lastname : welker +middlename : m +year_range : (1993, 2014) +main_us_institutions_year : ((1995, 'colorado state university'), (1996, 'colorado state university'), (1997, 'colorado state university'), (2003, 'colorado state university'), (2011, 'university of alaska anchorage'), (2012, 'university of alaska anchorage')) +all_us_institutions_year : ((1995, 'colorado state university'), (1996, 'colorado state university'), (1997, 'colorado state university'), (1997, 'university of wyoming'), (2003, 'colorado state university'), (2011, 'university of alaska anchorage'), (2012, 'university of alaska anchorage')) + +4/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gabriel +lastname : katul +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'duke university'),) +all_us_institutions_year : ((2014, 'duke university'),) + +firstname : gg +lastname : katul +middlename : None +year_range : (2001, 2010) +main_us_institutions_year : ((2001, 'duke university'), (2008, 'duke university'), (2010, 'duke university')) +all_us_institutions_year : ((2001, 'duke university'), (2008, 'duke university'), (2010, 'duke university')) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : peterson +middlename : e +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of wisconsin madison'),) +all_us_institutions_year : ((1991, 'university of wisconsin madison'),) + +firstname : shanan +lastname : peters +middlename : e +year_range : (1998, 2021) +main_us_institutions_year : ((1998, 'denison university'), (2001, 'university of chicago'), (2002, 'university of chicago'), (2004, 'university of michigan'), (2004, 'university of chicago'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2018, 'university of wisconsin madison'), (2019, 'university of wisconsin madison'), (2020, 'university of wisconsin madison')) +all_us_institutions_year : ((1998, 'denison university'), (2001, 'university of chicago'), (2002, 'university of chicago'), (2004, 'university of chicago'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of chicago'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2010, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2018, 'university of wisconsin madison'), (2019, 'university of wisconsin madison'), (2020, 'university of wisconsin madison')) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : george +lastname : parks +middlename : a +year_range : (1998,) +main_us_institutions_year : ((1998, 'stanford university'),) +all_us_institutions_year : ((1998, 'stanford university'),) + +firstname : sulgiye +lastname : park +middlename : None +year_range : (2014, 2021) +main_us_institutions_year : ((2014, 'university of michigan'), (2015, 'stanford university'), (2016, 'stanford university'), (2017, 'stanford university'), (2018, 'stanford university'), (2019, 'stanford university'), (2020, 'stanford university'), (2021, 'stanford university')) +all_us_institutions_year : ((2014, 'university of michigan'), (2015, 'stanford university'), (2016, 'stanford university'), (2017, 'stanford university'), (2018, 'stanford university'), (2019, 'stanford university'), (2020, 'slac national accelerator laboratory'), (2020, 'stanford university'), (2021, 'stanford university')) + +6/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christopher +lastname : higgins +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'colorado school of mines'),) +all_us_institutions_year : ((2014, 'colorado school of mines'),) + +firstname : christopher +lastname : higgins +middlename : l +year_range : (2004, 2020) +main_us_institutions_year : ((2004, 'texas tech university'), (2005, 'texas tech university'), (2006, 'tarleton state university'), (2007, 'tarleton state university'), (2008, 'texas tech university'), (2009, 'tarleton state university'), (2010, 'university of connecticut'), (2010, 'tarleton state university'), (2011, 'university of connecticut'), (2011, 'tarleton state university'), (2012, 'tarleton state university'), (2013, 'tarleton state university'), (2014, 'tarleton state university'), (2015, 'tarleton state university'), (2016, 'tarleton state university'), (2017, 'tarleton state university'), (2018, 'tarleton state university'), (2020, 'tarleton state university')) +all_us_institutions_year : ((2004, 'texas tech university'), (2005, 'texas tech university'), (2006, 'tarleton state university'), (2006, 'texas tech university'), (2007, 'tarleton state university'), (2008, 'texas tech university'), (2009, 'tarleton state university'), (2009, 'texas tech university'), (2010, 'tarleton state university'), (2010, 'university of connecticut'), (2011, 'tarleton state university'), (2011, 'university of connecticut'), (2012, 'tarleton state university'), (2013, 'tarleton state university'), (2014, 'tarleton state university'), (2015, 'tarleton state university'), (2016, 'tarleton state university'), (2017, 'tarleton state university'), (2018, 'tarleton state university'), (2020, 'tarleton state university')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christopher +lastname : duffy +middlename : j +year_range : (1993,) +main_us_institutions_year : ((1993, 'pennsylvania state university'),) +all_us_institutions_year : ((1993, 'pennsylvania state university'),) + +firstname : christopher +lastname : duffy +middlename : j +year_range : (1984, 1988) +main_us_institutions_year : ((1984, 'utah state university'), (1988, 'utah state university')) +all_us_institutions_year : ((1984, 'utah state university'), (1988, 'utah state university')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : t +lastname : williams +middlename : h lee +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of oklahoma'),) +all_us_institutions_year : ((1992, 'university of oklahoma'),) + +firstname : t +lastname : williams +middlename : h lee +year_range : (1980, 1986) +main_us_institutions_year : ((1980, 'university of kansas'), (1986, 'university of kansas')) +all_us_institutions_year : ((1980, 'university of kansas'), (1986, 'university of kansas')) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dorothy +lastname : peteet +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'columbia university'),) +all_us_institutions_year : ((1996, 'columbia university'),) + +firstname : dorothy +lastname : peteet +middlename : m +year_range : (1985, 2021) +main_us_institutions_year : ((1985, 'goddard space flight center'), (1986, 'goddard space flight center'), (1990, 'goddard space flight center'), (1992, 'goddard institute for space studies'), (1993, 'lamont doherty earth observatory'), (1993, 'goddard institute for space studies'), (1994, 'goddard space flight center'), (1994, 'goddard institute for space studies'), (1995, 'lamont doherty earth observatory'), (1995, 'goddard institute for space studies'), (1998, 'goddard institute for space studies'), (1999, 'goddard institute for space studies'), (2000, 'goddard institute for space studies'), (2001, 'lamont doherty earth observatory'), (2002, 'goddard institute for space studies'), (2003, 'goddard institute for space studies'), (2004, 'lamont doherty earth observatory'), (2005, 'lamont doherty earth observatory'), (2005, 'goddard institute for space studies'), (2006, 'lamont doherty earth observatory'), (2006, 'goddard institute for space studies'), (2007, 'goddard institute for space studies'), (2009, 'lamont doherty earth observatory'), (2009, 'goddard institute for space studies'), (2010, 'lamont doherty earth observatory'), (2010, 'goddard institute for space studies'), (2011, 'lamont doherty earth observatory'), (2011, 'goddard institute for space studies'), (2012, 'lamont doherty earth observatory'), (2012, 'goddard institute for space studies'), (2013, 'goddard institute for space studies'), (2014, 'lamont doherty earth observatory'), (2014, 'goddard institute for space studies'), (2015, 'lamont doherty earth observatory'), (2015, 'goddard institute for space studies'), (2016, 'lamont doherty earth observatory'), (2017, 'lamont doherty earth observatory'), (2018, 'lamont doherty earth observatory'), (2018, 'goddard institute for space studies'), (2019, 'goddard institute for space studies'), (2021, 'lamont doherty earth observatory'), (2021, 'goddard institute for space studies')) +all_us_institutions_year : ((1985, 'goddard space flight center'), (1986, 'goddard space flight center'), (1987, 'goddard institute for space studies'), (1990, 'goddard space flight center'), (1992, 'goddard institute for space studies'), (1992, 'goddard space flight center'), (1993, 'goddard institute for space studies'), (1993, 'goddard space flight center'), (1993, 'lamont doherty earth observatory'), (1994, 'goddard institute for space studies'), (1994, 'goddard space flight center'), (1994, 'lamont doherty earth observatory'), (1995, 'goddard institute for space studies'), (1995, 'lamont doherty earth observatory'), (1998, 'goddard institute for space studies'), (1998, 'lamont doherty earth observatory'), (1999, 'goddard institute for space studies'), (1999, 'lamont doherty earth observatory'), (2000, 'goddard institute for space studies'), (2001, 'goddard institute for space studies'), (2001, 'goddard space flight center'), (2001, 'lamont doherty earth observatory'), (2002, 'goddard institute for space studies'), (2003, 'goddard institute for space studies'), (2003, 'lamont doherty earth observatory'), (2004, 'lamont doherty earth observatory'), (2005, 'goddard institute for space studies'), (2005, 'lamont doherty earth observatory'), (2006, 'goddard institute for space studies'), (2006, 'lamont doherty earth observatory'), (2007, 'goddard institute for space studies'), (2009, 'goddard institute for space studies'), (2009, 'lamont doherty earth observatory'), (2010, 'goddard institute for space studies'), (2010, 'lamont doherty earth observatory'), (2011, 'goddard institute for space studies'), (2011, 'lamont doherty earth observatory'), (2012, 'goddard institute for space studies'), (2012, 'lamont doherty earth observatory'), (2013, 'goddard institute for space studies'), (2013, 'lamont doherty earth observatory'), (2014, 'columbia university'), (2014, 'goddard institute for space studies'), (2014, 'goddard space flight center'), (2014, 'lamont doherty earth observatory'), (2015, 'goddard institute for space studies'), (2015, 'lamont doherty earth observatory'), (2016, 'goddard institute for space studies'), (2016, 'lamont doherty earth observatory'), (2017, 'goddard institute for space studies'), (2017, 'lamont doherty earth observatory'), (2018, 'goddard institute for space studies'), (2018, 'lamont doherty earth observatory'), (2019, 'goddard institute for space studies'), (2019, 'lamont doherty earth observatory'), (2021, 'goddard institute for space studies'), (2021, 'lamont doherty earth observatory')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : derksen +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'ohio state university'),) +all_us_institutions_year : ((2003, 'ohio state university'),) + +firstname : r +lastname : derksen +middlename : c +year_range : (2005, 2012) +main_us_institutions_year : ((2005, 'ohio agricultural research and development center'), (2012, 'ohio agricultural research and development center')) +all_us_institutions_year : ((2005, 'ohio agricultural research and development center'), (2012, 'ohio agricultural research and development center')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : qiong +lastname : zhang +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of south florida'),) +all_us_institutions_year : ((2012, 'university of south florida'),) + +firstname : xi +lastname : zhang +middlename : None +year_range : (2009, 2021) +main_us_institutions_year : ((2009, 'university of south florida'), (2010, 'university of south florida')) +all_us_institutions_year : ((2009, 'university of south florida'), (2010, 'university of south florida')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : steichen +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'kansas state university'),) +all_us_institutions_year : ((1990, 'kansas state university'),) + +firstname : james +lastname : steichen +middlename : m +year_range : (1973, 1988) +main_us_institutions_year : ((1974, 'oklahoma state university stillwater'), (1988, 'oklahoma state university stillwater')) +all_us_institutions_year : ((1974, 'oklahoma state university stillwater'), (1988, 'oklahoma state university stillwater')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : weikuo +lastname : tao +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'colorado state university'),) +all_us_institutions_year : ((2012, 'colorado state university'),) + +firstname : lei +lastname : tao +middlename : None +year_range : (2008, 2021) +main_us_institutions_year : ((2008, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university'), (2012, 'princeton university'), (2013, 'princeton university'), (2014, 'princeton university'), (2015, 'princeton university'), (2016, 'princeton university'), (2017, 'princeton university'), (2020, 'princeton university'), (2021, 'princeton university')) +all_us_institutions_year : ((2007, 'colorado state university'), (2008, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university'), (2011, 'colorado state university'), (2012, 'princeton university'), (2013, 'princeton university'), (2014, 'colorado state university'), (2014, 'princeton university'), (2015, 'princeton university'), (2016, 'princeton university'), (2017, 'princeton university'), (2020, 'princeton university'), (2021, 'princeton university')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : watson +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of nevada reno'),) +all_us_institutions_year : ((1996, 'university of nevada reno'),) + +firstname : john +lastname : watson +middlename : w +year_range : (1999, 2012) +main_us_institutions_year : ((2010, 'united states department of commerce'),) +all_us_institutions_year : ((2010, 'united states department of commerce'),) + +7/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : hale +middlename : c +year_range : (1995,) +main_us_institutions_year : ((1995, 'college of william mary'),) +all_us_institutions_year : ((1995, 'college of william mary'),) + +firstname : robert +lastname : hale +middlename : None +year_range : (2006, 2014) +main_us_institutions_year : ((2007, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university')) +all_us_institutions_year : ((2007, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university'), (2012, 'colorado state university')) + +7/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : alexander +middlename : joan +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of colorado at boulder'),) +all_us_institutions_year : ((2015, 'university of colorado at boulder'),) + +firstname : m +lastname : alexander +middlename : joan +year_range : (1995, 2021) +main_us_institutions_year : ((1995, 'university of washington'), (1999, 'university of washington'), (2011, 'national waste recycling association')) +all_us_institutions_year : ((1995, 'university of washington'), (1997, 'university of washington'), (1999, 'university of washington'), (2011, 'national waste recycling association'), (2011, 'university of colorado boulder'), (2015, 'national waste recycling association')) + +7/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : kapila +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of missouri columbia'),) +all_us_institutions_year : ((1992, 'university of missouri columbia'),) + +firstname : shubhender +lastname : kapila +middlename : None +year_range : (1972, 1998) +main_us_institutions_year : ((1972, 'university of missouri'), (1973, 'university of missouri'), (1979, 'university of missouri'), (1980, 'university of missouri'), (1981, 'university of missouri'), (1982, 'university of missouri'), (1983, 'university of missouri'), (1986, 'university of missouri'), (1989, 'university of missouri'), (1990, 'university of missouri'), (1991, 'university of missouri'), (1992, 'university of missouri'), (1993, 'university of missouri')) +all_us_institutions_year : ((1972, 'university of missouri'), (1973, 'university of missouri'), (1979, 'university of missouri'), (1980, 'university of missouri'), (1981, 'university of missouri'), (1982, 'university of missouri'), (1983, 'university of missouri'), (1986, 'university of missouri'), (1989, 'university of missouri'), (1990, 'edison international'), (1990, 'university of missouri'), (1991, 'university of missouri'), (1992, 'university of missouri'), (1993, 'university of missouri')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elizabeth +lastname : salafia +middlename : blodgett +year_range : (2015,) +main_us_institutions_year : ((2015, 'north dakota state university'),) +all_us_institutions_year : ((2015, 'north dakota state university'),) + +firstname : elizabeth +lastname : salafia +middlename : h blodgett +year_range : (2007, 2021) +main_us_institutions_year : ((2007, 'university of notre dame'), (2008, 'university of notre dame'), (2008, 'north dakota state university'), (2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame'), (2011, 'north dakota state university'), (2012, 'north dakota state university'), (2013, 'north dakota state university'), (2014, 'north dakota state university'), (2015, 'north dakota state university'), (2016, 'north dakota state university'), (2021, 'north dakota state university')) +all_us_institutions_year : ((2007, 'university of notre dame'), (2008, 'north dakota state university'), (2008, 'university of notre dame'), (2009, 'north dakota state university'), (2009, 'university of notre dame'), (2010, 'north dakota state university'), (2010, 'university of notre dame'), (2011, 'north dakota state university'), (2011, 'university of notre dame'), (2012, 'north dakota state university'), (2013, 'north dakota state university'), (2014, 'north dakota state university'), (2015, 'north dakota state university'), (2016, 'north dakota state university'), (2021, 'north dakota state university')) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : julie +lastname : coonrod +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of new mexico'),) +all_us_institutions_year : ((2006, 'university of new mexico'),) + +firstname : julia +lastname : coonrod +middlename : e allred +year_range : (2002, 2009) +main_us_institutions_year : ((2002, 'university of new mexico'), (2004, 'university of new mexico'), (2009, 'university of new mexico')) +all_us_institutions_year : ((2001, 'university of new mexico'), (2002, 'university of new mexico'), (2004, 'university of new mexico'), (2009, 'university of new mexico')) + +10/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : k +lastname : reddy +middlename : ramesh +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of florida'),) +all_us_institutions_year : ((2010, 'university of florida'),) + +firstname : k +lastname : reddy +middlename : v +year_range : (1986, 1987) +main_us_institutions_year : ((1987, 'university of florida'),) +all_us_institutions_year : ((1987, 'university of florida'),) + +11/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jackson +lastname : blanton +middlename : o +year_range : (2005,) +main_us_institutions_year : ((2005, 'georgia institute of technology'),) +all_us_institutions_year : ((2005, 'georgia institute of technology'),) + +firstname : j +lastname : blanton +middlename : o +year_range : (1978, 2006) +main_us_institutions_year : ((1978, 'skidaway institute of oceanography'), (1980, 'skidaway institute of oceanography'), (1983, 'skidaway institute of oceanography'), (1984, 'skidaway institute of oceanography'), (1988, 'skidaway institute of oceanography'), (1994, 'skidaway institute of oceanography'), (1998, 'skidaway institute of oceanography'), (1999, 'skidaway institute of oceanography'), (2000, 'skidaway institute of oceanography'), (2004, 'skidaway institute of oceanography'), (2005, 'skidaway institute of oceanography'), (2006, 'skidaway institute of oceanography')) +all_us_institutions_year : ((1978, 'skidaway institute of oceanography'), (1980, 'skidaway institute of oceanography'), (1981, 'skidaway institute of oceanography'), (1983, 'skidaway institute of oceanography'), (1984, 'skidaway institute of oceanography'), (1988, 'skidaway institute of oceanography'), (1994, 'skidaway institute of oceanography'), (1998, 'skidaway institute of oceanography'), (1999, 'skidaway institute of oceanography'), (2000, 'skidaway institute of oceanography'), (2004, 'skidaway institute of oceanography'), (2005, 'skidaway institute of oceanography'), (2006, 'skidaway institute of oceanography'), (2013, 'skidaway institute of oceanography')) + +11/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : stegeman +middlename : j +year_range : (1998,) +main_us_institutions_year : ((1998, 'massachusetts institute of technology'),) +all_us_institutions_year : ((1998, 'massachusetts institute of technology'),) + +firstname : john +lastname : stegeman +middlename : j +year_range : (1973, 2021) +main_us_institutions_year : ((1973, 'woods hole oceanographic institution'), (1975, 'woods hole oceanographic institution'), (1977, 'woods hole oceanographic institution'), (1979, 'woods hole oceanographic institution'), (1980, 'woods hole oceanographic institution'), (1981, 'woods hole oceanographic institution'), (1982, 'woods hole oceanographic institution'), (1983, 'woods hole oceanographic institution'), (1984, 'woods hole oceanographic institution'), (1985, 'woods hole oceanographic institution'), (1986, 'woods hole oceanographic institution'), (1987, 'woods hole oceanographic institution'), (1988, 'woods hole oceanographic institution'), (1989, 'woods hole oceanographic institution'), (1990, 'woods hole oceanographic institution'), (1991, 'woods hole oceanographic institution'), (1992, 'woods hole oceanographic institution'), (1993, 'woods hole oceanographic institution'), (1994, 'woods hole oceanographic institution'), (1995, 'woods hole oceanographic institution'), (1996, 'woods hole oceanographic institution'), (1997, 'woods hole oceanographic institution'), (1998, 'woods hole oceanographic institution'), (1999, 'woods hole oceanographic institution'), (2000, 'woods hole oceanographic institution'), (2001, 'woods hole oceanographic institution'), (2002, 'woods hole oceanographic institution'), (2003, 'woods hole oceanographic institution'), (2004, 'woods hole oceanographic institution'), (2005, 'woods hole oceanographic institution'), (2006, 'woods hole oceanographic institution'), (2007, 'woods hole oceanographic institution'), (2008, 'woods hole oceanographic institution'), (2009, 'woods hole oceanographic institution'), (2010, 'woods hole oceanographic institution'), (2011, 'woods hole oceanographic institution'), (2012, 'woods hole oceanographic institution'), (2013, 'woods hole oceanographic institution'), (2014, 'woods hole oceanographic institution'), (2015, 'woods hole oceanographic institution'), (2016, 'woods hole oceanographic institution'), (2017, 'woods hole oceanographic institution'), (2018, 'woods hole oceanographic institution'), (2019, 'woods hole oceanographic institution'), (2020, 'woods hole oceanographic institution'), (2021, 'woods hole oceanographic institution')) +all_us_institutions_year : ((1973, 'woods hole oceanographic institution'), (1974, 'woods hole oceanographic institution'), (1975, 'woods hole oceanographic institution'), (1977, 'woods hole oceanographic institution'), (1979, 'woods hole oceanographic institution'), (1980, 'woods hole oceanographic institution'), (1981, 'woods hole oceanographic institution'), (1982, 'woods hole oceanographic institution'), (1983, 'woods hole oceanographic institution'), (1984, 'woods hole oceanographic institution'), (1985, 'woods hole oceanographic institution'), (1986, 'woods hole oceanographic institution'), (1987, 'woods hole oceanographic institution'), (1988, 'woods hole oceanographic institution'), (1989, 'woods hole oceanographic institution'), (1990, 'woods hole oceanographic institution'), (1991, 'woods hole oceanographic institution'), (1992, 'woods hole oceanographic institution'), (1993, 'woods hole oceanographic institution'), (1994, 'woods hole oceanographic institution'), (1995, 'woods hole oceanographic institution'), (1996, 'woods hole oceanographic institution'), (1997, 'woods hole oceanographic institution'), (1998, 'woods hole oceanographic institution'), (1999, 'woods hole oceanographic institution'), (2000, 'woods hole oceanographic institution'), (2001, 'woods hole oceanographic institution'), (2002, 'woods hole oceanographic institution'), (2003, 'woods hole oceanographic institution'), (2004, 'woods hole oceanographic institution'), (2005, 'woods hole oceanographic institution'), (2006, 'woods hole oceanographic institution'), (2007, 'woods hole oceanographic institution'), (2008, 'woods hole oceanographic institution'), (2009, 'woods hole oceanographic institution'), (2010, 'woods hole oceanographic institution'), (2011, 'woods hole oceanographic institution'), (2012, 'woods hole oceanographic institution'), (2013, 'woods hole oceanographic institution'), (2014, 'woods hole oceanographic institution'), (2015, 'woods hole oceanographic institution'), (2016, 'woods hole oceanographic institution'), (2017, 'woods hole oceanographic institution'), (2018, 'woods hole oceanographic institution'), (2019, 'woods hole oceanographic institution'), (2020, 'woods hole oceanographic institution'), (2021, 'woods hole oceanographic institution')) + +11/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : leejane +lastname : liu +middlename : sally +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of washington'),) +all_us_institutions_year : ((2004, 'university of washington'),) + +firstname : lj +lastname : liu +middlename : sally +year_range : (2000, 2005) +main_us_institutions_year : ((2000, 'university of washington'), (2001, 'university of washington'), (2003, 'university of washington'), (2005, 'university of washington')) +all_us_institutions_year : ((2000, 'university of washington'), (2001, 'university of washington'), (2003, 'university of washington'), (2005, 'university of washington')) + +12/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : stegeman +middlename : j +year_range : (2003,) +main_us_institutions_year : ((2003, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2003, 'massachusetts institute of technology'),) + +firstname : john +lastname : stegeman +middlename : j +year_range : (1973, 2021) +main_us_institutions_year : ((1973, 'woods hole oceanographic institution'), (1975, 'woods hole oceanographic institution'), (1977, 'woods hole oceanographic institution'), (1979, 'woods hole oceanographic institution'), (1980, 'woods hole oceanographic institution'), (1981, 'woods hole oceanographic institution'), (1982, 'woods hole oceanographic institution'), (1983, 'woods hole oceanographic institution'), (1984, 'woods hole oceanographic institution'), (1985, 'woods hole oceanographic institution'), (1986, 'woods hole oceanographic institution'), (1987, 'woods hole oceanographic institution'), (1988, 'woods hole oceanographic institution'), (1989, 'woods hole oceanographic institution'), (1990, 'woods hole oceanographic institution'), (1991, 'woods hole oceanographic institution'), (1992, 'woods hole oceanographic institution'), (1993, 'woods hole oceanographic institution'), (1994, 'woods hole oceanographic institution'), (1995, 'woods hole oceanographic institution'), (1996, 'woods hole oceanographic institution'), (1997, 'woods hole oceanographic institution'), (1998, 'woods hole oceanographic institution'), (1999, 'woods hole oceanographic institution'), (2000, 'woods hole oceanographic institution'), (2001, 'woods hole oceanographic institution'), (2002, 'woods hole oceanographic institution'), (2003, 'woods hole oceanographic institution'), (2004, 'woods hole oceanographic institution'), (2005, 'woods hole oceanographic institution'), (2006, 'woods hole oceanographic institution'), (2007, 'woods hole oceanographic institution'), (2008, 'woods hole oceanographic institution'), (2009, 'woods hole oceanographic institution'), (2010, 'woods hole oceanographic institution'), (2011, 'woods hole oceanographic institution'), (2012, 'woods hole oceanographic institution'), (2013, 'woods hole oceanographic institution'), (2014, 'woods hole oceanographic institution'), (2015, 'woods hole oceanographic institution'), (2016, 'woods hole oceanographic institution'), (2017, 'woods hole oceanographic institution'), (2018, 'woods hole oceanographic institution'), (2019, 'woods hole oceanographic institution'), (2020, 'woods hole oceanographic institution'), (2021, 'woods hole oceanographic institution')) +all_us_institutions_year : ((1973, 'woods hole oceanographic institution'), (1974, 'woods hole oceanographic institution'), (1975, 'woods hole oceanographic institution'), (1977, 'woods hole oceanographic institution'), (1979, 'woods hole oceanographic institution'), (1980, 'woods hole oceanographic institution'), (1981, 'woods hole oceanographic institution'), (1982, 'woods hole oceanographic institution'), (1983, 'woods hole oceanographic institution'), (1984, 'woods hole oceanographic institution'), (1985, 'woods hole oceanographic institution'), (1986, 'woods hole oceanographic institution'), (1987, 'woods hole oceanographic institution'), (1988, 'woods hole oceanographic institution'), (1989, 'woods hole oceanographic institution'), (1990, 'woods hole oceanographic institution'), (1991, 'woods hole oceanographic institution'), (1992, 'woods hole oceanographic institution'), (1993, 'woods hole oceanographic institution'), (1994, 'woods hole oceanographic institution'), (1995, 'woods hole oceanographic institution'), (1996, 'woods hole oceanographic institution'), (1997, 'woods hole oceanographic institution'), (1998, 'woods hole oceanographic institution'), (1999, 'woods hole oceanographic institution'), (2000, 'woods hole oceanographic institution'), (2001, 'woods hole oceanographic institution'), (2002, 'woods hole oceanographic institution'), (2003, 'woods hole oceanographic institution'), (2004, 'woods hole oceanographic institution'), (2005, 'woods hole oceanographic institution'), (2006, 'woods hole oceanographic institution'), (2007, 'woods hole oceanographic institution'), (2008, 'woods hole oceanographic institution'), (2009, 'woods hole oceanographic institution'), (2010, 'woods hole oceanographic institution'), (2011, 'woods hole oceanographic institution'), (2012, 'woods hole oceanographic institution'), (2013, 'woods hole oceanographic institution'), (2014, 'woods hole oceanographic institution'), (2015, 'woods hole oceanographic institution'), (2016, 'woods hole oceanographic institution'), (2017, 'woods hole oceanographic institution'), (2018, 'woods hole oceanographic institution'), (2019, 'woods hole oceanographic institution'), (2020, 'woods hole oceanographic institution'), (2021, 'woods hole oceanographic institution')) + +13/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : louis +lastname : wicker +middlename : j +year_range : (2000,) +main_us_institutions_year : ((2000, 'texas a m university college station'),) +all_us_institutions_year : ((2000, 'texas a m university college station'),) + +firstname : louis +lastname : wicker +middlename : j +year_range : (2018, 2021) +main_us_institutions_year : ((2018, 'university of oklahoma'), (2019, 'university of oklahoma'), (2019, 'national oceanic and atmospheric administration'), (2021, 'university of oklahoma')) +all_us_institutions_year : ((2018, 'university of oklahoma'), (2019, 'national oceanic and atmospheric administration'), (2019, 'university of oklahoma'), (2021, 'university of oklahoma')) + +14/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : harwell +middlename : a +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of miami'),) +all_us_institutions_year : ((2000, 'university of miami'),) + +firstname : mark +lastname : harwell +middlename : a +year_range : (2007, 2008) +main_us_institutions_year : ((2007, 'florida a m university'), (2008, 'florida a m university')) +all_us_institutions_year : ((2007, 'florida a m university'), (2008, 'florida a m university')) + +14/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : pfrender +middlename : e +year_range : (2009,) +main_us_institutions_year : ((2009, 'utah state university'),) +all_us_institutions_year : ((2009, 'utah state university'),) + +firstname : michael +lastname : pfrender +middlename : e +year_range : (2017, 2021) +main_us_institutions_year : ((2017, 'university of notre dame'), (2020, 'university of notre dame'), (2021, 'university of notre dame')) +all_us_institutions_year : ((2017, 'university of notre dame'), (2018, 'university of notre dame'), (2019, 'university of notre dame'), (2020, 'university of notre dame'), (2021, 'university of notre dame')) + +14/10 positive, 21/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : susan +lastname : heever +middlename : van den +year_range : (2015,) +main_us_institutions_year : ((2015, 'colorado state university'),) +all_us_institutions_year : ((2015, 'colorado state university'),) + +firstname : susan +lastname : heever +middlename : c van den +year_range : (2004, 2021) +main_us_institutions_year : ((2004, 'colorado state university'), (2006, 'colorado state university'), (2007, 'colorado state university'), (2008, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university'), (2011, 'colorado state university'), (2012, 'colorado state university'), (2013, 'colorado state university'), (2014, 'colorado state university'), (2015, 'colorado state university'), (2016, 'colorado state university'), (2017, 'colorado state university'), (2018, 'colorado state university'), (2019, 'colorado state university'), (2020, 'colorado state university'), (2021, 'colorado state university')) +all_us_institutions_year : ((2000, 'colorado state university'), (2001, 'colorado state university'), (2004, 'colorado state university'), (2006, 'colorado state university'), (2007, 'colorado state university'), (2008, 'colorado state university'), (2009, 'colorado state university'), (2010, 'colorado state university'), (2011, 'colorado state university'), (2012, 'colorado state university'), (2013, 'colorado state university'), (2014, 'colorado state university'), (2015, 'colorado state university'), (2016, 'colorado state university'), (2017, 'colorado state university'), (2018, 'colorado state university'), (2019, 'colorado state university'), (2020, 'colorado state university'), (2021, 'colorado state university')) + +14/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : coakley +middlename : a +year_range : (2001,) +main_us_institutions_year : ((2001, 'oregon state university'),) +all_us_institutions_year : ((2001, 'oregon state university'),) + +firstname : jim +lastname : coakley +middlename : None +year_range : (2000, 2005) +main_us_institutions_year : ((2000, 'oregon state university'), (2005, 'oregon state university')) +all_us_institutions_year : ((2000, 'oregon state university'), (2005, 'oregon state university')) + +15/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : t +lastname : clement +middlename : prabhakar +year_range : (2009,) +main_us_institutions_year : ((2009, 'auburn university'),) +all_us_institutions_year : ((2009, 'auburn university'),) + +firstname : t +lastname : clement +middlename : prabakhar +year_range : (2012, 2015) +main_us_institutions_year : ((2012, 'auburn university'), (2015, 'auburn university')) +all_us_institutions_year : ((2012, 'auburn university'), (2015, 'auburn university')) + +16/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : a +lastname : flegal +middlename : russell +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california santa cruz'),) +all_us_institutions_year : ((2014, 'university of california santa cruz'),) + +firstname : russell +lastname : flegal +middlename : None +year_range : (1999, 2016) +main_us_institutions_year : ((1999, 'university of california santa cruz'), (2016, 'university of california santa cruz')) +all_us_institutions_year : ((1999, 'university of california santa cruz'), (2016, 'university of california santa cruz')) + +16/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hans +lastname : leeuwen +middlename : van +year_range : (2005,) +main_us_institutions_year : ((2005, 'iowa state university'),) +all_us_institutions_year : ((2005, 'iowa state university'),) + +firstname : johannes +lastname : leeuwen +middlename : van +year_range : (2006, 2016) +main_us_institutions_year : ((2007, 'iowa state university'), (2011, 'iowa state university'), (2015, 'iowa state university'), (2016, 'iowa state university')) +all_us_institutions_year : ((2007, 'iowa state university'), (2011, 'iowa state university'), (2015, 'iowa state university'), (2016, 'iowa state university')) + +17/10 positive, 22/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1660.5668040792148 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_geography_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_geography_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..4508675 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_geography_christoph_degree0_advisors_9015.log @@ -0,0 +1,755 @@ +Namespace(testing=False, verbose=1, field=['geography'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [205649164] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0005253871281941731 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 119.18478360573451 minutes + +Starting active labeling... +firstname : teresa +lastname : caldeira +middlename : pr +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california berkeley'),) +all_us_institutions_year : ((2014, 'university of california berkeley'),) + +firstname : teresa +lastname : caldeira +middlename : p r +year_range : (1988, 2017) +main_us_institutions_year : ((1988, 'university of california berkeley'), (1999, 'university of california irvine'), (2001, 'university of california irvine'), (2012, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2017, 'university of california berkeley')) +all_us_institutions_year : ((1988, 'university of california berkeley'), (1999, 'university of california irvine'), (2001, 'university of california irvine'), (2012, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2017, 'university of california berkeley')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : lisa +lastname : harrington +middlename : mb +year_range : (2009,) +main_us_institutions_year : ((2009, 'kansas state university'),) +all_us_institutions_year : ((2009, 'kansas state university'),) + +firstname : lisa +lastname : harrington +middlename : m b +year_range : (1988, 2020) +main_us_institutions_year : ((1988, 'new mexico state university'), (1998, 'kansas state university'), (2000, 'kansas state university'), (2001, 'kansas state university'), (2002, 'kansas state university'), (2006, 'kansas state university'), (2007, 'kansas state university'), (2010, 'kansas state university'), (2011, 'kansas state university'), (2014, 'kansas state university'), (2018, 'kansas state university')) +all_us_institutions_year : ((1988, 'new mexico state university'), (1998, 'kansas state university'), (2000, 'kansas state university'), (2001, 'kansas state university'), (2002, 'kansas state university'), (2006, 'kansas state university'), (2007, 'kansas state university'), (2010, 'kansas state university'), (2011, 'kansas state university'), (2014, 'kansas state university'), (2016, 'kansas state university'), (2017, 'kansas state university'), (2018, 'kansas state university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : hall +middlename : r +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of nevada reno'),) +all_us_institutions_year : ((2008, 'university of nevada reno'),) + +firstname : mark +lastname : hall +middlename : m +year_range : (2008, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2011, 'national archives and records administration'),) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : graham +middlename : c +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of california riverside'),) +all_us_institutions_year : ((1993, 'university of california riverside'),) + +firstname : robert +lastname : graham +middlename : m +year_range : (1842, 2021) +main_us_institutions_year : ((1964, 'massachusetts institute of technology'), (1965, 'massachusetts institute of technology'), (1979, 'university of texas southwestern medical center'), (1980, 'university of texas southwestern medical center'), (1980, 'university of texas health science center at san antonio'), (1980, 'university of texas system'), (1981, 'university of texas southwestern medical center'), (1981, 'university of texas health science center at san antonio'), (1981, 'university of texas at austin'), (1982, 'university of texas southwestern medical center'), (1982, 'university of texas system'), (1983, 'university of texas health science center at san antonio'), (1984, 'harvard university'), (1986, 'harvard university'), (1987, 'harvard university'), (1988, 'harvard university'), (1989, 'harvard university'), (1990, 'brigham and women s hospital'), (1990, 'harvard university'), (1990, 'cleveland clinic'), (1991, 'cleveland clinic'), (1992, 'massachusetts institute of technology'), (1992, 'harvard university'), (1992, 'cleveland clinic'), (1993, 'cleveland clinic'), (1994, 'st vincent s health system'), (1994, 'cleveland clinic'), (1995, 'case western reserve university'), (1997, 'st vincent s health system'), (2013, 'st vincent s health system')) +all_us_institutions_year : ((1964, 'massachusetts institute of technology'), (1965, 'massachusetts institute of technology'), (1976, 'st vincent s health system'), (1979, 'university of texas southwestern medical center'), (1980, 'university of texas health science center at san antonio'), (1980, 'university of texas southwestern medical center'), (1980, 'university of texas system'), (1981, 'university of texas at austin'), (1981, 'university of texas health science center at san antonio'), (1981, 'university of texas southwestern medical center'), (1982, 'university of texas southwestern medical center'), (1982, 'university of texas system'), (1983, 'university of texas health science center at san antonio'), (1984, 'harvard university'), (1985, 'harvard university'), (1985, 'university of texas system'), (1986, 'harvard university'), (1987, 'harvard university'), (1987, 'university of california san diego'), (1988, 'brigham and women s hospital'), (1988, 'harvard university'), (1988, 'massachusetts institute of technology'), (1988, 'university of texas southwestern medical center'), (1989, 'harvard university'), (1989, 'university of massachusetts lowell'), (1990, 'brigham and women s hospital'), (1990, 'cleveland clinic'), (1990, 'harvard university'), (1991, 'brigham and women s hospital'), (1991, 'cleveland clinic'), (1991, 'harvard university'), (1992, 'cleveland clinic'), (1992, 'harvard university'), (1992, 'massachusetts institute of technology'), (1993, 'cleveland clinic'), (1994, 'cleveland clinic'), (1994, 'harvard university'), (1994, 'st vincent s health system'), (1995, 'brigham and women s hospital'), (1995, 'case western reserve university'), (1995, 'cleveland clinic'), (1996, 'case western reserve university'), (1996, 'st vincent s health system'), (1997, 'st vincent s health system'), (2002, 'st vincent s health system'), (2005, 'st vincent s health system'), (2007, 'st vincent s health system'), (2008, 'st vincent s health system'), (2009, 'st vincent s health system'), (2010, 'st vincent s health system'), (2011, 'st vincent s health system'), (2013, 'st vincent s health system'), (2014, 'st vincent s health system'), (2015, 'st vincent s health system'), (2020, 'st vincent s health system'), (2021, 'st vincent s health system')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paul +lastname : bentzen +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of washington'),) +all_us_institutions_year : ((2002, 'university of washington'),) + +firstname : paul +lastname : bentzen +middlename : None +year_range : (1984, 2021) +main_us_institutions_year : ((1994, 'university of washington'), (1997, 'university of washington'), (1998, 'university of washington'), (1999, 'university of washington'), (2000, 'university of washington'), (2001, 'university of washington'), (2004, 'university of washington')) +all_us_institutions_year : ((1994, 'university of washington'), (1997, 'university of washington'), (1998, 'university of washington'), (1999, 'university of washington'), (2000, 'university of washington'), (2001, 'university of washington'), (2002, 'university of washington'), (2003, 'university of washington'), (2004, 'university of washington'), (2005, 'university of washington'), (2012, 'university of washington')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gary +lastname : rolfe +middlename : None +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2004, 'university of illinois at urbana champaign'),) + +firstname : gary +lastname : rolfe +middlename : l +year_range : (1979, 1987) +main_us_institutions_year : ((1979, 'university of illinois at urbana champaign'), (1982, 'university of illinois at urbana champaign'), (1985, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1975, 'university of illinois at urbana champaign'), (1979, 'university of illinois at urbana champaign'), (1982, 'university of illinois at urbana champaign'), (1985, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : weining +lastname : xiang +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of north carolina at charlotte'),) +all_us_institutions_year : ((2014, 'university of north carolina at charlotte'),) + +firstname : weining +lastname : xiang +middlename : None +year_range : (1993, 2019) +main_us_institutions_year : ((1993, 'university of north carolina at charlotte'), (1996, 'university of north carolina at charlotte'), (2000, 'university of north carolina at charlotte'), (2001, 'university of north carolina at charlotte'), (2002, 'university of north carolina at charlotte'), (2004, 'university of north carolina at charlotte'), (2006, 'university of north carolina at charlotte'), (2007, 'university of north carolina at charlotte'), (2009, 'university of north carolina at charlotte'), (2011, 'university of north carolina at charlotte'), (2019, 'university of north carolina at charlotte')) +all_us_institutions_year : ((1993, 'university of north carolina at charlotte'), (1996, 'university of north carolina at charlotte'), (2000, 'university of north carolina at charlotte'), (2001, 'university of north carolina at charlotte'), (2002, 'university of north carolina at charlotte'), (2004, 'university of north carolina at charlotte'), (2006, 'university of north carolina at charlotte'), (2007, 'university of north carolina at charlotte'), (2009, 'university of north carolina at charlotte'), (2011, 'university of north carolina at charlotte'), (2012, 'university of north carolina at charlotte'), (2013, 'university of north carolina at charlotte'), (2014, 'university of north carolina at charlotte'), (2015, 'university of north carolina at charlotte'), (2016, 'university of north carolina at charlotte'), (2019, 'university of north carolina at charlotte')) + +3/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christopher +lastname : neale +middlename : m u +year_range : (2004,) +main_us_institutions_year : ((2004, 'utah state university'),) +all_us_institutions_year : ((2004, 'utah state university'),) + +firstname : christopher +lastname : neale +middlename : m u +year_range : (1987, 2021) +main_us_institutions_year : ((1989, 'texas a m university'), (1990, 'utah state university'), (1994, 'utah state university'), (1996, 'utah state university'), (1997, 'utah state university'), (2000, 'utah state university'), (2004, 'utah state university'), (2005, 'utah state university'), (2006, 'utah state university'), (2007, 'utah state university'), (2008, 'utah state university'), (2009, 'utah state university'), (2010, 'utah state university'), (2011, 'utah state university'), (2012, 'utah state university'), (2013, 'utah state university'), (2014, 'utah state university'), (2015, 'utah state university'), (2016, 'university of nebraska lincoln'), (2017, 'university of nebraska lincoln'), (2018, 'university of nebraska lincoln'), (2019, 'university of nebraska lincoln'), (2020, 'university of nebraska lincoln'), (2021, 'university of nebraska lincoln')) +all_us_institutions_year : ((1989, 'texas a m university'), (1990, 'utah state university'), (1994, 'utah state university'), (1996, 'utah state university'), (1997, 'utah state university'), (2000, 'utah state university'), (2004, 'utah state university'), (2005, 'utah state university'), (2006, 'utah state university'), (2007, 'utah state university'), (2008, 'utah state university'), (2009, 'utah state university'), (2010, 'utah state university'), (2011, 'utah state university'), (2012, 'utah state university'), (2013, 'utah state university'), (2014, 'university of nebraska lincoln'), (2014, 'utah state university'), (2015, 'utah state university'), (2016, 'university of nebraska lincoln'), (2016, 'utah state university'), (2017, 'university of nebraska lincoln'), (2017, 'utah state university'), (2018, 'university of nebraska lincoln'), (2019, 'university of nebraska lincoln'), (2020, 'university of nebraska lincoln'), (2021, 'university of nebraska lincoln')) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ismail +lastname : sirageldin +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'johns hopkins university'),) +all_us_institutions_year : ((1992, 'johns hopkins university'),) + +firstname : ismail +lastname : serageldin +middlename : None +year_range : (1983, 2019) +main_us_institutions_year : ((1993, 'johns hopkins university'), (1995, 'world bank'), (1998, 'world bank'), (1999, 'world bank'), (2001, 'world bank')) +all_us_institutions_year : ((1993, 'johns hopkins university'), (1995, 'world bank'), (1998, 'world bank'), (1999, 'world bank'), (2001, 'world bank'), (2016, 'world bank')) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : s +lastname : levine +middlename : joseph +year_range : (2000,) +main_us_institutions_year : ((2000, 'michigan state university'),) +all_us_institutions_year : ((2000, 'michigan state university'),) + +firstname : s +lastname : levin +middlename : None +year_range : (1997, 2003) +main_us_institutions_year : ((2001, 'centers for disease control and prevention'), (2003, 'centers for disease control and prevention')) +all_us_institutions_year : ((2001, 'centers for disease control and prevention'), (2001, 'university of south carolina'), (2003, 'centers for disease control and prevention'), (2003, 'morehead state university')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : bruce +lastname : bongarten +middlename : c +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of georgia'),) +all_us_institutions_year : ((1998, 'university of georgia'),) + +firstname : bruce +lastname : bongar +middlename : None +year_range : (1988, 2020) +main_us_institutions_year : ((1991, 'palo alto university'), (1992, 'palo alto university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'palo alto university'), (1994, 'stanford university'), (1997, 'stanford university'), (1998, 'palo alto university'), (1999, 'palo alto university'), (2001, 'stanford university'), (2002, 'palo alto university'), (2004, 'palo alto university'), (2010, 'palo alto university'), (2010, 'stanford university'), (2011, 'palo alto university'), (2012, 'palo alto university'), (2013, 'palo alto university'), (2014, 'palo alto university'), (2015, 'palo alto university'), (2016, 'palo alto university'), (2017, 'palo alto university'), (2018, 'palo alto university'), (2020, 'palo alto university')) +all_us_institutions_year : ((1991, 'palo alto university'), (1992, 'palo alto university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'palo alto university'), (1994, 'stanford university'), (1997, 'stanford university'), (1998, 'palo alto university'), (1999, 'palo alto university'), (1999, 'stanford university'), (2001, 'stanford university'), (2002, 'palo alto university'), (2004, 'palo alto university'), (2009, 'palo alto university'), (2009, 'stanford university'), (2010, 'palo alto university'), (2010, 'stanford university'), (2011, 'palo alto university'), (2012, 'palo alto university'), (2013, 'palo alto university'), (2014, 'palo alto university'), (2015, 'palo alto university'), (2016, 'palo alto university'), (2017, 'palo alto university'), (2018, 'palo alto university'), (2020, 'palo alto university')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rui +lastname : liu +middlename : hai +year_range : (2008,) +main_us_institutions_year : ((2008, 'cornell university'),) +all_us_institutions_year : ((2008, 'cornell university'),) + +firstname : rui +lastname : li +middlename : None +year_range : (2005, 2021) +main_us_institutions_year : ((2005, 'chinese academy of sciences'), (2009, 'state university of new york system'), (2010, 'state university of new york system'), (2011, 'university at albany suny'), (2011, 'state university of new york system'), (2013, 'state university of new york system'), (2016, 'chinese academy of sciences'), (2018, 'chinese academy of sciences'), (2019, 'chinese academy of sciences'), (2020, 'chinese academy of sciences')) +all_us_institutions_year : ((2005, 'chinese academy of sciences'), (2009, 'state university of new york system'), (2010, 'state university of new york system'), (2010, 'university at albany suny'), (2011, 'state university of new york system'), (2011, 'university at albany suny'), (2013, 'state university of new york system'), (2013, 'university at albany suny'), (2014, 'state university of new york system'), (2014, 'university at albany suny'), (2015, 'chinese academy of sciences'), (2015, 'state university of new york system'), (2016, 'chinese academy of sciences'), (2017, 'chinese academy of sciences'), (2017, 'state university of new york system'), (2018, 'chinese academy of sciences'), (2019, 'carnegie institution for science'), (2019, 'chinese academy of sciences'), (2020, 'chinese academy of sciences'), (2021, 'chinese academy of sciences')) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : v +middlename : wyllys andrews +year_range : (2013,) +main_us_institutions_year : ((2013, 'tulane university'),) +all_us_institutions_year : ((2013, 'tulane university'),) + +firstname : e +lastname : andrews +middlename : wyllys +year_range : (1938, 2018) +main_us_institutions_year : ((1966, 'tulane university'), (1971, 'tulane university'), (1976, 'tulane university'), (1978, 'tulane university'), (1992, 'tulane university'), (1994, 'tulane university'), (2003, 'tulane university')) +all_us_institutions_year : ((1966, 'tulane university'), (1971, 'tulane university'), (1976, 'tulane university'), (1978, 'tulane university'), (1992, 'tulane university'), (1994, 'tulane university'), (2003, 'tulane university')) + +6/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : kling +middlename : phillip +year_range : (1999,) +main_us_institutions_year : ((1999, 'new mexico state university'),) +all_us_institutions_year : ((1999, 'new mexico state university'),) + +firstname : james +lastname : king +middlename : phillip +year_range : (1991, 2019) +main_us_institutions_year : ((2010, 'new mexico state university'), (2011, 'new mexico state university'), (2012, 'new mexico state university'), (2014, 'new mexico state university'), (2016, 'new mexico state university'), (2017, 'new mexico state university'), (2018, 'new mexico state university'), (2019, 'new mexico state university')) +all_us_institutions_year : ((2010, 'new mexico state university'), (2011, 'new mexico state university'), (2012, 'new mexico state university'), (2014, 'new mexico state university'), (2016, 'new mexico state university'), (2016, 'stanford university'), (2017, 'new mexico state university'), (2018, 'new mexico state university'), (2019, 'new mexico state university'), (2021, 'new mexico state university')) + +7/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : miguel +lastname : altieri +middlename : a +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california berkeley'),) +all_us_institutions_year : ((2014, 'university of california berkeley'),) + +firstname : miguel +lastname : marino +middlename : a +year_range : (2009, 2015) +main_us_institutions_year : ((2009, 'university of california berkeley'), (2012, 'university of california berkeley')) +all_us_institutions_year : ((2009, 'university of california berkeley'), (2012, 'university of california berkeley')) + +8/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gustavo +lastname : antonini +middlename : a +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of florida'),) +all_us_institutions_year : ((1997, 'university of florida'),) + +firstname : gustavo +lastname : londono +middlename : a +year_range : (1992, 2021) +main_us_institutions_year : ((2007, 'wildlife conservation society'), (2008, 'university of florida'), (2009, 'university of florida'), (2011, 'florida museum of natural history'), (2011, 'university of florida'), (2012, 'florida museum of natural history'), (2012, 'university of florida'), (2013, 'florida museum of natural history'), (2013, 'university of florida'), (2014, 'florida museum of natural history'), (2015, 'university of california riverside')) +all_us_institutions_year : ((2007, 'wildlife conservation society'), (2008, 'florida museum of natural history'), (2008, 'university of florida'), (2009, 'florida museum of natural history'), (2009, 'university of florida'), (2011, 'florida museum of natural history'), (2011, 'university of florida'), (2012, 'florida museum of natural history'), (2012, 'university of florida'), (2013, 'florida museum of natural history'), (2013, 'university of florida'), (2014, 'florida museum of natural history'), (2015, 'florida museum of natural history'), (2015, 'university of california riverside'), (2016, 'florida museum of natural history'), (2017, 'university of california riverside')) + +8/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : reynolds +middlename : r +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of iowa'),) +all_us_institutions_year : ((1995, 'university of iowa'),) + +firstname : david +lastname : butler +middlename : r +year_range : (1984, 1994) +main_us_institutions_year : ((1984, 'university of iowa'), (1994, 'university of iowa')) +all_us_institutions_year : ((1984, 'university of iowa'), (1994, 'university of iowa')) + +8/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : timothy +lastname : mullin +middlename : j +year_range : (2005,) +main_us_institutions_year : ((2005, 'north carolina state university'),) +all_us_institutions_year : ((2005, 'north carolina state university'),) + +firstname : timothy +lastname : albaugh +middlename : j +year_range : (1991, 2021) +main_us_institutions_year : ((1991, 'north carolina state university'), (1999, 'north carolina state university'), (2001, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2006, 'north carolina state university'), (2008, 'north carolina state university'), (2009, 'north carolina state university'), (2010, 'north carolina state university'), (2012, 'north carolina state university'), (2013, 'north carolina state university'), (2014, 'virginia tech'), (2015, 'virginia tech'), (2016, 'virginia tech'), (2017, 'virginia tech'), (2018, 'virginia tech'), (2019, 'virginia tech'), (2020, 'virginia tech'), (2021, 'virginia tech')) +all_us_institutions_year : ((1991, 'north carolina state university'), (1999, 'north carolina state university'), (2001, 'north carolina state university'), (2002, 'north carolina state university'), (2003, 'north carolina state university'), (2004, 'north carolina state university'), (2006, 'north carolina state university'), (2008, 'north carolina state university'), (2009, 'north carolina state university'), (2010, 'north carolina state university'), (2011, 'north carolina state university'), (2012, 'north carolina state university'), (2013, 'north carolina state university'), (2013, 'virginia tech'), (2014, 'north carolina state university'), (2014, 'virginia tech'), (2015, 'virginia tech'), (2016, 'virginia tech'), (2017, 'virginia tech'), (2018, 'virginia tech'), (2019, 'virginia tech'), (2020, 'virginia tech'), (2021, 'virginia tech')) + +8/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : whiteley +middlename : m +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of california irvine'),) +all_us_institutions_year : ((1998, 'university of california irvine'),) + +firstname : s +lastname : white +middlename : m +year_range : (2018, 2020) +main_us_institutions_year : ((2018, 'university of california santa cruz'), (2020, 'university of california santa cruz')) +all_us_institutions_year : ((2014, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2020, 'university of california berkeley'), (2020, 'university of california santa cruz')) + +8/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : diana +lastname : liverman +middlename : m +year_range : (1994,) +main_us_institutions_year : ((1994, 'pennsylvania state university'),) +all_us_institutions_year : ((1994, 'pennsylvania state university'),) + +firstname : marilyn +lastname : li +middlename : m +year_range : (2003, 2021) +main_us_institutions_year : ((2004, 'university of southern california'), (2008, 'tulane university'), (2008, 'university of pennsylvania'), (2008, 'university of southern california'), (2014, 'university of southern california'), (2015, 'university of southern california'), (2016, 'university of southern california'), (2017, 'children s hospital of philadelphia'), (2018, 'university of pennsylvania'), (2018, 'children s hospital of philadelphia'), (2019, 'university of pennsylvania'), (2019, 'children s hospital of philadelphia'), (2020, 'children s hospital of philadelphia'), (2021, 'children s hospital of philadelphia')) +all_us_institutions_year : ((2004, 'university of southern california'), (2008, 'tulane university'), (2008, 'university of pennsylvania'), (2008, 'university of southern california'), (2014, 'university of southern california'), (2015, 'university of southern california'), (2016, 'university of southern california'), (2017, 'baylor college of medicine'), (2017, 'children s hospital of philadelphia'), (2017, 'university of pennsylvania'), (2017, 'university of southern california'), (2018, 'children s hospital of philadelphia'), (2018, 'university of pennsylvania'), (2019, 'children s hospital of philadelphia'), (2019, 'university of pennsylvania'), (2020, 'children s hospital of philadelphia'), (2020, 'university of pennsylvania'), (2021, 'children s hospital of philadelphia'), (2021, 'university of pennsylvania')) + +8/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : louise +lastname : fortmann +middlename : p +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of california berkeley'),) +all_us_institutions_year : ((2012, 'university of california berkeley'),) + +firstname : louise +lastname : fox +middlename : None +year_range : (1993, 2021) +main_us_institutions_year : ((1993, 'world bank'), (1999, 'world bank'), (2008, 'world bank'), (2011, 'world bank'), (2012, 'world bank'), (2013, 'world bank'), (2016, 'world bank'), (2016, 'university of california berkeley'), (2017, 'international monetary fund'), (2021, 'university of california berkeley')) +all_us_institutions_year : ((1993, 'world bank'), (1999, 'world bank'), (2001, 'world bank'), (2003, 'world bank'), (2006, 'world bank'), (2008, 'world bank'), (2011, 'world bank'), (2012, 'world bank'), (2013, 'international monetary fund'), (2013, 'world bank'), (2015, 'international monetary fund'), (2015, 'world bank'), (2016, 'university of california berkeley'), (2016, 'world bank'), (2017, 'international monetary fund'), (2017, 'world bank'), (2021, 'university of california berkeley')) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : louise +lastname : fortmann +middlename : p +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of california berkeley'),) +all_us_institutions_year : ((2001, 'university of california berkeley'),) + +firstname : louise +lastname : fox +middlename : None +year_range : (1993, 2021) +main_us_institutions_year : ((1993, 'world bank'), (1999, 'world bank'), (2008, 'world bank'), (2011, 'world bank'), (2012, 'world bank'), (2013, 'world bank'), (2016, 'world bank'), (2016, 'university of california berkeley'), (2017, 'international monetary fund'), (2021, 'university of california berkeley')) +all_us_institutions_year : ((1993, 'world bank'), (1999, 'world bank'), (2001, 'world bank'), (2003, 'world bank'), (2006, 'world bank'), (2008, 'world bank'), (2011, 'world bank'), (2012, 'world bank'), (2013, 'international monetary fund'), (2013, 'world bank'), (2015, 'international monetary fund'), (2015, 'world bank'), (2016, 'university of california berkeley'), (2016, 'world bank'), (2017, 'international monetary fund'), (2017, 'world bank'), (2021, 'university of california berkeley')) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : powers +middlename : o +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of nebraska lincoln'),) +all_us_institutions_year : ((2011, 'university of nebraska lincoln'),) + +firstname : thomas +lastname : power +middlename : michael +year_range : (1980, 2014) +main_us_institutions_year : ((1981, 'university of montana'), (1988, 'washington state university'), (1991, 'university of montana'), (2006, 'university of montana'), (2014, 'university of montana')) +all_us_institutions_year : ((1981, 'university of montana'), (1988, 'washington state university'), (1991, 'university of montana'), (2006, 'university of montana'), (2014, 'university of montana')) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paul +lastname : mitchell +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of wisconsin madison'),) +all_us_institutions_year : ((2014, 'university of wisconsin madison'),) + +firstname : paul +lastname : mitchell +middlename : None +year_range : (1978, 2021) +main_us_institutions_year : ((1996, 'harvard university'), (2008, 'boston children s hospital'), (2010, 'boston children s hospital'), (2011, 'boston children s hospital'), (2012, 'boston children s hospital'), (2013, 'boston children s hospital'), (2014, 'boston children s hospital'), (2015, 'boston children s hospital'), (2016, 'boston children s hospital'), (2017, 'boston children s hospital'), (2018, 'boston children s hospital'), (2019, 'boston children s hospital'), (2020, 'boston children s hospital'), (2021, 'boston children s hospital')) +all_us_institutions_year : ((1996, 'harvard university'), (2008, 'boston children s hospital'), (2010, 'boston children s hospital'), (2011, 'boston children s hospital'), (2012, 'boston children s hospital'), (2012, 'harvard university'), (2013, 'boston children s hospital'), (2014, 'boston children s hospital'), (2014, 'northwestern university'), (2015, 'boston children s hospital'), (2016, 'boston children s hospital'), (2017, 'boston children s hospital'), (2018, 'boston children s hospital'), (2019, 'boston children s hospital'), (2020, 'boston children s hospital'), (2021, 'boston children s hospital'), (2021, 'broad institute'), (2021, 'harvard university')) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : timothy +lastname : delcurto +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'oregon state university'),) +all_us_institutions_year : ((2005, 'oregon state university'),) + +firstname : timothy +lastname : delcurto +middlename : None +year_range : (2020, 2021) +main_us_institutions_year : ((2020, 'montana state university'), (2021, 'montana state university')) +all_us_institutions_year : ((2018, 'montana state university'), (2019, 'montana state university'), (2020, 'montana state university'), (2021, 'montana state university')) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jose +lastname : costa +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of maryland college park'),) +all_us_institutions_year : ((2008, 'university of maryland college park'),) + +firstname : joseph +lastname : costa +middlename : e +year_range : (1988, 2021) +main_us_institutions_year : ((1988, 'marine biological laboratory'), (1990, 'marine biological laboratory'), (1992, 'marine biological laboratory')) +all_us_institutions_year : ((1988, 'marine biological laboratory'), (1990, 'marine biological laboratory'), (1992, 'marine biological laboratory')) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ae +lastname : luloff +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'pennsylvania state university'),) +all_us_institutions_year : ((2012, 'pennsylvania state university'),) + +firstname : a +lastname : luloff +middlename : e +year_range : (1977, 2020) +main_us_institutions_year : ((1981, 'university of new hampshire'), (1986, 'university of new hampshire'), (1988, 'university of new hampshire'), (1990, 'pennsylvania state university'), (1991, 'pennsylvania state university'), (1992, 'pennsylvania state university'), (1993, 'pennsylvania state university'), (1994, 'pennsylvania state university'), (1995, 'pennsylvania state university'), (1996, 'pennsylvania state university'), (1997, 'pennsylvania state university'), (1998, 'pennsylvania state university'), (1999, 'pennsylvania state university'), (2000, 'pennsylvania state university'), (2001, 'pennsylvania state university'), (2002, 'pennsylvania state university'), (2003, 'pennsylvania state university'), (2004, 'pennsylvania state university'), (2005, 'pennsylvania state university'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2020, 'pennsylvania state university')) +all_us_institutions_year : ((1981, 'university of new hampshire'), (1986, 'university of new hampshire'), (1988, 'university of new hampshire'), (1990, 'pennsylvania state university'), (1991, 'pennsylvania state university'), (1992, 'pennsylvania state university'), (1993, 'pennsylvania state university'), (1994, 'pennsylvania state university'), (1995, 'pennsylvania state university'), (1996, 'pennsylvania state university'), (1997, 'pennsylvania state university'), (1998, 'pennsylvania state university'), (1999, 'pennsylvania state university'), (2000, 'pennsylvania state university'), (2001, 'pennsylvania state university'), (2002, 'pennsylvania state university'), (2003, 'pennsylvania state university'), (2004, 'pennsylvania state university'), (2005, 'pennsylvania state university'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2020, 'pennsylvania state university')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christian +lastname : brannstrom +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'texas a m university college station'),) +all_us_institutions_year : ((2011, 'texas a m university college station'),) + +firstname : christian +lastname : brannstrom +middlename : None +year_range : (1995, 2021) +main_us_institutions_year : ((1995, 'university of wisconsin madison'), (2003, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2006, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2009, 'texas a m university'), (2010, 'texas a m university'), (2011, 'texas a m university'), (2012, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2015, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2021, 'texas a m university')) +all_us_institutions_year : ((1995, 'university of wisconsin madison'), (2003, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2006, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2009, 'texas a m university'), (2010, 'texas a m university'), (2010, 'university of texas at austin'), (2011, 'texas a m university'), (2012, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2015, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university'), (2021, 'texas a m university')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : hall +middlename : b +year_range : (1991,) +main_us_institutions_year : ((1991, 'iowa state university'),) +all_us_institutions_year : ((1991, 'iowa state university'),) + +firstname : r +lastname : hall +middlename : b +year_range : (1981, 1997) +main_us_institutions_year : ((1981, 'united states geological survey'),) +all_us_institutions_year : ((1970, 'united states geological survey'), (1972, 'united states geological survey'), (1981, 'united states geological survey')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : kling +middlename : phillip +year_range : (1999,) +main_us_institutions_year : ((1999, 'new mexico state university'),) +all_us_institutions_year : ((1999, 'new mexico state university'),) + +firstname : j +lastname : kling +middlename : g +year_range : (2002, 2020) +main_us_institutions_year : ((2003, 'oregon state university'), (2004, 'oregon state university'), (2005, 'oregon state university'), (2006, 'oregon state university'), (2009, 'oregon state university'), (2010, 'oregon state university'), (2011, 'oregon state university'), (2012, 'oregon state university'), (2015, 'oregon state university'), (2016, 'oregon state university'), (2017, 'oregon state university'), (2018, 'oregon state university'), (2019, 'oregon state university'), (2020, 'oregon state university')) +all_us_institutions_year : ((2003, 'oregon state university'), (2004, 'oregon state university'), (2005, 'oregon state university'), (2006, 'oregon state university'), (2009, 'oregon state university'), (2010, 'oregon state university'), (2011, 'oregon state university'), (2012, 'oregon state university'), (2015, 'oregon state university'), (2016, 'oregon state university'), (2017, 'oregon state university'), (2018, 'oregon state university'), (2019, 'oregon state university'), (2020, 'oregon state university')) + +10/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : day +middlename : j +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of wisconsin milwaukee'),) +all_us_institutions_year : ((1990, 'university of wisconsin milwaukee'),) + +firstname : m +lastname : day +middlename : None +year_range : (2008, 2019) +main_us_institutions_year : ((2008, 'university of new hampshire'), (2012, 'university of new hampshire'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2018, 'university of wisconsin madison'), (2019, 'university of wisconsin madison')) +all_us_institutions_year : ((2008, 'university of new hampshire'), (2012, 'university of new hampshire'), (2013, 'university of wisconsin madison'), (2014, 'university of wisconsin madison'), (2015, 'university of wisconsin madison'), (2016, 'university of wisconsin madison'), (2017, 'university of wisconsin madison'), (2018, 'university of wisconsin madison'), (2019, 'university of wisconsin madison')) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : steven +lastname : wall +middlename : b vander +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of nevada reno'),) +all_us_institutions_year : ((1997, 'university of nevada reno'),) + +firstname : stephen +lastname : wall +middlename : b vander +year_range : (1977, 2019) +main_us_institutions_year : ((1980, 'utah state university'), (1982, 'utah state university'), (1984, 'utah state university'), (1991, 'university of nevada reno'), (1993, 'university of nevada reno'), (1995, 'university of nevada reno'), (1997, 'university of nevada reno'), (1998, 'university of nevada reno'), (1999, 'university of nevada reno'), (2000, 'university of nevada reno'), (2001, 'university of nevada reno'), (2002, 'university of nevada reno'), (2003, 'university of nevada reno'), (2004, 'university of nevada reno'), (2005, 'university of nevada reno'), (2006, 'university of nevada reno'), (2007, 'university of nevada reno'), (2008, 'university of nevada reno'), (2009, 'university of nevada reno'), (2010, 'university of nevada reno'), (2011, 'university of nevada reno'), (2012, 'university of nevada reno'), (2013, 'university of nevada reno'), (2015, 'university of nevada reno'), (2017, 'university of nevada reno'), (2018, 'university of nevada reno'), (2019, 'university of nevada reno')) +all_us_institutions_year : ((1980, 'utah state university'), (1982, 'utah state university'), (1984, 'utah state university'), (1987, 'utah state university'), (1991, 'university of nevada reno'), (1993, 'university of nevada reno'), (1995, 'university of nevada reno'), (1997, 'university of nevada reno'), (1998, 'university of nevada reno'), (1999, 'university of nevada reno'), (2000, 'university of nevada reno'), (2001, 'university of nevada reno'), (2002, 'university of nevada reno'), (2003, 'university of nevada reno'), (2004, 'university of nevada reno'), (2005, 'university of nevada reno'), (2006, 'university of nevada reno'), (2007, 'university of nevada reno'), (2008, 'university of nevada reno'), (2009, 'university of nevada reno'), (2010, 'northern arizona university'), (2010, 'university of nevada reno'), (2010, 'utah state university'), (2011, 'university of nevada reno'), (2012, 'university of nevada reno'), (2013, 'university of nevada reno'), (2015, 'university of nevada reno'), (2017, 'university of nevada reno'), (2018, 'university of nevada reno'), (2019, 'university of nevada reno')) + +10/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : gorchov +middlename : l +year_range : (2002,) +main_us_institutions_year : ((2002, 'miami university oxford'),) +all_us_institutions_year : ((2002, 'miami university oxford'),) + +firstname : david +lastname : gorchov +middlename : l +year_range : (1994, 2007) +main_us_institutions_year : ((1994, 'eastern illinois university'),) +all_us_institutions_year : ((1994, 'eastern illinois university'),) + +11/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : raun +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'oklahoma state university'),) +all_us_institutions_year : ((2005, 'oklahoma state university'),) + +firstname : w +lastname : raun +middlename : r +year_range : (1987, 2021) +main_us_institutions_year : ((1987, 'university of nebraska lincoln'), (1993, 'oklahoma state university stillwater'), (1994, 'oklahoma state university stillwater'), (1995, 'oklahoma state university stillwater'), (1997, 'oklahoma state university stillwater'), (1998, 'oklahoma state university stillwater'), (1999, 'oklahoma state university stillwater'), (2000, 'oklahoma state university stillwater'), (2001, 'oklahoma state university stillwater'), (2002, 'oklahoma state university stillwater'), (2003, 'oklahoma state university stillwater'), (2004, 'oklahoma state university stillwater'), (2005, 'oklahoma state university stillwater'), (2006, 'oklahoma state university stillwater'), (2007, 'oklahoma state university stillwater'), (2008, 'oklahoma state university stillwater'), (2009, 'oklahoma state university stillwater'), (2010, 'oklahoma state university stillwater'), (2011, 'oklahoma state university stillwater'), (2012, 'oklahoma state university stillwater'), (2013, 'oklahoma state university stillwater'), (2014, 'oklahoma state university stillwater'), (2015, 'oklahoma state university stillwater'), (2016, 'oklahoma state university stillwater'), (2017, 'oklahoma state university stillwater'), (2018, 'oklahoma state university stillwater'), (2019, 'oklahoma state university stillwater'), (2020, 'oklahoma state university stillwater'), (2021, 'oklahoma state university stillwater')) +all_us_institutions_year : ((1987, 'university of nebraska lincoln'), (1993, 'oklahoma state university stillwater'), (1994, 'oklahoma state university stillwater'), (1995, 'oklahoma state university stillwater'), (1996, 'oklahoma state university stillwater'), (1997, 'oklahoma state university stillwater'), (1998, 'oklahoma state university stillwater'), (1999, 'oklahoma state university stillwater'), (2000, 'oklahoma state university stillwater'), (2001, 'oklahoma state university stillwater'), (2002, 'oklahoma state university stillwater'), (2003, 'oklahoma state university stillwater'), (2004, 'oklahoma state university stillwater'), (2005, 'oklahoma state university stillwater'), (2006, 'oklahoma state university stillwater'), (2007, 'oklahoma state university stillwater'), (2008, 'oklahoma state university stillwater'), (2009, 'oklahoma state university stillwater'), (2010, 'oklahoma state university stillwater'), (2011, 'oklahoma state university stillwater'), (2012, 'oklahoma state university stillwater'), (2013, 'oklahoma state university stillwater'), (2014, 'oklahoma state university stillwater'), (2015, 'oklahoma state university stillwater'), (2016, 'oklahoma state university stillwater'), (2017, 'oklahoma state university stillwater'), (2018, 'oklahoma state university stillwater'), (2019, 'oklahoma state university stillwater'), (2020, 'oklahoma state university stillwater'), (2021, 'oklahoma state university stillwater')) + +11/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : k +lastname : yener +middlename : aslihan +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of chicago'),) +all_us_institutions_year : ((2003, 'university of chicago'),) + +firstname : k +lastname : yener +middlename : aslihan +year_range : (1989, 1993) +main_us_institutions_year : ((1989, 'smithsonian institution'), (1992, 'smithsonian institution')) +all_us_institutions_year : ((1989, 'smithsonian institution'), (1992, 'smithsonian institution')) + +12/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kenneth +lastname : burnham +middlename : p +year_range : (1999,) +main_us_institutions_year : ((1999, 'colorado state university'),) +all_us_institutions_year : ((1999, 'colorado state university'),) + +firstname : k +lastname : burnham +middlename : p +year_range : (1995, 1998) +main_us_institutions_year : ((1998, 'united states geological survey'),) +all_us_institutions_year : ((1998, 'united states geological survey'),) + +12/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1666.9129019061725 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_geology_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_geology_christoph_degree0_advisors_9015.log index 92e7fde..ad4a5ff 100644 --- a/src/dataprep/temp/trainlink_mag_proquest_geology_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/trainlink_mag_proquest_geology_christoph_degree0_advisors_9015.log @@ -5,7 +5,7 @@ Testing is False I set the write connection to the main database. id_field is [127313418] and will be passed to sql queries. finished setup ... -Time elapsed: 0.0007378538449605306 minutes +Time elapsed: 0.0006251414616902669 minutes SELECT relationship_id @@ -136,773 +136,5 @@ Time elapsed: 0.0007378538449605306 minutes WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL -Time elapsed: 17.22273528178533 minutes - -Starting active labeling... -firstname : david -lastname : mcconnell -middlename : None -year_range : (2014,) -main_us_institutions_year : ((2014, 'north carolina state university'),) -all_us_institutions_year : ((2014, 'north carolina state university'),) - -firstname : david -lastname : mcconnell -middlename : a -year_range : (1989, 2021) -main_us_institutions_year : ((1989, 'texas a m university'), (1990, 'texas a m university'), (1994, 'university of akron'), (1997, 'university of akron'), (2003, 'university of akron'), (2005, 'florida state university college of arts and sciences'), (2005, 'university of akron'), (2006, 'university of akron'), (2008, 'university of akron'), (2011, 'north carolina state university'), (2013, 'north carolina state university'), (2014, 'north carolina state university'), (2015, 'north carolina state university'), (2016, 'north carolina state university'), (2017, 'north carolina state university'), (2018, 'north carolina state university'), (2019, 'north carolina state university'), (2021, 'north carolina state university')) -all_us_institutions_year : ((1989, 'texas a m university'), (1990, 'texas a m university'), (1993, 'university of akron'), (1994, 'university of akron'), (1997, 'university of akron'), (2003, 'university of akron'), (2005, 'florida state university college of arts and sciences'), (2005, 'university of akron'), (2006, 'university of akron'), (2008, 'university of akron'), (2011, 'north carolina state university'), (2013, 'north carolina state university'), (2014, 'north carolina state university'), (2015, 'north carolina state university'), (2016, 'north carolina state university'), (2017, 'north carolina state university'), (2018, 'north carolina state university'), (2019, 'north carolina state university'), (2021, 'north carolina state university')) - -0/10 positive, 0/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : duncan -lastname : fitzgerald -middlename : None -year_range : (2007,) -main_us_institutions_year : ((2007, 'boston university'),) -all_us_institutions_year : ((2007, 'boston university'),) - -firstname : duncan -lastname : fitzgerald -middlename : m -year_range : (1978, 2021) -main_us_institutions_year : ((1983, 'boston university'), (1984, 'boston university'), (1987, 'boston university'), (1988, 'boston university'), (1990, 'boston university'), (1995, 'boston university'), (1996, 'boston university'), (1998, 'boston university'), (2000, 'boston university'), (2001, 'boston university'), (2002, 'boston university'), (2003, 'boston university'), (2004, 'boston university'), (2005, 'boston university'), (2006, 'boston university'), (2007, 'boston university'), (2008, 'boston university'), (2009, 'boston university'), (2011, 'boston university'), (2012, 'boston university'), (2013, 'boston university'), (2014, 'boston university'), (2015, 'boston university'), (2016, 'boston university'), (2017, 'boston university'), (2018, 'boston university'), (2019, 'boston university'), (2020, 'boston university'), (2021, 'boston university')) -all_us_institutions_year : ((1982, 'boston university'), (1983, 'boston university'), (1983, 'university of maine'), (1984, 'boston university'), (1987, 'boston university'), (1988, 'boston university'), (1990, 'boston university'), (1994, 'boston university'), (1995, 'boston university'), (1996, 'boston university'), (1998, 'boston university'), (2000, 'boston university'), (2001, 'boston university'), (2002, 'boston university'), (2003, 'boston university'), (2004, 'boston university'), (2005, 'boston university'), (2006, 'boston university'), (2007, 'boston university'), (2008, 'boston university'), (2009, 'boston university'), (2011, 'boston university'), (2012, 'boston university'), (2013, 'boston university'), (2014, 'boston university'), (2015, 'boston university'), (2016, 'boston university'), (2017, 'boston university'), (2018, 'boston university'), (2019, 'boston university'), (2020, 'boston university'), (2021, 'boston university')) - -1/10 positive, 0/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : rongxing -lastname : li -middlename : None -year_range : (2004,) -main_us_institutions_year : ((2004, 'ohio state university'),) -all_us_institutions_year : ((2004, 'ohio state university'),) - -firstname : rongxing -lastname : li -middlename : None -year_range : (2012, 2021) -main_us_institutions_year : None -all_us_institutions_year : ((2013, 'ohio state university'),) - -2/10 positive, 0/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : philip -lastname : liu -middlename : lf -year_range : (2015,) -main_us_institutions_year : ((2015, 'cornell university'),) -all_us_institutions_year : ((2015, 'cornell university'),) - -firstname : philip -lastname : liu -middlename : l f -year_range : (1973, 2021) -main_us_institutions_year : ((1973, 'massachusetts institute of technology'), (1975, 'cornell university'), (1977, 'cornell university'), (1978, 'cornell university'), (1979, 'cornell university'), (1980, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1983, 'cornell university'), (1984, 'cornell university'), (1985, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university')) -all_us_institutions_year : ((1973, 'massachusetts institute of technology'), (1975, 'cornell university'), (1975, 'massachusetts institute of technology'), (1977, 'cornell university'), (1978, 'cornell university'), (1979, 'cornell university'), (1980, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1983, 'cornell university'), (1984, 'cornell university'), (1985, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2010, 'northwestern university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) - -2/10 positive, 1/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : stephen -lastname : self -middlename : None -year_range : (1995,) -main_us_institutions_year : ((1995, 'university of hawaii at manoa'),) -all_us_institutions_year : ((1995, 'university of hawaii at manoa'),) - -firstname : stephen -lastname : self -middlename : None -year_range : (1973, 2021) -main_us_institutions_year : ((1978, 'dartmouth college'), (1979, 'dartmouth college'), (1980, 'dartmouth college'), (1981, 'arizona state university'), (1982, 'arizona state university'), (1983, 'arizona state university'), (1983, 'university of texas at arlington'), (1983, 'dartmouth college'), (1984, 'university of texas at arlington'), (1985, 'university of texas at arlington'), (1986, 'university of texas at arlington'), (1987, 'university of texas at arlington'), (1988, 'university of texas at arlington'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of hawaii'), (1992, 'university of hawaii at manoa'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of hawaii at manoa'), (1996, 'university of hawaii at manoa'), (1997, 'university of hawaii at manoa'), (2000, 'university of hawaii'), (2000, 'university of hawaii at manoa'), (2001, 'university of hawaii'), (2001, 'university of hawaii at manoa'), (2016, 'university of california berkeley')) -all_us_institutions_year : ((1978, 'dartmouth college'), (1979, 'dartmouth college'), (1980, 'arizona state university'), (1980, 'dartmouth college'), (1981, 'arizona state university'), (1982, 'arizona state university'), (1983, 'arizona state university'), (1983, 'dartmouth college'), (1983, 'university of texas at arlington'), (1984, 'university of texas at arlington'), (1985, 'university of texas at arlington'), (1986, 'dartmouth college'), (1986, 'university of texas at arlington'), (1987, 'dartmouth college'), (1987, 'university of texas at arlington'), (1988, 'university of texas at arlington'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of hawaii'), (1992, 'university of hawaii'), (1992, 'university of hawaii at manoa'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of hawaii'), (1995, 'university of hawaii at manoa'), (1996, 'university of hawaii'), (1996, 'university of hawaii at manoa'), (1997, 'university of hawaii at manoa'), (2000, 'university of hawaii'), (2000, 'university of hawaii at manoa'), (2001, 'university of hawaii'), (2001, 'university of hawaii at manoa'), (2003, 'university of hawaii'), (2003, 'university of hawaii at manoa'), (2013, 'arizona state university'), (2013, 'university of hawaii at manoa'), (2016, 'university of california berkeley')) - -3/10 positive, 1/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : donald -lastname : klein -middlename : a -year_range : (1991,) -main_us_institutions_year : ((1991, 'colorado state university'),) -all_us_institutions_year : ((1991, 'colorado state university'),) - -firstname : donald -lastname : klein -middlename : f -year_range : (1960, 2019) -main_us_institutions_year : ((1978, 'stony brook university'), (1980, 'columbia university'), (1981, 'columbia university'), (1982, 'columbia university'), (1983, 'columbia university'), (1984, 'columbia university'), (1985, 'columbia university'), (1986, 'columbia university'), (1987, 'columbia university'), (1988, 'columbia university'), (1989, 'columbia university'), (1990, 'columbia university'), (1991, 'columbia university'), (1992, 'columbia university'), (1993, 'columbia university'), (1994, 'columbia university'), (1995, 'columbia university'), (1996, 'columbia university'), (1997, 'columbia university'), (1998, 'columbia university'), (1999, 'columbia university'), (2000, 'columbia university'), (2001, 'columbia university'), (2002, 'columbia university'), (2003, 'columbia university'), (2004, 'columbia university'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'columbia university'), (2009, 'columbia university'), (2009, 'new york university'), (2010, 'columbia university'), (2011, 'new york university'), (2012, 'new york university'), (2013, 'new york university'), (2016, 'columbia university')) -all_us_institutions_year : ((1978, 'stony brook university'), (1980, 'columbia university'), (1981, 'columbia university'), (1982, 'columbia university'), (1983, 'columbia university'), (1984, 'columbia university'), (1985, 'columbia university'), (1986, 'columbia university'), (1987, 'columbia university'), (1988, 'columbia university'), (1988, 'nathan kline institute for psychiatric research'), (1989, 'columbia university'), (1990, 'columbia university'), (1991, 'columbia university'), (1992, 'columbia university'), (1993, 'columbia university'), (1994, 'columbia university'), (1995, 'columbia university'), (1996, 'columbia university'), (1997, 'columbia university'), (1998, 'columbia university'), (1999, 'columbia university'), (2000, 'columbia university'), (2001, 'columbia university'), (2002, 'columbia university'), (2002, 'new york university'), (2002, 'veterans health administration'), (2003, 'columbia university'), (2004, 'columbia university'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2008, 'columbia university'), (2008, 'nathan kline institute for psychiatric research'), (2008, 'new york university'), (2009, 'columbia university'), (2009, 'new york university'), (2010, 'columbia university'), (2010, 'nathan kline institute for psychiatric research'), (2010, 'new york university'), (2011, 'nathan kline institute for psychiatric research'), (2011, 'new york university'), (2012, 'new york university'), (2013, 'new york university'), (2016, 'columbia university')) - -4/10 positive, 1/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : william -lastname : wood -middlename : l -year_range : (1995,) -main_us_institutions_year : ((1995, 'purdue university'),) -all_us_institutions_year : ((1995, 'purdue university'),) - -firstname : william -lastname : woods -middlename : i -year_range : (1977, 2015) -main_us_institutions_year : ((1984, 'southern illinois university carbondale'), (2001, 'southern illinois university edwardsville'), (2002, 'southern illinois university edwardsville'), (2003, 'southern illinois university carbondale'), (2004, 'southern illinois university edwardsville'), (2005, 'southern illinois university carbondale'), (2006, 'university of kansas'), (2006, 'southern illinois university edwardsville'), (2010, 'university of kansas'), (2011, 'university of kansas'), (2013, 'university of kansas'), (2015, 'university of kansas')) -all_us_institutions_year : ((1984, 'southern illinois university carbondale'), (2001, 'southern illinois university edwardsville'), (2002, 'southern illinois university edwardsville'), (2003, 'southern illinois university carbondale'), (2004, 'southern illinois university edwardsville'), (2004, 'university of kansas'), (2005, 'southern illinois university carbondale'), (2006, 'southern illinois university edwardsville'), (2006, 'university of kansas'), (2009, 'university of kansas'), (2010, 'university of kansas'), (2011, 'university of kansas'), (2013, 'university of kansas'), (2015, 'university of kansas')) - -4/10 positive, 2/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : abdul -lastname : shakoor -middlename : None -year_range : (2007,) -main_us_institutions_year : ((2007, 'kent state university'),) -all_us_institutions_year : ((2007, 'kent state university'),) - -firstname : abdul -lastname : shakoori -middlename : rauf -year_range : (1972, 2021) -main_us_institutions_year : ((1993, 'university of massachusetts amherst'), (1994, 'university of massachusetts amherst')) -all_us_institutions_year : ((1992, 'university of massachusetts amherst'), (1993, 'university of massachusetts amherst'), (1994, 'university of massachusetts amherst'), (2008, 'university of massachusetts medical school')) - -4/10 positive, 3/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : robert -lastname : jacobi -middlename : d -year_range : (1997,) -main_us_institutions_year : ((1997, 'university at buffalo'),) -all_us_institutions_year : ((1997, 'university at buffalo'),) - -firstname : robert -lastname : jacob -middlename : None -year_range : (1992, 2020) -main_us_institutions_year : ((2000, 'university of chicago'), (2001, 'university of chicago'), (2001, 'argonne national laboratory'), (2002, 'argonne national laboratory'), (2003, 'argonne national laboratory'), (2004, 'argonne national laboratory'), (2005, 'argonne national laboratory'), (2006, 'argonne national laboratory'), (2007, 'argonne national laboratory'), (2008, 'argonne national laboratory'), (2011, 'argonne national laboratory'), (2012, 'argonne national laboratory'), (2013, 'argonne national laboratory'), (2014, 'argonne national laboratory'), (2015, 'argonne national laboratory'), (2016, 'argonne national laboratory'), (2017, 'argonne national laboratory'), (2019, 'argonne national laboratory'), (2020, 'argonne national laboratory')) -all_us_institutions_year : ((2000, 'university of chicago'), (2001, 'argonne national laboratory'), (2001, 'university of chicago'), (2002, 'argonne national laboratory'), (2003, 'argonne national laboratory'), (2004, 'argonne national laboratory'), (2005, 'argonne national laboratory'), (2006, 'argonne national laboratory'), (2006, 'university of chicago'), (2007, 'argonne national laboratory'), (2008, 'argonne national laboratory'), (2011, 'argonne national laboratory'), (2012, 'argonne national laboratory'), (2013, 'argonne national laboratory'), (2013, 'university of chicago'), (2014, 'argonne national laboratory'), (2015, 'argonne national laboratory'), (2015, 'university of chicago'), (2016, 'argonne national laboratory'), (2017, 'argonne national laboratory'), (2019, 'argonne national laboratory'), (2020, 'argonne national laboratory'), (2021, 'argonne national laboratory')) - -4/10 positive, 4/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : peter -lastname : shultz -middlename : None -year_range : (2006,) -main_us_institutions_year : ((2006, 'brown university'),) -all_us_institutions_year : ((2006, 'brown university'),) - -firstname : p -lastname : schultz -middlename : h -year_range : (1973, 2021) -main_us_institutions_year : ((1977, 'universities space research association'), (1980, 'lunar and planetary institute'), (1981, 'university of utah'), (1983, 'lunar and planetary institute'), (1985, 'lunar and planetary institute'), (1987, 'brown university'), (1988, 'brown university'), (1989, 'brown university'), (1990, 'brown university'), (1991, 'brown university'), (1992, 'brown university'), (1993, 'brown university'), (1994, 'brown university'), (1996, 'brown university'), (1997, 'brown university'), (1998, 'brown university'), (1999, 'brown university'), (2001, 'brown university'), (2002, 'brown university'), (2003, 'brown university'), (2004, 'brown university'), (2005, 'brown university'), (2006, 'brown university'), (2007, 'brown university'), (2008, 'brown university'), (2009, 'brown university'), (2010, 'brown university'), (2011, 'brown university'), (2012, 'brown university'), (2013, 'brown university'), (2014, 'brown university'), (2015, 'brown university'), (2016, 'brown university'), (2017, 'brown university'), (2018, 'brown university'), (2019, 'brown university'), (2020, 'brown university')) -all_us_institutions_year : ((1975, 'ames research center'), (1976, 'ames research center'), (1977, 'universities space research association'), (1980, 'lunar and planetary institute'), (1981, 'university of utah'), (1982, 'lunar and planetary institute'), (1983, 'lunar and planetary institute'), (1985, 'lunar and planetary institute'), (1987, 'brown university'), (1988, 'brown university'), (1989, 'brown university'), (1990, 'brown university'), (1991, 'brown university'), (1992, 'brown university'), (1993, 'brown university'), (1994, 'brown university'), (1996, 'brown university'), (1997, 'brown university'), (1998, 'brown university'), (1999, 'brown university'), (2001, 'brown university'), (2002, 'brown university'), (2003, 'brown university'), (2004, 'brown university'), (2005, 'brown university'), (2006, 'brown university'), (2007, 'brown university'), (2008, 'brown university'), (2009, 'brown university'), (2010, 'brown university'), (2011, 'brown university'), (2012, 'brown university'), (2013, 'brown university'), (2014, 'brown university'), (2015, 'brown university'), (2015, 'planetary science institute'), (2016, 'brown university'), (2017, 'brown university'), (2018, 'brown university'), (2019, 'brown university'), (2020, 'brown university')) - -4/10 positive, 5/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : m -lastname : toksoz -middlename : nafi -year_range : (1996,) -main_us_institutions_year : ((1996, 'massachusetts institute of technology'),) -all_us_institutions_year : ((1996, 'massachusetts institute of technology'),) - -firstname : m -lastname : tokoz -middlename : nafi -year_range : (2015, 2017) -main_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology')) -all_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology')) - -4/10 positive, 5/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : m -lastname : toksoz -middlename : nafi -year_range : (1993,) -main_us_institutions_year : ((1993, 'massachusetts institute of technology'),) -all_us_institutions_year : ((1993, 'massachusetts institute of technology'),) - -firstname : m -lastname : tokoz -middlename : nafi -year_range : (2015, 2017) -main_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology')) -all_us_institutions_year : ((2015, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology')) - -4/10 positive, 6/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : julia -lastname : clarke -middlename : None -year_range : (2009,) -main_us_institutions_year : ((2009, 'north carolina state university'),) -all_us_institutions_year : ((2009, 'north carolina state university'),) - -firstname : s -lastname : clark -middlename : e -year_range : (2011, 2021) -main_us_institutions_year : ((2011, 'university of north carolina at chapel hill'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2019, 'princeton university')) -all_us_institutions_year : ((2010, 'university of north carolina at chapel hill'), (2011, 'university of north carolina at chapel hill'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2016, 'university of north carolina at chapel hill'), (2017, 'columbia university'), (2019, 'princeton university')) - -4/10 positive, 7/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : martin -lastname : wahlen -middlename : None -year_range : (1998,) -main_us_institutions_year : ((1998, 'university of california san diego'),) -all_us_institutions_year : ((1998, 'university of california san diego'),) - -firstname : michael -lastname : wahl -middlename : h -year_range : (1992, 2011) -main_us_institutions_year : ((1992, 'university of california san diego'), (1994, 'university of california san diego'), (1995, 'university of california san diego'), (1996, 'university of california san diego'), (1997, 'university of california san diego')) -all_us_institutions_year : ((1992, 'university of california san diego'), (1994, 'university of california san diego'), (1995, 'university of california san diego'), (1996, 'university of california san diego'), (1997, 'university of california san diego')) - -4/10 positive, 8/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : larry -lastname : wilding -middlename : p -year_range : (2001,) -main_us_institutions_year : ((2001, 'texas a m university college station'),) -all_us_institutions_year : ((2001, 'texas a m university college station'),) - -firstname : l -lastname : wilding -middlename : p -year_range : (1968, 2009) -main_us_institutions_year : ((1983, 'texas a m university'), (1987, 'texas a m university'), (1988, 'texas a m university'), (1989, 'texas a m university'), (1990, 'texas a m university'), (1991, 'texas a m university'), (1992, 'texas a m university'), (1993, 'texas a m university'), (1994, 'texas a m university'), (1995, 'texas a m university'), (1996, 'texas a m university'), (1997, 'texas a m university'), (1998, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university'), (2003, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2009, 'texas a m university')) -all_us_institutions_year : ((1983, 'texas a m university'), (1987, 'texas a m university'), (1988, 'texas a m university'), (1989, 'texas a m university'), (1990, 'texas a m university'), (1991, 'texas a m university'), (1992, 'texas a m university'), (1993, 'colorado state university'), (1993, 'texas a m university'), (1994, 'texas a m university'), (1995, 'texas a m university'), (1996, 'texas a m university'), (1997, 'texas a m university'), (1998, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university'), (2003, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2009, 'texas a m university'), (2015, 'texas a m university')) - -4/10 positive, 9/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : michael -lastname : bell -middlename : m -year_range : (1999,) -main_us_institutions_year : ((1999, 'iowa state university'),) -all_us_institutions_year : ((1999, 'iowa state university'),) - -firstname : m -lastname : bell -middlename : None -year_range : (2003, 2020) -main_us_institutions_year : ((2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'kansas wesleyan university')) -all_us_institutions_year : ((2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2013, 'university of illinois at urbana champaign'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'kansas wesleyan university'), (2020, 'pennsylvania state university')) - -5/10 positive, 9/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : surendra -lastname : saxena -middlename : k -year_range : (1990,) -main_us_institutions_year : ((1990, 'illinois institute of technology'),) -all_us_institutions_year : ((1990, 'illinois institute of technology'),) - -firstname : s -lastname : saxena -middlename : k -year_range : (1972, 2021) -main_us_institutions_year : ((1972, 'virginia tech'), (1974, 'virginia tech'), (1976, 'brooklyn college'), (1979, 'brooklyn college'), (1981, 'brooklyn college'), (1982, 'brooklyn college'), (1983, 'brooklyn college'), (1984, 'brooklyn college'), (1985, 'brooklyn college'), (1986, 'brooklyn college'), (1987, 'brooklyn college'), (1988, 'brooklyn college'), (1989, 'brooklyn college'), (1990, 'brooklyn college'), (2001, 'florida international university'), (2002, 'florida international university'), (2003, 'florida international university'), (2004, 'florida international university'), (2005, 'florida international university'), (2006, 'florida international university'), (2007, 'florida international university'), (2008, 'florida international university'), (2009, 'florida international university'), (2010, 'florida international university'), (2011, 'florida international university'), (2012, 'florida international university'), (2014, 'florida international university'), (2015, 'florida international university'), (2016, 'florida international university'), (2017, 'florida international university'), (2018, 'florida international university'), (2019, 'florida international university'), (2021, 'florida international university')) -all_us_institutions_year : ((1972, 'virginia tech'), (1974, 'virginia tech'), (1976, 'brooklyn college'), (1979, 'brooklyn college'), (1981, 'brooklyn college'), (1982, 'brooklyn college'), (1983, 'brooklyn college'), (1984, 'brooklyn college'), (1985, 'brooklyn college'), (1986, 'brooklyn college'), (1986, 'the graduate center cuny'), (1987, 'brooklyn college'), (1987, 'the graduate center cuny'), (1988, 'brooklyn college'), (1989, 'brooklyn college'), (1990, 'brooklyn college'), (1991, 'brooklyn college'), (2000, 'florida international university'), (2001, 'florida international university'), (2002, 'florida international university'), (2003, 'florida international university'), (2004, 'florida international university'), (2005, 'florida international university'), (2006, 'florida international university'), (2007, 'florida international university'), (2008, 'florida international university'), (2009, 'florida international university'), (2010, 'florida international university'), (2011, 'florida international university'), (2012, 'florida international university'), (2013, 'florida international university'), (2014, 'florida international university'), (2015, 'florida international university'), (2016, 'florida international university'), (2017, 'florida international university'), (2018, 'florida international university'), (2019, 'florida international university'), (2021, 'florida international university')) - -5/10 positive, 10/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : stephen -lastname : self -middlename : None -year_range : (2001,) -main_us_institutions_year : ((2001, 'university of hawaii at manoa'),) -all_us_institutions_year : ((2001, 'university of hawaii at manoa'),) - -firstname : stephen -lastname : self -middlename : None -year_range : (1980, 2020) -main_us_institutions_year : ((2009, 'nuclear regulatory commission'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of california berkeley'), (2018, 'planetary science institute'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley')) -all_us_institutions_year : ((2009, 'nuclear regulatory commission'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of california berkeley'), (2018, 'planetary science institute'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley')) - -5/10 positive, 11/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : stephen -lastname : mckeever -middlename : w s -year_range : (2001,) -main_us_institutions_year : ((2001, 'oklahoma state university'),) -all_us_institutions_year : ((2001, 'oklahoma state university'),) - -firstname : s -lastname : mckeever -middlename : w s -year_range : (1975, 2021) -main_us_institutions_year : ((1985, 'oklahoma state university stillwater'), (1986, 'oklahoma state university stillwater'), (1988, 'oklahoma state university stillwater'), (1991, 'oklahoma state university stillwater'), (1993, 'oklahoma state university stillwater'), (1994, 'oklahoma state university stillwater'), (1995, 'oklahoma state university stillwater'), (1996, 'oklahoma state university stillwater'), (1997, 'oklahoma state university stillwater'), (1998, 'oklahoma state university stillwater'), (2000, 'oklahoma state university stillwater'), (2001, 'oklahoma state university stillwater'), (2002, 'oklahoma state university stillwater'), (2003, 'oklahoma state university stillwater'), (2004, 'oklahoma state university stillwater'), (2005, 'oklahoma state university stillwater'), (2006, 'oklahoma state university stillwater'), (2007, 'oklahoma state university stillwater'), (2008, 'oklahoma state university stillwater'), (2009, 'oklahoma state university stillwater'), (2010, 'oklahoma state university stillwater'), (2011, 'oklahoma state university stillwater'), (2014, 'oklahoma state university stillwater'), (2016, 'oklahoma state university stillwater'), (2017, 'oklahoma state university stillwater'), (2018, 'oklahoma state university stillwater'), (2019, 'oklahoma state university stillwater'), (2020, 'oklahoma state university stillwater'), (2021, 'oklahoma state university stillwater')) -all_us_institutions_year : ((1985, 'oklahoma state university stillwater'), (1986, 'oklahoma state university stillwater'), (1988, 'oklahoma state university stillwater'), (1991, 'oklahoma state university stillwater'), (1993, 'oklahoma state university stillwater'), (1994, 'oklahoma state university stillwater'), (1995, 'oklahoma state university stillwater'), (1996, 'oklahoma state university stillwater'), (1997, 'oklahoma state university stillwater'), (1998, 'oklahoma state university stillwater'), (1999, 'oklahoma state university stillwater'), (2000, 'oklahoma state university stillwater'), (2001, 'oklahoma state university stillwater'), (2002, 'oklahoma state university stillwater'), (2003, 'oklahoma state university stillwater'), (2004, 'oklahoma state university stillwater'), (2005, 'oklahoma state university stillwater'), (2006, 'oklahoma state university stillwater'), (2007, 'oklahoma state university stillwater'), (2008, 'oklahoma state university stillwater'), (2009, 'oklahoma state university stillwater'), (2010, 'oklahoma state university stillwater'), (2011, 'oklahoma state university stillwater'), (2014, 'oklahoma state university stillwater'), (2016, 'oklahoma state university stillwater'), (2017, 'oklahoma state university stillwater'), (2018, 'oklahoma state university stillwater'), (2019, 'oklahoma state university stillwater'), (2020, 'oklahoma state university stillwater'), (2021, 'oklahoma state university stillwater')) - -5/10 positive, 12/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : e -lastname : bettis -middlename : arthur -year_range : (2014,) -main_us_institutions_year : ((2014, 'university of iowa'),) -all_us_institutions_year : ((2014, 'university of iowa'),) - -firstname : e -lastname : bettis -middlename : a -year_range : (1990, 1996) -main_us_institutions_year : ((1990, 'iowa department of natural resources'), (1991, 'iowa department of natural resources'), (1993, 'iowa department of natural resources')) -all_us_institutions_year : ((1990, 'iowa department of natural resources'), (1991, 'iowa department of natural resources'), (1993, 'iowa department of natural resources')) - -6/10 positive, 12/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : peter -lastname : keken -middlename : e van -year_range : (2002,) -main_us_institutions_year : ((2002, 'university of michigan'),) -all_us_institutions_year : ((2002, 'university of michigan'),) - -firstname : peter -lastname : keken -middlename : e van -year_range : (1994, 1995) -main_us_institutions_year : ((1994, 'university of minnesota'), (1995, 'university of minnesota')) -all_us_institutions_year : ((1994, 'university of minnesota'), (1995, 'university of minnesota')) - -6/10 positive, 13/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : andrew -lastname : lacis -middlename : a -year_range : (1997,) -main_us_institutions_year : ((1997, 'columbia university'),) -all_us_institutions_year : ((1997, 'columbia university'),) - -firstname : andrew -lastname : lacis -middlename : a -year_range : (1974, 2021) -main_us_institutions_year : ((1974, 'computer sciences corporation'), (1974, 'goddard institute for space studies'), (1976, 'goddard space flight center'), (1977, 'goddard institute for space studies'), (1979, 'goddard institute for space studies'), (1980, 'goddard institute for space studies'), (1981, 'goddard space flight center'), (1983, 'goddard space flight center'), (1985, 'goddard space flight center'), (1988, 'goddard space flight center'), (1989, 'goddard space flight center'), (1990, 'goddard space flight center'), (1990, 'goddard institute for space studies'), (1991, 'goddard institute for space studies'), (1992, 'goddard institute for space studies'), (1993, 'goddard institute for space studies'), (1994, 'goddard institute for space studies'), (1995, 'goddard institute for space studies'), (1996, 'goddard institute for space studies'), (1997, 'goddard institute for space studies'), (1998, 'goddard institute for space studies'), (1999, 'goddard institute for space studies'), (2000, 'goddard institute for space studies'), (2001, 'goddard institute for space studies'), (2002, 'goddard institute for space studies'), (2003, 'goddard institute for space studies'), (2004, 'goddard institute for space studies'), (2005, 'goddard institute for space studies'), (2006, 'goddard institute for space studies'), (2007, 'goddard institute for space studies'), (2009, 'goddard institute for space studies'), (2010, 'goddard institute for space studies'), (2011, 'goddard institute for space studies'), (2012, 'goddard institute for space studies'), (2013, 'goddard institute for space studies'), (2014, 'goddard institute for space studies'), (2015, 'goddard institute for space studies'), (2016, 'goddard institute for space studies'), (2017, 'goddard institute for space studies'), (2019, 'goddard institute for space studies'), (2020, 'goddard institute for space studies'), (2021, 'goddard institute for space studies')) -all_us_institutions_year : ((1974, 'computer sciences corporation'), (1974, 'goddard institute for space studies'), (1976, 'goddard space flight center'), (1977, 'goddard institute for space studies'), (1979, 'goddard institute for space studies'), (1980, 'goddard institute for space studies'), (1981, 'goddard space flight center'), (1983, 'goddard space flight center'), (1985, 'goddard space flight center'), (1988, 'goddard space flight center'), (1989, 'goddard space flight center'), (1990, 'goddard institute for space studies'), (1990, 'goddard space flight center'), (1991, 'goddard institute for space studies'), (1992, 'goddard institute for space studies'), (1993, 'goddard institute for space studies'), (1993, 'goddard space flight center'), (1994, 'goddard institute for space studies'), (1995, 'goddard institute for space studies'), (1996, 'goddard institute for space studies'), (1997, 'goddard institute for space studies'), (1998, 'goddard institute for space studies'), (1999, 'goddard institute for space studies'), (2000, 'goddard institute for space studies'), (2001, 'goddard institute for space studies'), (2002, 'goddard institute for space studies'), (2003, 'goddard institute for space studies'), (2003, 'goddard space flight center'), (2004, 'goddard institute for space studies'), (2005, 'columbia university'), (2005, 'goddard institute for space studies'), (2006, 'columbia university'), (2006, 'goddard institute for space studies'), (2007, 'goddard institute for space studies'), (2009, 'goddard institute for space studies'), (2010, 'goddard institute for space studies'), (2011, 'goddard institute for space studies'), (2012, 'goddard institute for space studies'), (2013, 'goddard institute for space studies'), (2013, 'goddard space flight center'), (2014, 'goddard institute for space studies'), (2014, 'goddard space flight center'), (2015, 'goddard institute for space studies'), (2015, 'goddard space flight center'), (2016, 'goddard institute for space studies'), (2017, 'goddard institute for space studies'), (2018, 'goddard institute for space studies'), (2019, 'goddard institute for space studies'), (2020, 'goddard institute for space studies'), (2021, 'goddard institute for space studies')) - -6/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : wm -lastname : franklin -middlename : randolph -year_range : (1998,) -main_us_institutions_year : ((1998, 'rensselaer polytechnic institute'),) -all_us_institutions_year : ((1998, 'rensselaer polytechnic institute'),) - -firstname : w -lastname : franklin -middlename : randolph -year_range : (1979, 2020) -main_us_institutions_year : ((1979, 'rensselaer polytechnic institute'), (1981, 'rensselaer polytechnic institute'), (1983, 'rensselaer polytechnic institute'), (1985, 'rensselaer polytechnic institute'), (1986, 'rensselaer polytechnic institute'), (1990, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2010, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2013, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2015, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2018, 'rensselaer polytechnic institute'), (2020, 'rensselaer polytechnic institute')) -all_us_institutions_year : ((1979, 'rensselaer polytechnic institute'), (1981, 'rensselaer polytechnic institute'), (1983, 'rensselaer polytechnic institute'), (1985, 'rensselaer polytechnic institute'), (1986, 'rensselaer polytechnic institute'), (1990, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2010, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2013, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2015, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2018, 'rensselaer polytechnic institute'), (2020, 'rensselaer polytechnic institute')) - -6/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : malcolm -lastname : mckenna -middlename : None -year_range : (1996,) -main_us_institutions_year : ((1996, 'columbia university'),) -all_us_institutions_year : ((1996, 'columbia university'),) - -firstname : malcolm -lastname : mckenna -middlename : c -year_range : (2004, 2007) -main_us_institutions_year : ((2004, 'university of wyoming'), (2004, 'university of colorado boulder'), (2005, 'university of wyoming'), (2007, 'university of wyoming')) -all_us_institutions_year : ((2004, 'university of colorado boulder'), (2004, 'university of wyoming'), (2005, 'university of wyoming'), (2007, 'university of wyoming')) - -7/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : karen -lastname : damm -middlename : l von -year_range : (2001,) -main_us_institutions_year : ((2001, 'university of new hampshire main campus'),) -all_us_institutions_year : ((2001, 'university of new hampshire main campus'),) - -firstname : k -lastname : damm -middlename : l von -year_range : (1993, 2008) -main_us_institutions_year : ((1993, 'university of new hampshire'), (1995, 'university of new hampshire'), (1996, 'university of new hampshire'), (1997, 'university of new hampshire'), (1998, 'university of new hampshire'), (2001, 'university of new hampshire'), (2002, 'university of new hampshire'), (2003, 'university of new hampshire'), (2005, 'university of new hampshire'), (2006, 'university of new hampshire'), (2008, 'university of new hampshire')) -all_us_institutions_year : ((1993, 'university of new hampshire'), (1995, 'university of new hampshire'), (1996, 'university of new hampshire'), (1997, 'university of new hampshire'), (1998, 'university of new hampshire'), (2001, 'university of new hampshire'), (2002, 'university of new hampshire'), (2003, 'university of new hampshire'), (2005, 'university of new hampshire'), (2006, 'university of new hampshire'), (2008, 'university of new hampshire')) - -7/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : h -lastname : harvey -middlename : rodger -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of maryland college park'),) -all_us_institutions_year : ((2000, 'university of maryland college park'),) - -firstname : h -lastname : harvey -middlename : rodger -year_range : (1997, 2021) -main_us_institutions_year : ((1997, 'university of maryland center for environmental science'), (1999, 'university of maryland center for environmental science'), (2000, 'university of maryland center for environmental science'), (2001, 'university of maryland center for environmental science'), (2003, 'university of maryland center for environmental science'), (2004, 'university of maryland center for environmental science'), (2005, 'university of maryland center for environmental science'), (2006, 'university of maryland center for environmental science'), (2008, 'university of maryland center for environmental science'), (2009, 'university of maryland center for environmental science'), (2010, 'university of maryland center for environmental science'), (2011, 'old dominion university'), (2012, 'old dominion university'), (2013, 'university of maryland center for environmental science'), (2013, 'old dominion university'), (2014, 'old dominion university'), (2016, 'old dominion university'), (2017, 'old dominion university'), (2018, 'old dominion university'), (2019, 'old dominion university'), (2020, 'old dominion university'), (2021, 'old dominion university')) -all_us_institutions_year : ((1997, 'university of maryland center for environmental science'), (1999, 'university of maryland center for environmental science'), (2000, 'university of maryland center for environmental science'), (2001, 'university of maryland center for environmental science'), (2002, 'university of maryland center for environmental science'), (2003, 'university of maryland center for environmental science'), (2004, 'university of maryland center for environmental science'), (2005, 'university of maryland center for environmental science'), (2006, 'university of maryland center for environmental science'), (2008, 'university of maryland center for environmental science'), (2009, 'university of maryland center for environmental science'), (2010, 'university of maryland center for environmental science'), (2011, 'old dominion university'), (2011, 'university of maryland center for environmental science'), (2012, 'old dominion university'), (2012, 'university of maryland center for environmental science'), (2013, 'old dominion university'), (2013, 'university of maryland center for environmental science'), (2014, 'old dominion university'), (2015, 'old dominion university'), (2016, 'old dominion university'), (2017, 'old dominion university'), (2018, 'old dominion university'), (2019, 'old dominion university'), (2020, 'old dominion university'), (2021, 'old dominion university')) - -8/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : milo -lastname : backus -middlename : m -year_range : (1990,) -main_us_institutions_year : ((1990, 'university of texas at austin'),) -all_us_institutions_year : ((1990, 'university of texas at austin'),) - -firstname : mile -lastname : backus -middlename : m -year_range : (1984, 1991) -main_us_institutions_year : ((1984, 'university of texas at austin'), (1986, 'university of texas at austin'), (1987, 'university of texas at austin'), (1990, 'university of texas at austin'), (1991, 'university of texas at austin')) -all_us_institutions_year : ((1984, 'university of texas at austin'), (1986, 'university of texas at austin'), (1987, 'university of texas at austin'), (1990, 'university of texas at austin'), (1991, 'university of texas at austin')) - -9/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : h -lastname : harvey -middlename : rodger -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of maryland college park'),) -all_us_institutions_year : ((2000, 'university of maryland college park'),) - -firstname : h -lastname : harvey -middlename : rodger -year_range : (1997, 2021) -main_us_institutions_year : ((1997, 'university of maryland center for environmental science'), (1999, 'university of maryland center for environmental science'), (2000, 'university of maryland center for environmental science'), (2001, 'university of maryland center for environmental science'), (2003, 'university of maryland center for environmental science'), (2004, 'university of maryland center for environmental science'), (2005, 'university of maryland center for environmental science'), (2006, 'university of maryland center for environmental science'), (2008, 'university of maryland center for environmental science'), (2009, 'university of maryland center for environmental science'), (2010, 'university of maryland center for environmental science'), (2011, 'old dominion university'), (2012, 'old dominion university'), (2013, 'university of maryland center for environmental science'), (2013, 'old dominion university'), (2014, 'old dominion university'), (2016, 'old dominion university'), (2017, 'old dominion university'), (2018, 'old dominion university'), (2019, 'old dominion university'), (2020, 'old dominion university'), (2021, 'old dominion university')) -all_us_institutions_year : ((1997, 'university of maryland center for environmental science'), (1999, 'university of maryland center for environmental science'), (2000, 'university of maryland center for environmental science'), (2001, 'university of maryland center for environmental science'), (2002, 'university of maryland center for environmental science'), (2003, 'university of maryland center for environmental science'), (2004, 'university of maryland center for environmental science'), (2005, 'university of maryland center for environmental science'), (2006, 'university of maryland center for environmental science'), (2008, 'university of maryland center for environmental science'), (2009, 'university of maryland center for environmental science'), (2010, 'university of maryland center for environmental science'), (2011, 'old dominion university'), (2011, 'university of maryland center for environmental science'), (2012, 'old dominion university'), (2012, 'university of maryland center for environmental science'), (2013, 'old dominion university'), (2013, 'university of maryland center for environmental science'), (2014, 'old dominion university'), (2015, 'old dominion university'), (2016, 'old dominion university'), (2017, 'old dominion university'), (2018, 'old dominion university'), (2019, 'old dominion university'), (2020, 'old dominion university'), (2021, 'old dominion university')) - -8/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : milo -lastname : backus -middlename : m -year_range : (1990,) -main_us_institutions_year : ((1990, 'university of texas at austin'),) -all_us_institutions_year : ((1990, 'university of texas at austin'),) - -firstname : mile -lastname : backus -middlename : m -year_range : (1984, 1991) -main_us_institutions_year : ((1984, 'university of texas at austin'), (1986, 'university of texas at austin'), (1987, 'university of texas at austin'), (1990, 'university of texas at austin'), (1991, 'university of texas at austin')) -all_us_institutions_year : ((1984, 'university of texas at austin'), (1986, 'university of texas at austin'), (1987, 'university of texas at austin'), (1990, 'university of texas at austin'), (1991, 'university of texas at austin')) - -9/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : robert -lastname : odom -middlename : i -year_range : (2013,) -main_us_institutions_year : ((2013, 'university of washington'),) -all_us_institutions_year : ((2013, 'university of washington'),) - -firstname : robert -lastname : odom -middlename : i -year_range : (1983, 1988) -main_us_institutions_year : ((1984, 'princeton university'), (1987, 'princeton university'), (1988, 'princeton university')) -all_us_institutions_year : ((1984, 'princeton university'), (1987, 'princeton university'), (1988, 'princeton university')) - -10/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jay -lastname : bass -middlename : d -year_range : (2008,) -main_us_institutions_year : ((2008, 'university of illinois at urbana champaign'),) -all_us_institutions_year : ((2008, 'university of illinois at urbana champaign'),) - -firstname : jay -lastname : bass -middlename : d -year_range : (1984, 1990) -main_us_institutions_year : ((1984, 'california institute of technology'), (1986, 'california institute of technology'), (1990, 'california institute of technology')) -all_us_institutions_year : ((1984, 'california institute of technology'), (1985, 'california institute of technology'), (1986, 'california institute of technology'), (1990, 'california institute of technology')) - -10/10 positive, 16/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : david -lastname : fastovsky -middlename : e -year_range : (2006,) -main_us_institutions_year : ((2006, 'university of rhode island'),) -all_us_institutions_year : ((2006, 'university of rhode island'),) - -firstname : david -lastname : fastovsky -middlename : e -year_range : (1986, 1990) -main_us_institutions_year : ((1986, 'university of wisconsin madison'), (1987, 'university of wisconsin madison'), (1990, 'university of wisconsin madison')) -all_us_institutions_year : ((1986, 'university of wisconsin madison'), (1987, 'university of rhode island'), (1987, 'university of wisconsin madison'), (1990, 'university of wisconsin madison')) - -10/10 positive, 17/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : bennett -middlename : a -year_range : (2014,) -main_us_institutions_year : ((2014, 'university of arizona'),) -all_us_institutions_year : ((2014, 'university of arizona'),) - -firstname : c -lastname : bennett -middlename : a -year_range : (2017, 2021) -main_us_institutions_year : ((2017, 'university of arizona'), (2018, 'university of arizona'), (2019, 'university of arizona'), (2020, 'university of arizona'), (2021, 'university of arizona')) -all_us_institutions_year : ((2017, 'university of arizona'), (2018, 'university of arizona'), (2019, 'university of arizona'), (2020, 'university of arizona'), (2021, 'university of arizona')) - -10/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : andreas -lastname : kronenberg -middlename : k -year_range : (1995,) -main_us_institutions_year : ((1995, 'texas a m university college station'),) -all_us_institutions_year : ((1995, 'texas a m university college station'),) - -firstname : a -lastname : kronenberg -middlename : k -year_range : (1990, 2020) -main_us_institutions_year : ((1990, 'texas a m university'), (1991, 'texas a m university'), (1993, 'texas a m university'), (1996, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2011, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) -all_us_institutions_year : ((1990, 'texas a m university'), (1991, 'texas a m university'), (1993, 'texas a m university'), (1994, 'texas a m university'), (1996, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2011, 'texas a m university'), (2012, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) - -10/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : bennett -middlename : a -year_range : (2014,) -main_us_institutions_year : ((2014, 'university of arizona'),) -all_us_institutions_year : ((2014, 'university of arizona'),) - -firstname : c -lastname : bennett -middlename : a -year_range : (2017, 2021) -main_us_institutions_year : ((2017, 'university of arizona'), (2018, 'university of arizona'), (2019, 'university of arizona'), (2020, 'university of arizona'), (2021, 'university of arizona')) -all_us_institutions_year : ((2017, 'university of arizona'), (2018, 'university of arizona'), (2019, 'university of arizona'), (2020, 'university of arizona'), (2021, 'university of arizona')) - -10/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : andreas -lastname : kronenberg -middlename : k -year_range : (1995,) -main_us_institutions_year : ((1995, 'texas a m university college station'),) -all_us_institutions_year : ((1995, 'texas a m university college station'),) - -firstname : a -lastname : kronenberg -middlename : k -year_range : (1990, 2020) -main_us_institutions_year : ((1990, 'texas a m university'), (1991, 'texas a m university'), (1993, 'texas a m university'), (1996, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2011, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) -all_us_institutions_year : ((1990, 'texas a m university'), (1991, 'texas a m university'), (1993, 'texas a m university'), (1994, 'texas a m university'), (1996, 'texas a m university'), (2004, 'texas a m university'), (2005, 'texas a m university'), (2007, 'texas a m university'), (2008, 'texas a m university'), (2011, 'texas a m university'), (2012, 'texas a m university'), (2013, 'texas a m university'), (2014, 'texas a m university'), (2016, 'texas a m university'), (2017, 'texas a m university'), (2018, 'texas a m university'), (2019, 'texas a m university'), (2020, 'texas a m university')) - -10/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : anthony -lastname : juo -middlename : s r -year_range : (1998,) -main_us_institutions_year : ((1998, 'texas a m university college station'),) -all_us_institutions_year : ((1998, 'texas a m university college station'),) - -firstname : a -lastname : juo -middlename : s r -year_range : (1995, 1997) -main_us_institutions_year : ((1995, 'texas a m university'),) -all_us_institutions_year : ((1995, 'texas a m university'),) - -11/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : robert -lastname : grimm -middlename : e -year_range : (2003,) -main_us_institutions_year : ((2003, 'university of colorado at boulder'),) -all_us_institutions_year : ((2003, 'university of colorado at boulder'),) - -firstname : r -lastname : grimm -middlename : e -year_range : (2008, 2010) -main_us_institutions_year : ((2008, 'colorado school of mines'), (2010, 'colorado school of mines')) -all_us_institutions_year : ((2008, 'colorado school of mines'), (2010, 'colorado school of mines')) - -12/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : an -lastname : yin -middlename : None -year_range : (2007,) -main_us_institutions_year : ((2007, 'university of california los angeles'),) -all_us_institutions_year : ((2007, 'university of california los angeles'),) - -firstname : chingchung -lastname : yin -middlename : None -year_range : (1987, 1992) -main_us_institutions_year : ((1987, 'university of california los angeles'), (1992, 'university of california los angeles')) -all_us_institutions_year : ((1987, 'university of california los angeles'), (1988, 'university of california los angeles'), (1992, 'university of california los angeles')) - -12/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : an -lastname : yin -middlename : None -year_range : (2011,) -main_us_institutions_year : ((2011, 'university of california los angeles'),) -all_us_institutions_year : ((2011, 'university of california los angeles'),) - -firstname : chingchung -lastname : yin -middlename : None -year_range : (1987, 1992) -main_us_institutions_year : ((1987, 'university of california los angeles'), (1992, 'university of california los angeles')) -all_us_institutions_year : ((1987, 'university of california los angeles'), (1988, 'university of california los angeles'), (1992, 'university of california los angeles')) - -12/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : juergen -lastname : schieber -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'indiana university'),) -all_us_institutions_year : ((2015, 'indiana university'),) - -firstname : jurgen -lastname : schieber -middlename : None -year_range : (1986, 2021) -main_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) -all_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) - -12/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : alberto -lastname : douce -middlename : emilio patino -year_range : (1998,) -main_us_institutions_year : ((1998, 'university of georgia'),) -all_us_institutions_year : ((1998, 'university of georgia'),) - -firstname : a -lastname : douce -middlename : e patino -year_range : (1995, 1998) -main_us_institutions_year : ((1995, 'university of georgia'), (1998, 'university of georgia')) -all_us_institutions_year : ((1995, 'university of georgia'), (1998, 'university of georgia')) - -13/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : juergen -lastname : schieber -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'indiana university'),) -all_us_institutions_year : ((2015, 'indiana university'),) - -firstname : jurgen -lastname : schieber -middlename : None -year_range : (1986, 2021) -main_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) -all_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) - -12/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : alberto -lastname : douce -middlename : emilio patino -year_range : (1998,) -main_us_institutions_year : ((1998, 'university of georgia'),) -all_us_institutions_year : ((1998, 'university of georgia'),) - -firstname : a -lastname : douce -middlename : e patino -year_range : (1995, 1998) -main_us_institutions_year : ((1995, 'university of georgia'), (1998, 'university of georgia')) -all_us_institutions_year : ((1995, 'university of georgia'), (1998, 'university of georgia')) - -13/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : juergen -lastname : schieber -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'indiana university'),) -all_us_institutions_year : ((2015, 'indiana university'),) - -firstname : jurgen -lastname : schieber -middlename : None -year_range : (1986, 2021) -main_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) -all_us_institutions_year : ((1986, 'university of oregon'), (1988, 'university of oregon'), (1989, 'university of texas at arlington'), (1990, 'university of texas at arlington'), (1991, 'university of texas at arlington'), (1993, 'university of texas at arlington'), (1994, 'university of texas at arlington'), (1995, 'university of texas at arlington'), (1998, 'university of texas at arlington'), (1999, 'university of texas at arlington'), (2000, 'university of texas at arlington'), (2001, 'university of texas at arlington'), (2002, 'university of texas at arlington'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2009, 'indiana university'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university'), (2021, 'indiana university')) - -14/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -Finished labeling -Done in 66.69172107378641 minutes. +reading from /mnt/ssd/DedupeFiles/advisors/settings_geology_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Done in 17.37985785404841 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..83be1fd --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_advisors_9015.log @@ -0,0 +1,636 @@ +Namespace(testing=False, verbose=1, field=['history'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [95457728] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0005679607391357421 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 36.141055234273274 minutes + +Starting active labeling... +firstname : kemal +lastname : karpat +middlename : h +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of wisconsin madison'),) +all_us_institutions_year : ((1993, 'university of wisconsin madison'),) + +firstname : kemal +lastname : karpat +middlename : h +year_range : (1959, 2012) +main_us_institutions_year : ((1978, 'university of wisconsin madison'), (1979, 'university of wisconsin madison'), (1982, 'university of wisconsin madison'), (1983, 'university of wisconsin madison'), (1984, 'university of wisconsin madison'), (1985, 'university of wisconsin madison'), (1986, 'university of wisconsin madison'), (1993, 'university of wisconsin madison'), (1996, 'university of wisconsin madison'), (1997, 'university of wisconsin madison'), (1998, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2004, 'university of wisconsin madison'), (2006, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison')) +all_us_institutions_year : ((1978, 'university of wisconsin madison'), (1979, 'university of wisconsin madison'), (1982, 'university of wisconsin madison'), (1983, 'university of wisconsin madison'), (1984, 'university of wisconsin madison'), (1985, 'university of wisconsin madison'), (1986, 'university of wisconsin madison'), (1993, 'university of wisconsin madison'), (1996, 'university of wisconsin madison'), (1997, 'university of wisconsin madison'), (1998, 'university of wisconsin madison'), (2000, 'university of wisconsin madison'), (2004, 'university of wisconsin madison'), (2006, 'university of wisconsin madison'), (2007, 'university of wisconsin madison'), (2008, 'university of wisconsin madison'), (2009, 'university of wisconsin madison'), (2011, 'university of wisconsin madison'), (2012, 'university of wisconsin madison')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : ronald +lastname : marcello +middlename : e +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of north texas'),) +all_us_institutions_year : ((2000, 'university of north texas'),) + +firstname : ronald +lastname : marcello +middlename : e +year_range : (1984, 2019) +main_us_institutions_year : ((1984, 'university of north texas'), (1988, 'university of north texas'), (1992, 'university of north texas'), (1994, 'university of north texas'), (2000, 'university of north texas'), (2005, 'university of north texas'), (2019, 'university of north texas')) +all_us_institutions_year : ((1984, 'university of north texas'), (1988, 'university of north texas'), (1992, 'university of north texas'), (1994, 'university of north texas'), (2000, 'university of north texas'), (2005, 'university of north texas'), (2019, 'university of north texas')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jerzy +lastname : linderski +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2000, 'university of north carolina at chapel hill'),) + +firstname : jerzy +lastname : linderski +middlename : None +year_range : (1964, 2008) +main_us_institutions_year : ((1989, 'university of north carolina at chapel hill'), (1990, 'university of north carolina at chapel hill'), (1998, 'university of north carolina at chapel hill'), (2000, 'university of north carolina at chapel hill'), (2003, 'university of north carolina at chapel hill'), (2006, 'university of north carolina at chapel hill'), (2010, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((1989, 'university of north carolina at chapel hill'), (1990, 'university of north carolina at chapel hill'), (1998, 'university of north carolina at chapel hill'), (2000, 'university of north carolina at chapel hill'), (2003, 'university of north carolina at chapel hill'), (2006, 'university of north carolina at chapel hill'), (2010, 'university of north carolina at chapel hill')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : mcgee +middlename : calvin +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of iowa'),) +all_us_institutions_year : ((1990, 'university of iowa'),) + +firstname : michael +lastname : mcgee +middlename : calvin +year_range : (1975, 1997) +main_us_institutions_year : ((1975, 'university of memphis'), (1980, 'university of iowa'), (1983, 'university of iowa'), (1984, 'university of iowa'), (1985, 'university of iowa'), (1986, 'university of iowa'), (1990, 'university of iowa')) +all_us_institutions_year : ((1975, 'university of memphis'), (1980, 'university of iowa'), (1983, 'university of iowa'), (1984, 'university of iowa'), (1985, 'university of iowa'), (1986, 'university of iowa'), (1990, 'university of iowa')) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lisa +lastname : bitel +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of southern california'),) +all_us_institutions_year : ((2011, 'university of southern california'),) + +firstname : lisa +lastname : bitel +middlename : m +year_range : (1992, 2015) +main_us_institutions_year : ((2004, 'university of southern california'), (2006, 'university of southern california'), (2008, 'university of southern california'), (2011, 'university of southern california'), (2015, 'university of southern california')) +all_us_institutions_year : ((2004, 'university of southern california'), (2006, 'university of southern california'), (2008, 'university of southern california'), (2011, 'university of southern california'), (2013, 'university of southern california'), (2015, 'university of southern california'), (2018, 'university of southern california')) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : werner +lastname : baer +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((1998, 'university of illinois at urbana champaign'),) + +firstname : werner +lastname : baer +middlename : None +year_range : (1956, 2019) +main_us_institutions_year : ((1956, 'harvard university'), (1957, 'harvard university'), (1959, 'harvard university'), (1966, 'vanderbilt university'), (1967, 'vanderbilt university'), (1969, 'san jose state university'), (1973, 'vanderbilt university'), (1974, 'university of illinois at urbana champaign'), (1976, 'university of illinois at urbana champaign'), (1980, 'university of illinois at urbana champaign'), (1981, 'university of illinois at urbana champaign'), (1982, 'university of florida'), (1984, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'university of illinois at urbana champaign'), (1989, 'hunter college'), (1991, 'university of illinois at urbana champaign'), (1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1996, 'university of illinois at urbana champaign'), (1997, 'university of illinois at urbana champaign'), (1998, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign'), (2000, 'university of illinois at urbana champaign'), (2001, 'university of illinois at urbana champaign'), (2002, 'university of illinois at urbana champaign'), (2003, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2006, 'university of illinois at urbana champaign'), (2008, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'university of illinois at urbana champaign'), (2013, 'university of illinois at urbana champaign'), (2014, 'university of illinois at urbana champaign'), (2016, 'university of illinois at urbana champaign'), (2018, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1956, 'harvard university'), (1957, 'harvard university'), (1959, 'harvard university'), (1965, 'vanderbilt university'), (1966, 'vanderbilt university'), (1967, 'vanderbilt university'), (1969, 'san jose state university'), (1970, 'vanderbilt university'), (1973, 'vanderbilt university'), (1974, 'university of illinois at urbana champaign'), (1976, 'university of illinois at urbana champaign'), (1980, 'university of illinois at urbana champaign'), (1981, 'university of florida'), (1981, 'university of illinois at urbana champaign'), (1982, 'university of florida'), (1984, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'university of illinois at urbana champaign'), (1989, 'hunter college'), (1991, 'university of illinois at urbana champaign'), (1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1996, 'university of illinois at urbana champaign'), (1997, 'university of illinois at urbana champaign'), (1998, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign'), (2000, 'university of illinois at urbana champaign'), (2001, 'university of illinois at urbana champaign'), (2002, 'university of illinois at urbana champaign'), (2003, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2006, 'university of illinois at urbana champaign'), (2008, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign'), (2010, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2012, 'university of illinois at urbana champaign'), (2013, 'university of illinois at urbana champaign'), (2014, 'university of illinois at urbana champaign'), (2016, 'university of illinois at urbana champaign'), (2018, 'university of illinois at urbana champaign')) + +5/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : heather +lastname : streets +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'washington state university'),) +all_us_institutions_year : ((2011, 'washington state university'),) + +firstname : heather +lastname : streetssalter +middlename : None +year_range : (2013, 2021) +main_us_institutions_year : ((2013, 'northeastern university'), (2014, 'northeastern university'), (2017, 'northeastern university'), (2021, 'northeastern university')) +all_us_institutions_year : ((2013, 'northeastern university'), (2014, 'northeastern university'), (2017, 'northeastern university'), (2021, 'northeastern university')) + +6/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : george +lastname : rosenwald +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of michigan'),) +all_us_institutions_year : ((1995, 'university of michigan'),) + +firstname : george +lastname : rosen +middlename : None +year_range : (1943, 2012) +main_us_institutions_year : ((1953, 'columbia university'), (1957, 'columbia university'), (1958, 'columbia university'), (1963, 'columbia university'), (1964, 'columbia university'), (1965, 'columbia university'), (1966, 'columbia university'), (1969, 'columbia university'), (1971, 'yale university'), (1972, 'yale university'), (1973, 'yale university'), (1974, 'yale university'), (1975, 'yale university'), (1976, 'yale university')) +all_us_institutions_year : ((1953, 'columbia university'), (1957, 'columbia university'), (1958, 'columbia university'), (1963, 'columbia university'), (1964, 'columbia university'), (1965, 'columbia university'), (1966, 'columbia university'), (1969, 'columbia university'), (1971, 'yale university'), (1972, 'yale university'), (1973, 'yale university'), (1974, 'yale university'), (1975, 'emory university'), (1975, 'yale university'), (1976, 'yale university')) + +6/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : charles +lastname : fraker +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of michigan'),) +all_us_institutions_year : ((1991, 'university of michigan'),) + +firstname : charles +lastname : frake +middlename : o +year_range : (1995, 2014) +main_us_institutions_year : ((1995, 'university at buffalo'), (1998, 'university at buffalo'), (2014, 'university at buffalo')) +all_us_institutions_year : ((1995, 'university at buffalo'), (1998, 'university at buffalo'), (2014, 'university at buffalo')) + +6/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : wirth +middlename : d +year_range : (1992,) +main_us_institutions_year : ((1992, 'stanford university'),) +all_us_institutions_year : ((1992, 'stanford university'),) + +firstname : john +lastname : worth +middlename : e +year_range : (1995, 2020) +main_us_institutions_year : ((2011, 'university of west florida'), (2012, 'university of west florida'), (2020, 'university of west florida')) +all_us_institutions_year : ((2011, 'university of west florida'), (2012, 'university of west florida'), (2018, 'university of west florida'), (2020, 'university of west florida')) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : howard +lastname : winters +middlename : d +year_range : (1991,) +main_us_institutions_year : ((1991, 'new york university'),) +all_us_institutions_year : ((1991, 'new york university'),) + +firstname : howard +lastname : sander +middlename : w +year_range : (1987, 2020) +main_us_institutions_year : ((1987, 'state university of new york system'), (1988, 'state university of new york system'), (1989, 'state university of new york system'), (1996, 'new york medical college'), (1997, 'new york medical college'), (1998, 'new york medical college'), (1999, 'st vincent s health system'), (1999, 'new york medical college'), (2000, 'new york medical college'), (2000, 'new york university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'st vincent s health system'), (2007, 'cornell university'), (2007, 'new york medical college'), (2008, 'st vincent s health system'), (2008, 'cornell university'), (2008, 'new york medical college'), (2009, 'st vincent s health system'), (2009, 'cornell university'), (2009, 'new york medical college'), (2012, 'new york university'), (2013, 'new york university'), (2014, 'new york university'), (2015, 'new york university'), (2018, 'new york university'), (2020, 'new york university')) +all_us_institutions_year : ((1987, 'state university of new york system'), (1988, 'state university of new york system'), (1989, 'state university of new york system'), (1996, 'new york medical college'), (1997, 'new york medical college'), (1998, 'new york medical college'), (1998, 'st vincent s health system'), (1999, 'new york medical college'), (1999, 'st vincent s health system'), (2000, 'new york medical college'), (2000, 'new york university'), (2002, 'cornell university'), (2002, 'new york medical college'), (2002, 'new york university'), (2003, 'columbia university'), (2003, 'cornell university'), (2003, 'new york medical college'), (2003, 'new york university'), (2004, 'cornell university'), (2004, 'new york university'), (2005, 'cornell university'), (2005, 'new york university'), (2006, 'cornell university'), (2006, 'university of alabama at birmingham'), (2007, 'catholic medical center'), (2007, 'cornell university'), (2007, 'new york medical college'), (2007, 'st vincent s health system'), (2008, 'cornell university'), (2008, 'new york medical college'), (2008, 'st vincent s health system'), (2009, 'cornell university'), (2009, 'new york medical college'), (2009, 'st vincent s health system'), (2012, 'new york university'), (2013, 'new york university'), (2014, 'new york university'), (2015, 'new york university'), (2018, 'new york university'), (2020, 'new york university')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : h +lastname : midelfart +middlename : c erik +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of virginia main campus'),) +all_us_institutions_year : ((2010, 'university of virginia main campus'),) + +firstname : h +lastname : midelfort +middlename : c erik +year_range : (1973, 2020) +main_us_institutions_year : ((1973, 'university of virginia'), (1978, 'university of virginia'), (1983, 'university of virginia'), (1984, 'university of virginia'), (1990, 'university of virginia'), (1995, 'university of virginia'), (1996, 'university of virginia'), (1998, 'university of virginia'), (1999, 'university of virginia'), (2000, 'university of virginia'), (2001, 'university of virginia'), (2003, 'university of virginia'), (2006, 'university of virginia'), (2008, 'university of virginia'), (2010, 'university of virginia'), (2018, 'university of virginia'), (2020, 'university of virginia')) +all_us_institutions_year : ((1973, 'university of virginia'), (1978, 'university of virginia'), (1983, 'university of virginia'), (1984, 'university of virginia'), (1990, 'university of virginia'), (1995, 'university of virginia'), (1996, 'university of virginia'), (1998, 'university of virginia'), (1999, 'university of virginia'), (2000, 'university of virginia'), (2001, 'university of virginia'), (2003, 'university of virginia'), (2006, 'university of virginia'), (2008, 'university of virginia'), (2010, 'university of virginia'), (2018, 'university of virginia'), (2019, 'university of virginia'), (2020, 'university of virginia')) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : katherine +lastname : morrissey +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of arizona'),) +all_us_institutions_year : ((2011, 'university of arizona'),) + +firstname : s +lastname : morris +middlename : l +year_range : (1984, 2021) +main_us_institutions_year : ((1985, 'university of arizona'), (1987, 'university of arizona'), (1988, 'university of arizona'), (1991, 'carnegie institution for science'), (1992, 'carnegie institution for science'), (1993, 'carnegie institution for science'), (1996, 'university of hawaii'), (1997, 'university of hawaii'), (1999, 'university of hawaii'), (2001, 'university of hawaii')) +all_us_institutions_year : ((1985, 'university of arizona'), (1987, 'university of arizona'), (1988, 'carnegie institution for science'), (1988, 'university of arizona'), (1991, 'carnegie institution for science'), (1992, 'carnegie institution for science'), (1993, 'carnegie institution for science'), (1996, 'university of hawaii'), (1997, 'national research council'), (1997, 'university of hawaii'), (1998, 'carnegie institution for science'), (1998, 'national research council'), (1998, 'university of hawaii'), (1999, 'university of hawaii'), (2000, 'national research council'), (2000, 'university of hawaii'), (2001, 'national research council'), (2001, 'university of hawaii'), (2001, 'w m keck observatory'), (2002, 'w m keck observatory')) + +7/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rainer +lastname : berger +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of california los angeles'),) +all_us_institutions_year : ((1995, 'university of california los angeles'),) + +firstname : p +lastname : bergeron +middlename : None +year_range : (1984, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2012, 'association of universities for research in astronomy'),) + +7/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mahir +lastname : saul +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((1999, 'university of illinois at urbana champaign'),) + +firstname : mahir +lastname : şaul +middlename : None +year_range : (1986, 2015) +main_us_institutions_year : ((1986, 'university of illinois at urbana champaign'), (1990, 'university of illinois at urbana champaign'), (1993, 'university of illinois at urbana champaign'), (2000, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2007, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1986, 'university of illinois at urbana champaign'), (1990, 'university of illinois at urbana champaign'), (1993, 'university of illinois at urbana champaign'), (2000, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2007, 'university of illinois at urbana champaign'), (2009, 'university of illinois at urbana champaign'), (2011, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign')) + +7/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : freedberg +middlename : a +year_range : (1998,) +main_us_institutions_year : ((1998, 'columbia university'),) +all_us_institutions_year : ((1998, 'columbia university'),) + +firstname : david +lastname : greenberg +middlename : a +year_range : (1982, 2020) +main_us_institutions_year : ((1984, 'university of california san francisco'), (1985, 'university of california san francisco'), (1986, 'university of california san francisco'), (1987, 'university of california san francisco'), (1988, 'university of california san francisco'), (1989, 'university of california san francisco'), (1992, 'university of california san francisco'), (1993, 'university of california san francisco'), (1994, 'university of california san francisco'), (1995, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'university of pittsburgh'), (1998, 'university of pittsburgh'), (1999, 'university of california san francisco'), (1999, 'university of pittsburgh'), (2000, 'buck institute for research on aging'), (2001, 'buck institute for research on aging'), (2002, 'buck institute for research on aging'), (2003, 'buck institute for research on aging'), (2004, 'buck institute for research on aging'), (2005, 'buck institute for research on aging'), (2006, 'buck institute for research on aging'), (2007, 'buck institute for research on aging'), (2008, 'buck institute for research on aging'), (2009, 'buck institute for research on aging'), (2010, 'buck institute for research on aging'), (2011, 'buck institute for research on aging'), (2012, 'buck institute for research on aging'), (2013, 'buck institute for research on aging'), (2014, 'buck institute for research on aging'), (2015, 'buck institute for research on aging'), (2016, 'buck institute for research on aging'), (2017, 'buck institute for research on aging'), (2019, 'university of pittsburgh')) +all_us_institutions_year : ((1984, 'san francisco general hospital'), (1984, 'university of california san francisco'), (1985, 'university of california san francisco'), (1986, 'university of california san francisco'), (1987, 'san francisco general hospital'), (1987, 'university of california san francisco'), (1988, 'university of california san francisco'), (1989, 'san francisco general hospital'), (1989, 'university of california san francisco'), (1992, 'san francisco general hospital'), (1992, 'university of california san francisco'), (1993, 'university of california san francisco'), (1994, 'university of california san francisco'), (1995, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'university of pittsburgh'), (1998, 'university of pittsburgh'), (1999, 'university of california san francisco'), (1999, 'university of pittsburgh'), (2000, 'buck institute for research on aging'), (2001, 'buck institute for research on aging'), (2002, 'buck institute for research on aging'), (2002, 'university of california san francisco'), (2002, 'university of pittsburgh'), (2003, 'buck institute for research on aging'), (2004, 'buck institute for research on aging'), (2004, 'university of pittsburgh'), (2005, 'buck institute for research on aging'), (2005, 'university of pittsburgh'), (2006, 'buck institute for research on aging'), (2007, 'buck institute for research on aging'), (2008, 'buck institute for research on aging'), (2009, 'buck institute for research on aging'), (2010, 'buck institute for research on aging'), (2011, 'buck institute for research on aging'), (2012, 'buck institute for research on aging'), (2013, 'buck institute for research on aging'), (2014, 'buck institute for research on aging'), (2015, 'buck institute for research on aging'), (2016, 'buck institute for research on aging'), (2017, 'buck institute for research on aging'), (2019, 'university of pittsburgh')) + +8/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : velcheru +lastname : rao +middlename : narayana +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of wisconsin madison'),) +all_us_institutions_year : ((2007, 'university of wisconsin madison'),) + +firstname : velcheru +lastname : rao +middlename : narayana +year_range : (1992, 2018) +main_us_institutions_year : ((2007, 'university of wisconsin madison'), (2009, 'university of wisconsin madison')) +all_us_institutions_year : ((2007, 'university of wisconsin madison'), (2009, 'university of wisconsin madison')) + +8/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : albert +lastname : frank +middlename : j von +year_range : (1999,) +main_us_institutions_year : ((1999, 'washington state university'),) +all_us_institutions_year : ((1999, 'washington state university'),) + +firstname : albert +lastname : frank +middlename : j von +year_range : (1989, 2009) +main_us_institutions_year : ((1999, 'washington state university'), (2006, 'washington state university'), (2007, 'washington state university'), (2009, 'washington state university')) +all_us_institutions_year : ((1999, 'washington state university'), (2006, 'washington state university'), (2007, 'washington state university'), (2009, 'washington state university'), (2010, 'washington state university')) + +9/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gordon +lastname : chang +middlename : h +year_range : (1998,) +main_us_institutions_year : ((1998, 'stanford university'),) +all_us_institutions_year : ((1998, 'stanford university'),) + +firstname : gordon +lastname : chang +middlename : h +year_range : (1988, 2019) +main_us_institutions_year : ((1991, 'boston college'), (1991, 'stanford university'), (1992, 'stanford university'), (1995, 'stanford university'), (1998, 'stanford university'), (2004, 'stanford university'), (2005, 'stanford university'), (2008, 'stanford university'), (2015, 'stanford university'), (2018, 'stanford university'), (2019, 'stanford university')) +all_us_institutions_year : ((1991, 'boston college'), (1991, 'stanford university'), (1992, 'stanford university'), (1995, 'stanford university'), (1998, 'stanford university'), (2004, 'stanford university'), (2005, 'stanford university'), (2008, 'stanford university'), (2015, 'stanford university'), (2016, 'stanford university'), (2018, 'stanford university'), (2019, 'stanford university')) + +10/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christine +lastname : ogren +middlename : a +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of iowa'),) +all_us_institutions_year : ((2011, 'university of iowa'),) + +firstname : christine +lastname : ogren +middlename : a +year_range : (1995, 2021) +main_us_institutions_year : ((2006, 'university of iowa'), (2009, 'university of iowa'), (2012, 'university of iowa'), (2015, 'university of iowa'), (2017, 'university of iowa'), (2018, 'university of iowa'), (2019, 'university of iowa'), (2021, 'university of iowa')) +all_us_institutions_year : ((2006, 'university of iowa'), (2009, 'university of iowa'), (2012, 'university of iowa'), (2013, 'university of iowa'), (2015, 'university of iowa'), (2016, 'university of iowa'), (2017, 'university of iowa'), (2018, 'university of iowa'), (2019, 'university of iowa'), (2021, 'university of iowa')) + +11/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : timothy +lastname : colton +middlename : j +year_range : (2014,) +main_us_institutions_year : ((2014, 'harvard university'),) +all_us_institutions_year : ((2014, 'harvard university'),) + +firstname : timothy +lastname : norton +middlename : None +year_range : (2004, 2021) +main_us_institutions_year : ((2004, 'harvard university'), (2012, 'harvard university'), (2014, 'harvard university'), (2016, 'harvard university'), (2019, 'harvard university'), (2020, 'harvard university')) +all_us_institutions_year : ((2004, 'harvard university'), (2012, 'harvard university'), (2014, 'goddard space flight center'), (2014, 'harvard university'), (2014, 'university of maryland baltimore county'), (2015, 'harvard university'), (2016, 'harvard university'), (2017, 'harvard university'), (2018, 'harvard university'), (2019, 'harvard university'), (2020, 'harvard university')) + +12/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : francois +lastname : bon +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'new york university'),) +all_us_institutions_year : ((2015, 'new york university'),) + +firstname : francois +lastname : bonhomme +middlename : None +year_range : (1978, 2020) +main_us_institutions_year : ((1978, 'university of rochester'), (2001, 'sandia national laboratories')) +all_us_institutions_year : ((1978, 'university of rochester'), (1990, 'university of florida'), (1991, 'university of florida'), (1997, 'butler university'), (2001, 'sandia national laboratories'), (2002, 'sandia national laboratories'), (2003, 'sandia national laboratories'), (2004, 'sandia national laboratories'), (2005, 'sandia national laboratories'), (2006, 'sandia national laboratories'), (2007, 'sandia national laboratories'), (2008, 'sandia national laboratories'), (2008, 'state university of new york system'), (2008, 'university of notre dame')) + +12/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : macgregor +lastname : knox +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of rochester'),) +all_us_institutions_year : ((1990, 'university of rochester'),) + +firstname : w +lastname : knox +middlename : h +year_range : (1979, 2021) +main_us_institutions_year : ((1980, 'university of rochester'), (1981, 'university of rochester'), (1982, 'university of rochester'), (1983, 'university of rochester'), (1984, 'university of rochester'), (1985, 'university of rochester'), (1985, 'bell labs'), (1986, 'at t'), (1986, 'bell labs'), (1987, 'bell labs'), (1988, 'bell labs'), (1989, 'bell labs'), (1990, 'bell labs'), (1991, 'bell labs'), (1992, 'bell labs'), (1993, 'bell labs'), (1994, 'bell labs'), (1995, 'bell labs'), (1996, 'bell labs'), (2002, 'university of rochester'), (2003, 'massachusetts institute of technology'), (2003, 'the institute of optics'), (2004, 'university of rochester'), (2005, 'the institute of optics'), (2006, 'the institute of optics'), (2006, 'university of rochester'), (2007, 'the institute of optics'), (2008, 'the institute of optics'), (2009, 'university of rochester'), (2010, 'the institute of optics'), (2011, 'the institute of optics'), (2012, 'the institute of optics'), (2012, 'university of rochester'), (2013, 'the institute of optics'), (2014, 'the institute of optics'), (2015, 'the institute of optics'), (2016, 'university of rochester'), (2017, 'university of rochester'), (2018, 'the institute of optics'), (2019, 'the institute of optics'), (2021, 'the institute of optics')) +all_us_institutions_year : ((1980, 'university of rochester'), (1981, 'university of rochester'), (1982, 'university of rochester'), (1983, 'university of rochester'), (1984, 'at t'), (1984, 'university of rochester'), (1985, 'bell labs'), (1985, 'university of rochester'), (1986, 'at t'), (1986, 'bell labs'), (1987, 'bell labs'), (1988, 'bell labs'), (1989, 'bell labs'), (1990, 'at t'), (1990, 'bell labs'), (1991, 'at t'), (1991, 'bell labs'), (1992, 'at t'), (1992, 'bell labs'), (1993, 'bell labs'), (1994, 'at t'), (1994, 'bell labs'), (1995, 'at t'), (1995, 'bell labs'), (1996, 'at t'), (1996, 'bell labs'), (1998, 'bell labs'), (2000, 'agere systems'), (2000, 'bell labs'), (2001, 'bell labs'), (2001, 'the institute of optics'), (2002, 'bell labs'), (2002, 'university of rochester'), (2003, 'massachusetts institute of technology'), (2003, 'the institute of optics'), (2004, 'the institute of optics'), (2004, 'university of rochester'), (2005, 'the institute of optics'), (2005, 'university of rochester'), (2006, 'the institute of optics'), (2006, 'university of rochester'), (2007, 'the institute of optics'), (2007, 'university of rochester'), (2008, 'the institute of optics'), (2008, 'university of rochester'), (2009, 'the institute of optics'), (2009, 'university of rochester'), (2010, 'the institute of optics'), (2010, 'university of rochester'), (2011, 'the institute of optics'), (2011, 'university of rochester'), (2012, 'the institute of optics'), (2012, 'university of rochester'), (2013, 'the institute of optics'), (2013, 'university of rochester'), (2014, 'the institute of optics'), (2014, 'university of rochester'), (2015, 'the institute of optics'), (2015, 'university of rochester'), (2016, 'university of rochester'), (2017, 'the institute of optics'), (2017, 'university of rochester'), (2018, 'the institute of optics'), (2018, 'university of rochester'), (2019, 'the institute of optics'), (2019, 'university of rochester'), (2020, 'university of rochester'), (2021, 'the institute of optics')) + +12/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : leventhal +middlename : None +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of california los angeles'),) +all_us_institutions_year : ((1996, 'university of california los angeles'),) + +firstname : richard +lastname : leventhal +middlename : m +year_range : (2010, 2018) +main_us_institutions_year : ((2010, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2018, 'university of pennsylvania')) +all_us_institutions_year : ((2010, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2018, 'university of pennsylvania')) + +12/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : brigham +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of massachusetts amherst'),) +all_us_institutions_year : ((1999, 'university of massachusetts amherst'),) + +firstname : john +lastname : brigham +middlename : c +year_range : (1972, 2020) +main_us_institutions_year : ((1972, 'florida state university'), (1973, 'florida state university'), (1974, 'florida state university'), (1976, 'florida state university'), (1977, 'florida state university'), (1978, 'florida state university'), (1979, 'florida state university'), (1980, 'florida state university'), (1982, 'florida state university'), (1983, 'florida state university'), (1985, 'florida state university'), (1986, 'florida state university'), (1987, 'florida state university'), (1988, 'florida state university'), (1989, 'florida state university'), (1990, 'florida state university'), (1991, 'florida state university'), (1992, 'florida state university'), (1993, 'florida state university'), (1994, 'florida state university'), (1996, 'florida state university'), (1997, 'florida state university'), (1998, 'florida state university'), (1999, 'florida state university'), (2001, 'florida state university'), (2002, 'florida state university'), (2004, 'florida state university'), (2005, 'florida state university'), (2012, 'florida state university'), (2020, 'florida state university')) +all_us_institutions_year : ((1972, 'florida state university'), (1973, 'florida state university'), (1974, 'florida state university'), (1976, 'florida state university'), (1977, 'florida state university'), (1978, 'florida state university'), (1979, 'florida state university'), (1980, 'florida state university'), (1982, 'florida state university'), (1983, 'florida state university'), (1985, 'florida state university'), (1986, 'florida state university'), (1987, 'florida state university'), (1988, 'florida state university'), (1989, 'florida state university'), (1990, 'florida state university'), (1991, 'florida state university'), (1992, 'florida state university'), (1993, 'florida state university'), (1994, 'florida state university'), (1996, 'florida state university'), (1997, 'florida state university'), (1998, 'florida state university'), (1999, 'florida state university'), (2001, 'florida state university'), (2002, 'florida state university'), (2004, 'florida state university'), (2005, 'florida state university'), (2007, 'florida state university'), (2008, 'florida state university'), (2012, 'florida state university'), (2016, 'florida state university'), (2020, 'florida state university')) + +12/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : cox +middlename : robert +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2007, 'university of north carolina at chapel hill'),) + +firstname : john +lastname : cox +middlename : k +year_range : (2008, 2017) +main_us_institutions_year : ((2008, 'north dakota state university'), (2009, 'north dakota state university'), (2010, 'north dakota state university'), (2012, 'north dakota state university'), (2017, 'north dakota state university')) +all_us_institutions_year : ((2008, 'north dakota state university'), (2009, 'north dakota state university'), (2010, 'north dakota state university'), (2012, 'north dakota state university'), (2015, 'north dakota state university'), (2017, 'north dakota state university')) + +12/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : fishman +middlename : l +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of michigan'),) +all_us_institutions_year : ((2015, 'university of michigan'),) + +firstname : robert +lastname : fishman +middlename : None +year_range : (1977, 2018) +main_us_institutions_year : ((1980, 'rutgers university'), (1982, 'rutgers university'), (1983, 'rutgers university'), (1986, 'rutgers university'), (1991, 'rutgers university'), (1993, 'rutgers university'), (1995, 'rutgers university'), (2000, 'rutgers university')) +all_us_institutions_year : ((1980, 'rutgers university'), (1982, 'rutgers university'), (1983, 'rutgers university'), (1986, 'rutgers university'), (1991, 'rutgers university'), (1993, 'rutgers university'), (1995, 'rutgers university'), (2000, 'rutgers university'), (2015, 'university of michigan')) + +12/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : rogin +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of california berkeley'),) +all_us_institutions_year : ((1998, 'university of california berkeley'),) + +firstname : michael +lastname : rogin +middlename : paul +year_range : (1975, 1996) +main_us_institutions_year : ((1983, 'university of colorado boulder'),) +all_us_institutions_year : ((1983, 'university of colorado boulder'),) + +13/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : tunde +lastname : adeleke +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of montana'),) +all_us_institutions_year : ((2006, 'university of montana'),) + +firstname : tunde +lastname : adeleke +middlename : None +year_range : (2007, 2018) +main_us_institutions_year : ((2007, 'iowa state university'), (2008, 'iowa state university'), (2011, 'iowa state university'), (2015, 'iowa state university'), (2016, 'iowa state university'), (2018, 'iowa state university')) +all_us_institutions_year : ((2007, 'iowa state university'), (2008, 'iowa state university'), (2011, 'iowa state university'), (2015, 'iowa state university'), (2016, 'iowa state university'), (2018, 'iowa state university')) + +13/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 176.8237717986107 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..6d9bc1f --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_history_christoph_degree0_graduates_8515.log @@ -0,0 +1,732 @@ +Namespace(testing=False, verbose=1, field=['history'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [95457728] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0007546623547871907 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +INFO:dedupe.canopy_index:Removing stop word er +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word ar +INFO:dedupe.canopy_index:Removing stop word st +INFO:dedupe.canopy_index:Removing stop word ma +INFO:dedupe.canopy_index:Removing stop word in +INFO:dedupe.canopy_index:Removing stop word al +INFO:dedupe.canopy_index:Removing stop word en +INFO:dedupe.canopy_index:Removing stop word ll +INFO:dedupe.canopy_index:Removing stop word le +INFO:dedupe.canopy_index:Removing stop word ha +INFO:dedupe.canopy_index:Removing stop word re +INFO:dedupe.canopy_index:Removing stop word ro +INFO:dedupe.canopy_index:Removing stop word ar +INFO:dedupe.canopy_index:Removing stop word or +INFO:dedupe.canopy_index:Removing stop word er +INFO:dedupe.canopy_index:Removing stop word ra +INFO:dedupe.canopy_index:Removing stop word ch +INFO:dedupe.canopy_index:Removing stop word el +INFO:dedupe.canopy_index:Removing stop word la +INFO:dedupe.canopy_index:Removing stop word ri +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word ne +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word in +INFO:dedupe.canopy_index:Removing stop word on +INFO:dedupe.canopy_index:Removing stop word el +INFO:dedupe.canopy_index:Removing stop word on +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:TfidfTextSearchPredicate: (0.2, lastname) +Time elapsed: 20.9757049202919 minutes + +Starting active labeling... +firstname : claudia +lastname : schmid +middlename : None +year : 1995 +year_papertitle : ((1995, 'the vitoria eddy and its relation to the brazil current'), (1997, 'flow and recirculation of antarctic intermediate water across the rio grande rise'), (1998, 'float experiment studies interocean exchanges at the tip of africa'), (1999, 'intermediate water in the brazil malvinas confluence zone a lagrangian view'), (1999, 'kinematic elements of antarctic intermediate water in the western south atlantic'), (1999, 'the intermediate depth circulation of the western south atlantic'), (2000, 'dynamics of intermediate water circulation in the subtropical south atlantic'), (2001, 'new observations of the intermediate depth circulation in the tropical atlantic')) +keywords : frozenset({'climatology', 'oceanography'}) + +firstname : claudia +lastname : schmidt +middlename : maria +year : 1992 +year_papertitle : ((1992, 'history and rationality hume s philosophy as a social theory of knowledge'),) +keywords : frozenset({'history', 'philosophy'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : kenneth +lastname : vickery +middlename : p +year : 1983 +year_papertitle : ((1983, 'the rise of settler power in southern rhodesia zimbabwe 1893 1923'), (1985, 'saving settlers maize control in northern rhodesia'), (1986, 'black and white in southern zambia the tonga plateau economy and british imperialism 1890 1939'), (1986, 'business in the shadow of apartheid u s firms in south africa'), (1986, 'challenging rural poverty experiences in institution building and popular participation for rural development in eastern africa'), (1986, 'human rights and development in africa'), (1988, 'southern africa an american enigma'), (1989, 'the second world war revival of forced labor in the rhodesias')) +keywords : frozenset({'ancient history', 'economic growth', 'economy', 'law and economics', 'development economics', 'political economy', 'economic history'}) + +firstname : kenneth +lastname : vickers +middlename : wayne +year : 2000 +year_papertitle : ((2000, 't s stribling 1881 1965 a life of the tennessee novelist'),) +keywords : frozenset({'american history', 'american literature', 'biographies'}) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christopher +lastname : hallinan +middlename : j +year : 1987 +year_papertitle : ((1987, 'the importance of questionnaire wording assessments of weights as an example'), (1988, 'college students perceived risk and seriousness of aids'), (1988, 'muslim and judaic christian perceptions of desirable body shape'), (1989, 'black and white adolescents perceptions of their weight'), (1990, 'values held by prospective coaches towards women s sport participation'), (1991, 'aborigines and positional segregation in australian rugby league'), (1991, 'perceptions of current and ideal body shape of athletes and nonathletes'), (1993, 'body shape perceptions of elderly women exercisers and nonexercisers'), (1994, 'the presentation of human biological diversity in sport and exercise science textbooks the example of race')) +keywords : frozenset({'cognitive psychology', 'demography', 'gerontology', 'developmental psychology', 'applied psychology', 'clinical psychology', 'social science', 'social psychology', 'criminology'}) + +firstname : christopher +lastname : hill +middlename : l +year : 1992 +year_papertitle : ((1992, 'archaeological and pleistocene geology of acheulian and middle paleolithic sites in the bir tarfawi region of the southern egyptian sahara'),) +keywords : frozenset({'archaeology', 'paleoecology', 'geography', 'geology'}) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ivan +lastname : wong +middlename : g +year : 1983 +year_papertitle : ((1983, 'comments and reply on seismicity of the colorado lineament comment'), (1983, 'deep intraplate seismicity in the western sierra nevada central california'), (1983, 'recent seismicity near capitol reef national park utah and its tectonic implications'), (1984, 'the crownpoint new mexico earthquakes of 1976 and 1977'), (1986, 'the 14 august 1983 cimarron colorado earthquake and the cimarron fault'), (1989, 'contemporary seismicity faulting and the state of stress in the colorado plateau'), (1989, 'observations of mine seismicity in the eastern wasatch plateau utah u s a a possible case of implosional failure'), (1989, 'seismicity of eastern contra costa county san francisco bay region california'), (1990, 'deep intraplate earthquakes in the western united states and their relationship to lithospheric temperatures'), (1990, 'geologic hazards assessment at the idaho national engineering laboratory southeastern idaho abstract')) +keywords : frozenset({'archaeology', 'seismology', 'geomorphology'}) + +firstname : diana +lastname : wong +middlename : pickworth +year : 1999 +year_papertitle : ((1999, 'stamp seals of the ancient yemen'),) +keywords : frozenset({'archaeology'}) + +0/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : caroline +lastname : light +middlename : None +year : 2013 +year_papertitle : ((2013, 'a predominant cause of distress gender benevolence and the agunah in regional perspective'), (2014, 'that pride of race and character the roots of jewish benevolence in the jim crow south'), (2015, 'from a duty to retreat to stand your ground the race and gender politics of do it yourself defense'), (2016, 'carolina israelite how harry golden made us care about jews the south and civil rights by kimberly marlowe hartnett review')) +keywords : frozenset({'religious studies', 'political economy', 'law', 'gender studies', 'social psychology'}) + +firstname : steven +lastname : light +middlename : andrew +year : 1998 +year_papertitle : ((1998, 'there s more than meets the eye southern cities and minority political empowerment following the 1965 voting rights act'),) +keywords : frozenset({'political science', 'law', 'black history'}) + +0/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : martha +lastname : grundy +middlename : paxson +year : 2007 +year_papertitle : ((2007, 'learning to be quaker spiritual formation and religious education among early friends'),) +keywords : frozenset({'religious studies'}) + +firstname : martha +lastname : grundy +middlename : paxson +year : 1990 +year_papertitle : ((1990, 'in the world but not of it quaker faith and the dominant culture middletown meeting bucks county pennsylvania 1750 1850'),) +keywords : frozenset({'religious history', 'american history', 'families family life', 'sociology', 'personal relationships'}) + +0/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : schaefer +middlename : philip rode +year : 2014 +year_papertitle : ((2014, 'divining the self a study in yoruba myth and human consciousness'), (2015, 'cairo pop youth music in contemporary egypt daniel j gilman minneapolis university of minnesota press 2014 xi 256 pp'), (2016, 'discrete discreet appropriation paul bowles non western music and race in tangier'), (2017, 'frontstage backstage participatory music and the festive sacred in essaouira morocco'), (2017, 'trickster theatre the poetics of freedom in urban africa jesse weaver shipley bloomington indiana university press 2015 308 pp')) +keywords : frozenset({'theology', 'aesthetics', 'ethnology', 'anthropology', 'religious studies', 'art history', 'law', 'literature'}) + +firstname : john +lastname : schaefer +middlename : philip rode +year : 2009 +year_papertitle : ((2009, 'moroccan modern race aesthetics and identity in a global culture market'),) +keywords : frozenset({'folklore', 'middle eastern history', 'cultural anthropology'}) + +0/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dietrich +lastname : stout +middlename : None +year : 2000 +year_papertitle : ((2000, 'stone tool making and brain activation position emission tomography pet studies'), (2001, 'constraint and adaptation in primate brain evolution'), (2002, 'skill and cognition in stone tool production an ethnographic case study from irian jaya'), (2002, 'thinking and doing in cognitive archaeology giving skill its due'), (2003, '2 6 million year old stone tools and associated bones from ogs 6 and ogs 7 gona afar ethiopia'), (2004, 'paleoenvironments of the earliest stone toolmakers gona ethiopia'), (2005, 'neural foundations of perception and action in stone knapping'), (2005, 'raw material selectivity of the earliest stone toolmakers at gona afar ethiopia'), (2005, 'the social and cultural context of stone knapping skill acquisition'), (2006, 'acheulean toolmaking and hominin brain evolution a pilot study using positron emission tomography')) +keywords : frozenset({'cognitive psychology', 'cartography', 'archaeology', 'cognitive science', 'artificial intelligence', 'data science', 'mathematics education', 'neuroscience', 'paleontology', 'media studies', 'social science', 'evolutionary biology'}) + +firstname : dietrich +lastname : stout +middlename : w +year : 2003 +year_papertitle : ((2003, 'stone tools and the evolution of human thinking cultural biological and archaeological elements in an anthropology of human origins'),) +keywords : frozenset({'archaeology', 'physical anthropology', 'cognitive therapy'}) + +1/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gillian +lastname : ahlgren +middlename : t w +year : 1994 +year_papertitle : ((1994, 'lucrecia s dreams politics and prophecy in sixteenth century spain by richard l kagan berkely university of california press 1990 ix 229 pp 24 95'), (1994, 'relaciones sobre el estado de las diocesis valencianas by maria milagros carcel orti 3 volumes valencia generalitat valenciana conselleria de cultura educacio i ciencia 1989'), (1994, 'the divine romance teresa of avila s narrative theology by joseph f chorpenning o s f s values and ethics series 4 chicago loyola university press 1992 xiv 176 pp 15 95'), (1995, 'god in la mancha religious reform and the people of cuenca 1500 1650 by sara t nalle the johns hopkins university studies in historical and political science 110th series 2 baltimore md the johns hopkins university press 1992 xviii 306 pp'), (1995, 'negotiating sanctity holy women in sixteenth century spain'), (1997, 'from madrid to purgatory the art and craft of dying in sixteenth century spain by carlos m n eire new york cambridge university press 1995 xiii 571 pp 49 95'), (1997, 'frontiers of heresy the spanish inquisition from the basque lands to sicily by william monter new york cambridge university press 1990 xiv 345 pp 39 50'), (1997, 'the visions of sor maria de agreda writing knowledge and power by clark colahan tucson ariz university of arizona press 1994 ix 194 pp')) +keywords : frozenset({'ancient history', 'theology', 'ethnology', 'religious studies', 'art history', 'gender studies', 'economic history'}) + +firstname : gillian +lastname : ahlgren +middlename : t w +year : 1991 +year_papertitle : ((1991, 'teresa de jesus a case study in mystical creativity and inquisitional censure'),) +keywords : frozenset({'religious history'}) + +2/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chris +lastname : danielson +middlename : None +year : 2013 +year_papertitle : ((2013, 'the politics of voter suppression defending and expanding americans right to vote'), (2013, 'tim s r boyd georgia democrats the civil rights movement and the shaping of the new south'), (2016, 'minion k c morrison aaron henry of mississippi inside agitator'), (2018, 'the dream is lost voting rights and the politics of race in richmond virginia'), (2020, 'poll power the voter education project and the movement for the ballot in the american south')) +keywords : frozenset({'art history', 'law', 'gender studies', 'public administration'}) + +firstname : christopher +lastname : danielson +middlename : alan +year : 2006 +year_papertitle : ((2006, 'the voting rights act and the creation of black politics in mississippi 1965 1986'),) +keywords : frozenset({'american history', 'political science', 'black history'}) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yung +lastname : kim +middlename : suk +year : 2009 +year_papertitle : ((2009, 'paul judaism and the gentiles'), (2011, 'imitators mimetai in 1 cor 4 16 and 11 1 a new reading of threefold embodiment')) +keywords : frozenset({'religious studies', 'psychoanalysis', 'theology'}) + +firstname : sungjae +lastname : kim +middlename : None +year : 1996 +year_papertitle : ((1996, 'the ger and the identity of ancient israel socio literary analysis and deconstructive interpretation'),) +keywords : frozenset({'biblical studies', 'ancient history', 'theology', 'social structure', 'classical studies', 'classical literature'}) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : edwards +middlename : c +year : 1993 +year_papertitle : ((1993, 'gap analysis a geographic approach for assessing national biological diversity'), (1996, 'adequacy of wildlife habitat relation models for estimating spatial distributions of terrestrial vertebrates'), (1996, 'data defensibility and gap analysis'), (1998, 'assessing map accuracy in a remotely sensed ecoregion scale cover map'), (1999, 'use of generalized linear models and digital data in a forest inventory of northern utah')) +keywords : frozenset({'forestry', 'cartography', 'engineering physics', 'remote sensing', 'environmental resource management'}) + +firstname : thomas +lastname : edwards +middlename : joseph skrabak +year : 1989 +year_papertitle : ((1989, 'karl adam and friedrich heiler on the essence of catholicism'),) +keywords : frozenset({'religious history', 'theology'}) + +3/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ben +lastname : craver +middlename : d +year : 2009 +year_papertitle : ((2009, 'connecting non full time faculty to institutional mission a guidebook for college university administrators and faculty developers by leora baron nixon'), (2010, 'teaching death and dying edited by christopher m moreman'), (2010, 'the american university in a postsecular age edited by douglas jacobsen and rhonda hustedt jacobsen'), (2013, 'elements of college teaching by david k irving madison wisc atwood publishing 2011 93 pages isbn 978 1 891859 86 1 19 95'), (2013, 'how learning works seven research based principles for smart teaching by susan a ambrose michael w bridges michele dipietro marsha c lovett and marie k norman san francisco calif jossey bass 2010 xxii 301 pages isbn 978 0 470 48410 4 38 00'), (2013, 'what shall we say evil suffering and the crisis of faith by thomas g long eerdmans 2011 isbn 978 0 8028 6514 4 xiv 158 pp 25'), (2014, 'five big ideas for effective teaching connecting mind brain and education research to classroom practice by donna wilson and marcus conyers new york n y teachers college press 2013 xii 196 pages isbn 13978 0807754252 28 95'), (2014, 'jesus after modernity a twenty first century critique of our modern concept of truth and the truth of the gospel by james p danaher james clarke 2011 isbn 978 0 227 68001 8 xi 145 pp 27'), (2014, 'making sense of death and immortality by paul badham spck 2013 isbn 978 0 281 06458 8 ix 86 pp 14'), (2016, 'the pursuit of the soul psychoanalysis soul making and the christian tradition peter tyler bloomsbury t t clark 2016 isbn 978 0 56714 077 7 vi 200 pp 14 99 reviews')) +keywords : frozenset({'theology', 'gerontology', 'psychoanalysis', 'religious studies', 'art history', 'management', 'media studies', 'environmental ethics', 'humanities', 'pedagogy', 'classics'}) + +firstname : bennie +lastname : craver +middlename : dale +year : 1994 +year_papertitle : ((1994, 'the divine government of the world the function of providence in the theology of friedrich schleiermacher'),) +keywords : frozenset({'philosophy of religion', 'religious history', 'theology'}) + +3/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : adrian +lastname : bantjes +middlename : a +year : 2001 +year_papertitle : ((2001, 'daniela spenser the impossible triangle mexico soviet russia and the united states in the 1920s durham nc and london duke university press 1999 pp xiv 254 34 00 11 95 pb'), (2002, 'fragments of a golden age the politics of culture in mexico since 1940 review'), (2005, 'review popular piety and political identity in mexico s cristero rebellion michoacan 1927 29'), (2006, 'juan soldado rapist murderer martyr saint'), (2006, 'reconciling modernity urban state formation in 1940s leon mexico by daniel newcomer lincoln the university of nebraska press 2004 pp x 288 illustrations notes bibliography index 50 00 cloth'), (2006, 'reconciling modernity urban state formation in 1940s leon mexico review'), (2008, 'surviving mexico s dirty war a political prisoner s memoir')) +keywords : frozenset({'theology', 'ethnology', 'anthropology', 'religious studies', 'art history', 'law', 'media studies', 'gender studies', 'economic history', 'classics'}) + +firstname : adrian +lastname : bantjes +middlename : alexander +year : 1991 +year_papertitle : ((1991, 'politics class and culture in post revolutionary mexico cardenismo and sonora 1929 1940'),) +keywords : frozenset({'political science', 'religious history', 'labor relations', 'latin american history'}) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : tone +middlename : lawrence +year : 1994 +year_papertitle : ((1994, 'the fatal knot the guerrilla war in navarre and the defeat of napoleon in spain'), (1995, 'the most monstrous of wars the napoleonic guerrilla war in southern italy 1806 1811 the fatal knot the guerrilla war in navarre and the defeat of napoleon in spain'), (1998, 'the machete and the liberation of cuba'), (1999, 'la guerrilla espanola y la derrota de napoleon'), (2001, 'spaniards and nazi germany by wayne bowen'), (2001, 'the prisoners of cabrera napoleon s forgotten soldiers 1809 1814')) +keywords : frozenset({'ancient history', 'theology', 'ethnology', 'demography', 'law'}) + +firstname : john +lastname : tone +middlename : l +year : 1989 +year_papertitle : ((1989, 'the fatal knot popular resistance and guerrilla warfare in navarre 1808 1814'),) +keywords : frozenset({'european history'}) + +4/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peter +lastname : boag +middlename : g +year : 1989 +year_papertitle : ((1989, 'dear friends the civil war letters of francis marion elliott a pennsylvania country boy'), (1991, 'the making of the american landscape edited by michael p conzen boston mas sachusetts unwin hyman 1990 xvi 433 pp illustrations list of contributors notes bibliography index cloth 55 00 paper 24 95'), (1993, 'to reclaim a divided west water law and public policy 1848 1902'), (1994, 'keepers of the flame the role of fire in american culture 1775 1925 by margaret hindle hazen and robert m hazen princeton princeton university press 1992 x 281 pp 29 95 isbn 0 691 04809 6')) +keywords : frozenset({'law and economics', 'religious studies', 'art history', 'law', 'classics'}) + +firstname : peter +lastname : boag +middlename : guy +year : 1988 +year_papertitle : ((1988, 'the calapooian matrix landscape and experience on a western frontier'),) +keywords : frozenset({'american history'}) + +5/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : daniel +lastname : silva +middlename : b domingues da +year : 2008 +year_papertitle : ((2008, 'the atlantic slave trade to maranhao 1680 1846 volume routes and organisation'), (2008, 'the slave trade to pernambuco 1561 1851'), (2010, 'the supply of slaves from luanda 1768 1806 records of anselmo da fonseca coutinho'), (2013, 'using african names to identify the origins of captives in the transatlantic slave trade crowd sourcing and the registers of liberated africans 1808 1862'), (2014, 'the diaspora of africans liberated from slave ships in the nineteenth century'), (2015, 'crossings africa the americas and the atlantic slave trade by james walvin london england reaktion books 2013 pp 272 35 00'), (2015, 'the kimbundu diaspora to brazil records from the slave ship brilhante 1838')) +keywords : frozenset({'ancient history', 'ethnology', 'economy', 'demography', 'genealogy', 'economic history'}) + +firstname : daniel +lastname : silva +middlename : barros domingues da +year : 2011 +year_papertitle : ((2011, 'crossroads slave frontiers of angola c1780 1867'),) +keywords : frozenset({'african history', 'latin american history', 'black history'}) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lorraine +lastname : volo +middlename : bayard de +year : 2001 +year_papertitle : ((2001, 'mothers of heroes and martyrs gender identity politics in nicaragua 1979 1999'), (2002, 'after revolution mapping gender and cultural politics in neoliberal nicaragua by florence e babb 2000 austin university of texas press 2001 reviewed by lorraine bayard de volo'), (2003, 'analyzing politics and change in women s organizations'), (2003, 'engendering revolution explaining women s increased participation in guerrilla movements women and guerrilla movements nicaragua el salvador chiapas cuba'), (2003, 'radical women in latin america left and right'), (2003, 'service and surveillance infrapolitics at work among casino cocktail waitresses'), (2003, 'the mixed blessings of war and motherhood mothers of heroes and martyrs gender identity politics in nicaragua 1979 to 1999'), (2004, 'from the inside out ethnographic methods in political research'), (2004, 'ilja a luciak after the revolution gender and democracy in el salvador nicaragua and guatemala baltimore md the johns hopkins university 2001 pp xi 297 27 95 pb'), (2004, 'mobilizing mothers for war cross national framing strategies in nicaragua s contra war')) +keywords : frozenset({'theology', 'development economics', 'religious studies', 'social science', 'political economy', 'epistemology', 'public relations', 'humanities', 'gender studies', 'economic history'}) + +firstname : lorraine +lastname : volo +middlename : marie bayard de +year : 1996 +year_papertitle : ((1996, 'heroes martyrs and mothers maternal identity politics in revolutionary nicaragua'),) +keywords : frozenset({'womens studies', 'latin american history', 'political science', 'social structure', 'social psychology'}) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : schneider +middlename : c +year : 1990 +year_papertitle : ((1990, 'should america go to war the debate over foreign policy in chicago 1939 1941'), (1991, 'arms politics and the economy historical and contemporary perspectives'), (1992, 'henry kissinger doctor of diplomacy by robert d schulzinger new york columbia university press 1989 xii 291 pp cloth 27 95 isbn 0 231 06952 9 paper 13 95 isbn 0 231 06953 7')) +keywords : frozenset({'political economy', 'economy', 'humanities'}) + +firstname : james +lastname : schneider +middlename : dennis +year : 1999 +year_papertitle : ((1999, 'documentary film in the public sphere pare lorentz s the river and its alternatives'),) +keywords : frozenset({'american history', 'motion pictures', 'rhetoric', 'composition'}) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nancy +lastname : deusen +middlename : van +year : 1982 +year_papertitle : ((1982, 'style nationality and the sequence in the middle ages'), (1982, 'the medieval latin sequence a complete catalogue of the sources and edition of the texts and melodies'), (1985, 'origins of a significant medieval genre the musical trope up to the twelfth century'), (1985, 'the image of the harp and trecento reception of plato s phaedo')) +keywords : frozenset({'art history', 'telecommunications', 'linguistics', 'literature'}) + +firstname : nancy +lastname : deusen +middlename : elena van +year : 1995 +year_papertitle : ((1995, 'recogimiento for women and girls in colonial lima an institutional and cultural practice'),) +keywords : frozenset({'womens studies', 'latin american history'}) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : lewis +middlename : b +year : 2004 +year_papertitle : ((2004, 'the book of corrections reflections on the national crisis during the japanese invasion of korea 1592 1598 by songnyong yu translated by choi byonghyon berkeley institute of east asian studies university of california 2002 xi 249 pp 20 00 paper'), (2006, 'accounting techniques in korea 18th century archival samples from a non profit association in the sinitic world'), (2006, 'wages rents and interest rates in southern korea 1700 to 1900'), (2008, 'korean expansion and decline from the seventeenth to the nineteenth century a view suggested by adam smith'), (2009, 'stability or decline demand or supply'), (2010, 'a scroll of the 1748 korean embassy to japan preserved in the british museum'), (2011, 'robert i hellyer defining engagement japan and global contexts 1640 1868 harvard east asian monographs number 326 cambridge mass harvard university asia center 2009 pp xvi 281 39 95'), (2011, 'the wanli emperor and ming china s defence of korea against japan')) +keywords : frozenset({'ancient history', 'labour economics', 'anthropology', 'macroeconomics', 'economic geography', 'classical economics', 'market economy', 'media studies', 'economic history', 'accounting'}) + +firstname : james +lastname : lewis +middlename : welborn +year : 1987 +year_papertitle : ((1987, 'at home in the city mainstream protestantism in gary indiana 1906 1983'),) +keywords : frozenset({'religious history'}) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : carolyn +lastname : brown +middlename : j +year : 1983 +year_papertitle : ((1983, 'children s discourse competence an evaluation of the development of inferential processes'), (1985, 'a comparison of ap and abr tuning characteristics in the guinea pig'), (1987, 'comparison of cineradiographic and photodetection techniques for assessing velopharyngeal function during speech'), (1988, 'electrically evoked brainstem potentials in cochlear implant patients with multi electrode stimulation'), (1988, 'the association between acoustic and articulatory events in a delayed auditory feedback paradigm'), (1990, 'electrically evoked whole nerve action potentials data from human cochlear implant users'), (1990, 'electrically evoked whole nerve action potentials parametric data from the cat'), (1990, 'variations in self paced behaviors in stutterers and nonstutterers')) +keywords : frozenset({'cognitive psychology', 'linguistics', 'orthodontics', 'dentistry', 'audiology', 'acoustics', 'biomedical engineering', 'developmental psychology'}) + +firstname : carolyn +lastname : brown +middlename : anderson +year : 1985 +year_papertitle : ((1985, 'a history of the development of workers consciousness of the coal miners at enugu government colliery nigeria 1914 1950 labor class industrial relations ethnic'),) +keywords : frozenset({'african history'}) + +8/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : norma +lastname : anderson +middlename : None +year : 2015 +year_papertitle : ((2015, 'picturing development in malawi'), (2017, 'ephemeral development agendas and the process of priority shifts in malawi'), (2017, 'teaching note bring class concepts to life implementing intensive interview projects for deep learning'), (2017, 'thinking globally interviewing locally using an intensive interview project to teach globalization and social change'), (2018, 'book review the one true universal barbara ehrenreich natural causes an epidemic of wellness the certainty of dying and killing ourselves to live longer new york twelve 2018'), (2018, 'editor s notebook the personal is political')) +keywords : frozenset({'economic growth', 'artificial intelligence', 'social science', 'mathematics education', 'environmental ethics', 'pedagogy', 'gender studies'}) + +firstname : norman +lastname : anderson +middlename : reginald +year : 1992 +year_papertitle : ((1992, 'a comparison of the college aspirations and achievements of afro american secondary students from a rural county on the eastern shore of maryland during pre desegregation 1965 1969 immediate desegregation 1970 1974 and later desegregation 1985 1989 periods'),) +keywords : frozenset({'african american studies', 'secondary education', 'black history', 'education history', 'ethnic studies'}) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : victor +lastname : lopez +middlename : None +year : 2002 +year_papertitle : ((2002, 'hip dislocations in athletes'), (2004, 'single point fixation for shoulder instability'), (2006, 'familiality of polarity at illness onset in bipolar affective disorder'), (2007, 'attempted suicide in bipolar disorder pedigrees evidence for linkage to 2p12'), (2007, 'nested association between genetic variation in tryptophan hydroxylase ii bipolar affective disorder and suicide attempts'), (2007, 'the bipolar disorder phenome database a resource for genetic studies')) +keywords : frozenset({'database', 'orthodontics', 'psychiatry', 'physical therapy', 'clinical psychology', 'genetics'}) + +firstname : victoriano +lastname : lopez +middlename : roncero +year : 1988 +year_papertitle : ((1988, 'el concepto historiografico y politico en las obras de quevedo spanish text'),) +keywords : frozenset({'european history', 'romance literature'}) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jodi +lastname : eastberg +middlename : r b +year : 2011 +year_papertitle : ((2011, 'valuing in decision making ability teaching learning and assessment across the curriculum and campus culture at alverno college'), (2012, 'beyond the annotated bibliography engaging students with library collections')) +keywords : frozenset({'medical education', 'library science', 'pedagogy', 'information retrieval'}) + +firstname : jodi +lastname : eastberg +middlename : rhea bartley +year : 2009 +year_papertitle : ((2009, 'west meets east british perceptions of china through the life and works of sir george thomas staunton 1781 1859'),) +keywords : frozenset({'european history', 'history'}) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : matthew +lastname : emerson +middlename : y +year : 2012 +year_papertitle : ((2012, 'book review engaging the word the new testament and the christian believer'), (2012, 'book review jesus paul and the people of god a theological dialogue with n t wright'), (2013, 'arbitrary allegory typical typology or intertextual interpretation paul s use of the pentateuch in galatians 4 21 31'), (2013, 'christ and the new creation a canonical approach to the theology of the new testament'), (2013, 'i will be your god and you will be my people attachment theory and the grand narrative of scripture'), (2013, 'summaries of israel s story reviewing a compositional category'), (2014, 'book review god of the living a biblical theology'), (2014, 'book review the hermeneutics of the apostolic proclamation the center of paul s method of scriptural interpretationthe hermeneutics of the apostolic proclamation the center of paul s method of scriptural interpretation by batesmatthew w waco tx baylor university press 2012 pp vii 400 cloth 69 95'), (2014, 'book review the kingdom of god as liturgical empire a theological commentary on 1 2 chroniclesthe kingdom of god as liturgical empire a theological commentary on 1 2 chronicles by hahnscott w grand rapids mi baker academic 2012 pp xi 225 paper 24 99'), (2014, 'book review the messiah his brothers and the nationsthe messiah his brothers and the nations by hoodjason b london uk t t clark 2011 pp xii 193 cloth 110')) +keywords : frozenset({'religious studies', 'psychoanalysis', 'literature', 'theology'}) + +firstname : matthew +lastname : emerson +middlename : charles +year : 1988 +year_papertitle : ((1988, 'decorated clay tobacco pipes from the chesapeake'),) +keywords : frozenset({'archaeology', 'american history', 'american studies'}) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eric +lastname : mogren +middlename : w +year : 2001 +year_papertitle : ((2001, 'the animals came dancing native american sacred ecology and animal kinship howard l harrod tucson the university of arizona press 2000 220 pages'),) +keywords : frozenset({'gerontology', 'anthropology'}) + +firstname : eric +lastname : mogren +middlename : thomas +year : 2011 +year_papertitle : ((2011, 'governance in the united states columbia river basin an historical analysis'),) +keywords : frozenset({'american history', 'aquatic sciences', 'social structure', 'public administration'}) + +8/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : celeste +lastname : bustamante +middlename : gonzalez de +year : 2009 +year_papertitle : ((2009, 'the past and the future of brazilian television news'), (2010, 'club de senoritas productions of mexican femininity in the 1950s'), (2013, 'muy buenas noches mexico television and the cold war'), (2014, 'journalism in times of violence social media use by us and mexican journalists working in northern mexico'), (2014, 'silencing mexico a study of influences on journalists in the northern states'), (2016, 'professionalism under threat of violence journalism reflexivity and the potential for collective professional autonomy in northern mexico')) +keywords : frozenset({'cartography', 'social psychology', 'public relations', 'multimedia', 'media studies', 'gender studies', 'economic history', 'criminology'}) + +firstname : celestine +lastname : bustamante +middlename : gonzalez de +year : 2006 +year_papertitle : ((2006, 'i tele visiones i tele visions the making of mexican television news 1950 1970'),) +keywords : frozenset({'journalism', 'latin american history'}) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : miller +middlename : t +year : 2004 +year_papertitle : ((2004, 'a jury of one s godless peers'), (2004, 'religion uniquely disfavored'), (2005, 'the legal death of terri schiavo'), (2006, 'darwin in dover pa'), (2008, 'intervention and free markets'), (2008, 'the coase theorem and the preferential option for the poor'), (2008, 'wrongful omissions by corporate directors stone v ritter and adapting the process model of the delaware business judgment rule'), (2009, 'canceling the deal two models of material adverse change clauses in business combination agreements'), (2009, 'the economics of deal risk allocating risk through mac clauses in business combination agreements'), (2009, 'toward a theory of human rights')) +keywords : frozenset({'economic policy', 'religious studies', 'art history', 'law', 'positive economics', 'environmental ethics', 'actuarial science'}) + +firstname : robert +lastname : miller +middlename : joseph +year : 1986 +year_papertitle : ((1986, 'prophecy and persecution in luke acts martyr prophet'),) +keywords : frozenset({'biblical studies', 'religious history'}) + +9/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : greg +lastname : guelcher +middlename : p +year : 2004 +year_papertitle : ((2004, 'the making of japanese manchuria 1904 1932 by yoshihisa tak matsusaka cambridge harvard university asia center 2001 522 pp 52 00 hardcover isbn 0 674 00369 1'), (2010, 'a history of pashtun migration 1775 2006 by robert nichols')) +keywords : frozenset({'political economy', 'humanities', 'economic history'}) + +firstname : gregory +lastname : guelcher +middlename : paul +year : 1999 +year_papertitle : ((1999, 'dreams of empire the japanese agricultural colonization of manchuria 1931 1945 in history and memory'),) +keywords : frozenset({'history', 'international law', 'international relations', 'agricultural economics'}) + +9/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : greg +lastname : robinson +middlename : None +year : 2001 +year_papertitle : ((2001, 'by order of the president'), (2003, 'mendez v westminister asian latino coalition triumphant'), (2004, 'mine okubo the new york years'), (2005, 'korematsu and beyond japanese americans and the origins of strict scrutiny'), (2007, 'paul laurence dunbar a credit to his race'), (2008, 'mine okubo following her own road')) +keywords : frozenset({'applied mathematics', 'art history', 'law', 'humanities', 'literature'}) + +firstname : gregory +lastname : robinson +middlename : keith +year : 2011 +year_papertitle : ((2011, 'the trickster archetype tracing the trickster myths to their proto trickster roots'),) +keywords : frozenset({'american history', 'native american studies'}) + +10/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dong +lastname : yoon +middlename : sup +year : 1996 +year_papertitle : ((1996, 'a case of paraesophageal hernia repaired by laparoscopic approach'), (1996, 'laparoscopy assisted radical subtotal gastrectomy for early gastric carcinoma'), (1997, 'breast conserving therapy in stage i ii breast cancer in korea'), (1997, 'surgical treatment of intestinal beh c c et s disease'), (1997, 'surgical treatment of intestinal beha et s disease'), (1997, 'surgical treatment of intestinal behcet s disease'), (1998, 'cell proliferation index and the expression of p53 and bcl 2 in tumorous and non tumorous lesions of hepatocellular carcinoma and metastatic liver cancer'), (1999, 'expression of biliary antigen and its clinical significance in hepatocellular carcinoma'), (1999, 'non alcoholic duct destructive chronic pancreatitis recognition before definitive treatment'), (1999, 'villous adenoma of the bile ducts a case report and a review of the reported cases in korea')) +keywords : frozenset({'cancer research', 'pathology', 'surgery', 'general surgery', 'gastroenterology', 'internal medicine'}) + +firstname : dongyoung +lastname : yoon +middlename : None +year : 2010 +year_papertitle : ((2010, 'gad and nathan the portrayal of socio religious roles and functions of david s prophets in the light of ancient near eastern prophet king relationship'),) +keywords : frozenset({'biblical studies', 'near eastern studies', 'religious history', 'judaic studies'}) + +10/10 positive, 19/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jesse +lastname : spohnholz +middlename : None +year : 2007 +year_papertitle : ((2007, 'olympias and chrysostom the debate over wesel s reformed deaconesses 1568 1609'), (2008, 'multiconfessional celebration of the eucharist in sixteenth century wesel'), (2013, 'from priest s whore to pastor s wife clerical marriage and the process of reform in the early german reformation by marjorie elizabeth plummer burlington vt ashgate 2012 pp xvii 340 cloth 119 95 isbn 987 1 4094 4154 0'), (2014, 'calvinism and religious exile during the revolt of the netherlands 1568 1609')) +keywords : frozenset({'theology', 'gender studies', 'religious studies', 'classics'}) + +firstname : jesse +lastname : spohnholz +middlename : albert +year : 2004 +year_papertitle : ((2004, 'strangers and neighbors the tactics of toleration in the dutch exile community of wesel 1550 1590'),) +keywords : frozenset({'european history', 'history', 'religious history'}) + +10/10 positive, 20/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +INFO:rlr.crossvalidation:using cross validation to find optimum alpha... +/home/christoph/anaconda3/envs/science-career-tempenv/lib/python3.9/site-packages/rlr/crossvalidation.py:116: RuntimeWarning: invalid value encountered in double_scalars + matthews_cc = ((true_dupes * true_distinct +INFO:rlr.crossvalidation:optimum alpha: 0.000010, score 0.583195042160939 +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:SimplePredicate: (firstTokenPredicate, lastname) +Done in 31.282833449045818 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..e771465 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_advisors_9015.log @@ -0,0 +1,738 @@ +Namespace(testing=False, verbose=1, field=['materials science'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [192562407] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0005853454271952311 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 269.55807259480156 minutes + +Starting active labeling... +firstname : r +lastname : dover +middlename : bruce van +year_range : (2009,) +main_us_institutions_year : ((2009, 'cornell university'),) +all_us_institutions_year : ((2009, 'cornell university'),) + +firstname : r +lastname : dover +middlename : b van +year_range : (1982, 2021) +main_us_institutions_year : ((1983, 'bell labs'), (1986, 'bell labs'), (1987, 'bell labs'), (1988, 'bell labs'), (1989, 'bell labs'), (1991, 'bell labs'), (1992, 'bell labs'), (1993, 'bell labs'), (1994, 'bell labs'), (1995, 'bell labs'), (1996, 'bell labs'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2018, 'cornell university'), (2021, 'cornell university')) +all_us_institutions_year : ((1983, 'bell labs'), (1986, 'bell labs'), (1987, 'bell labs'), (1988, 'bell labs'), (1989, 'bell labs'), (1990, 'bell labs'), (1991, 'bell labs'), (1992, 'bell labs'), (1993, 'bell labs'), (1994, 'bell labs'), (1995, 'bell labs'), (1996, 'bell labs'), (1998, 'bell labs'), (1999, 'bell labs'), (2005, 'cornell university'), (2006, 'cornell university'), (2007, 'bell labs'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2018, 'cornell university'), (2021, 'cornell university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : r +lastname : pease +middlename : fabian +year_range : (1991,) +main_us_institutions_year : ((1991, 'stanford university'),) +all_us_institutions_year : ((1991, 'stanford university'),) + +firstname : r +lastname : pease +middlename : f w +year_range : (1870, 2012) +main_us_institutions_year : ((1870, 'stanford university'), (1966, 'university of california berkeley'), (1967, 'university of california berkeley'), (1981, 'stanford university'), (1983, 'stanford university'), (1984, 'stanford university'), (1986, 'stanford university'), (1987, 'stanford university'), (1988, 'stanford university'), (1989, 'stanford university'), (1990, 'stanford university'), (1991, 'stanford university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'stanford university'), (1995, 'stanford university'), (1996, 'stanford university'), (1997, 'stanford university'), (2003, 'stanford university'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2009, 'stanford university'), (2010, 'stanford university'), (2011, 'stanford university'), (2012, 'stanford university')) +all_us_institutions_year : ((1870, 'stanford university'), (1966, 'university of california berkeley'), (1967, 'university of california berkeley'), (1981, 'stanford university'), (1982, 'stanford university'), (1983, 'stanford university'), (1984, 'stanford university'), (1986, 'stanford university'), (1987, 'stanford university'), (1988, 'stanford university'), (1989, 'stanford university'), (1990, 'stanford university'), (1991, 'stanford university'), (1992, 'stanford university'), (1993, 'stanford university'), (1994, 'stanford university'), (1995, 'stanford university'), (1996, 'stanford university'), (1997, 'stanford university'), (1998, 'stanford university'), (1999, 'stanford university'), (2000, 'stanford university'), (2001, 'stanford university'), (2003, 'stanford university'), (2005, 'stanford university'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2009, 'stanford university'), (2010, 'stanford university'), (2011, 'stanford university'), (2012, 'stanford university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : boon +lastname : teo +middlename : t +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of illinois chicago'),) +all_us_institutions_year : ((2002, 'university of illinois chicago'),) + +firstname : boon +lastname : teo +middlename : k +year_range : (1981, 2017) +main_us_institutions_year : ((1987, 'university of illinois at chicago'), (1988, 'university of illinois at chicago'), (1990, 'university of illinois at chicago'), (1991, 'university of illinois at chicago'), (1992, 'university of illinois at chicago'), (1993, 'university of illinois at chicago'), (1995, 'university of illinois at chicago'), (1997, 'university of illinois at chicago'), (1998, 'university of illinois at chicago'), (1999, 'university of illinois at chicago'), (2000, 'university of illinois at chicago'), (2001, 'university of illinois at chicago'), (2002, 'university of illinois at chicago'), (2003, 'university of illinois at chicago'), (2004, 'university of illinois at chicago'), (2005, 'university of illinois at chicago'), (2006, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2009, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2011, 'university of illinois at chicago'), (2012, 'university of illinois at chicago'), (2013, 'university of illinois at chicago'), (2016, 'university of illinois at chicago'), (2017, 'university of illinois at chicago')) +all_us_institutions_year : ((1987, 'university of illinois at chicago'), (1988, 'university of illinois at chicago'), (1989, 'university of illinois at chicago'), (1990, 'university of illinois at chicago'), (1991, 'university of illinois at chicago'), (1992, 'university of illinois at chicago'), (1993, 'university of illinois at chicago'), (1995, 'university of illinois at chicago'), (1997, 'university of illinois at chicago'), (1998, 'university of illinois at chicago'), (1999, 'university of illinois at chicago'), (2000, 'university of illinois at chicago'), (2001, 'university of illinois at chicago'), (2001, 'wright patterson air force base'), (2002, 'university of illinois at chicago'), (2002, 'wright patterson air force base'), (2003, 'university of illinois at chicago'), (2004, 'university of illinois at chicago'), (2005, 'university of illinois at chicago'), (2006, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2009, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2011, 'university of illinois at chicago'), (2012, 'university of illinois at chicago'), (2013, 'university of illinois at chicago'), (2016, 'university of illinois at chicago'), (2017, 'university of illinois at chicago')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jonathan +lastname : owen +middlename : s +year_range : (2015,) +main_us_institutions_year : ((2015, 'columbia university'),) +all_us_institutions_year : ((2015, 'columbia university'),) + +firstname : jonathan +lastname : owen +middlename : j +year_range : (1991, 1998) +main_us_institutions_year : ((1994, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1998, 'university of california santa barbara')) +all_us_institutions_year : ((1994, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1998, 'university of california santa barbara')) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : roger +lastname : barker +middlename : l +year_range : (1992,) +main_us_institutions_year : ((1992, 'north carolina state university'),) +all_us_institutions_year : ((1992, 'north carolina state university'),) + +firstname : roger +lastname : barker +middlename : a +year_range : (1988, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((1999, 'norwich university'),) + +3/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mircea +lastname : grigoriu +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'cornell university'),) +all_us_institutions_year : ((1998, 'cornell university'),) + +firstname : mircea +lastname : grigoriu +middlename : None +year_range : (1977, 2021) +main_us_institutions_year : ((1977, 'massachusetts institute of technology'), (1980, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1984, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (1999, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) +all_us_institutions_year : ((1977, 'massachusetts institute of technology'), (1980, 'cornell university'), (1981, 'cornell university'), (1982, 'cornell university'), (1984, 'cornell university'), (1985, 'cornell university'), (1986, 'cornell university'), (1987, 'cornell university'), (1988, 'cornell university'), (1989, 'cornell university'), (1990, 'cornell university'), (1991, 'cornell university'), (1992, 'cornell university'), (1993, 'cornell university'), (1994, 'cornell university'), (1995, 'cornell university'), (1996, 'cornell university'), (1997, 'cornell university'), (1998, 'cornell university'), (1999, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2004, 'cornell university'), (2006, 'cornell university'), (2007, 'cornell university'), (2007, 'sandia national laboratories'), (2008, 'cornell university'), (2008, 'university at buffalo'), (2009, 'cornell university'), (2009, 'sandia national laboratories'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : fuller +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'georgia institute of technology'),) +all_us_institutions_year : ((2008, 'georgia institute of technology'),) + +firstname : thomas +lastname : fuller +middlename : f +year_range : (1992, 2021) +main_us_institutions_year : ((1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1994, 'lawrence berkeley national laboratory'), (2002, 'utc power'), (2007, 'georgia institute of technology'), (2007, 'georgia tech research institute'), (2008, 'georgia institute of technology'), (2008, 'georgia tech research institute'), (2009, 'georgia institute of technology'), (2010, 'georgia institute of technology'), (2010, 'georgia tech research institute'), (2011, 'georgia institute of technology'), (2012, 'georgia institute of technology'), (2013, 'georgia institute of technology'), (2014, 'georgia institute of technology'), (2015, 'georgia institute of technology'), (2015, 'georgia tech research institute'), (2016, 'georgia institute of technology'), (2017, 'georgia institute of technology'), (2018, 'georgia institute of technology'), (2019, 'georgia institute of technology'), (2020, 'georgia institute of technology')) +all_us_institutions_year : ((1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1994, 'lawrence berkeley national laboratory'), (2000, 'university of california berkeley'), (2000, 'utc power'), (2002, 'utc power'), (2007, 'georgia institute of technology'), (2007, 'georgia tech research institute'), (2008, 'georgia institute of technology'), (2008, 'georgia tech research institute'), (2008, 'utc power'), (2009, 'georgia institute of technology'), (2009, 'georgia tech research institute'), (2010, 'georgia institute of technology'), (2010, 'georgia tech research institute'), (2011, 'georgia institute of technology'), (2011, 'georgia tech research institute'), (2012, 'georgia institute of technology'), (2013, 'georgia institute of technology'), (2014, 'georgia institute of technology'), (2014, 'georgia tech research institute'), (2015, 'georgia institute of technology'), (2015, 'georgia tech research institute'), (2016, 'georgia institute of technology'), (2017, 'georgia institute of technology'), (2018, 'georgia institute of technology'), (2019, 'georgia institute of technology'), (2020, 'georgia institute of technology')) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : justin +lastname : hanes +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'johns hopkins university'),) +all_us_institutions_year : ((2009, 'johns hopkins university'),) + +firstname : justin +lastname : hanes +middlename : None +year_range : (1995, 2021) +main_us_institutions_year : ((1995, 'massachusetts institute of technology'), (1996, 'massachusetts institute of technology'), (1997, 'massachusetts institute of technology'), (1998, 'massachusetts institute of technology'), (1999, 'johns hopkins university'), (2001, 'johns hopkins university school of medicine'), (2002, 'johns hopkins university'), (2003, 'johns hopkins university school of medicine'), (2003, 'johns hopkins university'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university school of medicine'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university school of medicine'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university school of medicine'), (2015, 'johns hopkins university school of medicine'), (2016, 'johns hopkins university school of medicine'), (2017, 'johns hopkins university school of medicine'), (2018, 'johns hopkins university school of medicine'), (2019, 'johns hopkins university school of medicine'), (2020, 'johns hopkins university school of medicine')) +all_us_institutions_year : ((1995, 'massachusetts institute of technology'), (1996, 'massachusetts institute of technology'), (1997, 'johns hopkins university school of medicine'), (1997, 'massachusetts institute of technology'), (1997, 'pennsylvania state university'), (1998, 'johns hopkins university'), (1998, 'massachusetts institute of technology'), (1999, 'johns hopkins university'), (2001, 'johns hopkins university school of medicine'), (2001, 'massachusetts institute of technology'), (2001, 'university of pennsylvania'), (2002, 'johns hopkins university'), (2002, 'johns hopkins university school of medicine'), (2002, 'massachusetts institute of technology'), (2003, 'johns hopkins university'), (2003, 'johns hopkins university school of medicine'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2009, 'johns hopkins university school of medicine'), (2010, 'johns hopkins university'), (2010, 'johns hopkins university school of medicine'), (2011, 'johns hopkins university'), (2011, 'johns hopkins university school of medicine'), (2012, 'johns hopkins university'), (2012, 'johns hopkins university school of medicine'), (2013, 'johns hopkins university'), (2013, 'johns hopkins university school of medicine'), (2014, 'johns hopkins university'), (2014, 'johns hopkins university school of medicine'), (2015, 'johns hopkins university'), (2015, 'johns hopkins university school of medicine'), (2016, 'johns hopkins university'), (2016, 'johns hopkins university school of medicine'), (2017, 'johns hopkins university'), (2017, 'johns hopkins university school of medicine'), (2018, 'johns hopkins university'), (2018, 'johns hopkins university school of medicine'), (2019, 'johns hopkins university'), (2019, 'johns hopkins university school of medicine'), (2020, 'johns hopkins university'), (2020, 'johns hopkins university school of medicine')) + +5/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : haydn +lastname : chen +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((1997, 'university of illinois at urbana champaign'),) + +firstname : haydn +lastname : chen +middlename : None +year_range : (1977, 2020) +main_us_institutions_year : ((1977, 'northwestern university'), (1980, 'university of illinois at urbana champaign'), (1981, 'university of illinois at urbana champaign'), (1982, 'university of illinois at urbana champaign'), (1983, 'university of illinois at urbana champaign'), (1984, 'university of illinois at urbana champaign'), (1985, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'hodges university'), (1989, 'hodges university'), (1990, 'university of illinois at urbana champaign'), (1991, 'university of illinois at urbana champaign'), (1992, 'university of illinois at urbana champaign'), (1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1994, 'hodges university'), (1995, 'hodges university'), (1996, 'university of illinois at urbana champaign'), (1997, 'university of illinois at urbana champaign'), (1998, 'hodges university'), (1999, 'university of illinois at urbana champaign'), (2000, 'university of illinois at urbana champaign'), (2001, 'hodges university'), (2012, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1977, 'argonne national laboratory'), (1977, 'northwestern university'), (1980, 'university of illinois at urbana champaign'), (1981, 'university of illinois at urbana champaign'), (1982, 'university of illinois at urbana champaign'), (1983, 'university of illinois at urbana champaign'), (1984, 'university of illinois at urbana champaign'), (1985, 'hodges university'), (1985, 'university of illinois at urbana champaign'), (1986, 'hodges university'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'hodges university'), (1988, 'university of illinois at urbana champaign'), (1989, 'hodges university'), (1989, 'university of illinois at urbana champaign'), (1990, 'hodges university'), (1990, 'university of illinois at urbana champaign'), (1991, 'hodges university'), (1991, 'university of illinois at urbana champaign'), (1992, 'hodges university'), (1992, 'university of illinois at urbana champaign'), (1993, 'hodges university'), (1993, 'university of illinois at urbana champaign'), (1994, 'hodges university'), (1994, 'university of illinois at urbana champaign'), (1995, 'hodges university'), (1995, 'university of illinois at urbana champaign'), (1996, 'hodges university'), (1996, 'university of illinois at urbana champaign'), (1997, 'hodges university'), (1997, 'university of illinois at urbana champaign'), (1998, 'hodges university'), (1998, 'university of illinois at urbana champaign'), (1999, 'hodges university'), (1999, 'university of illinois at urbana champaign'), (2000, 'hodges university'), (2000, 'university of illinois at urbana champaign'), (2001, 'hodges university'), (2002, 'hodges university'), (2002, 'university of illinois at urbana champaign'), (2003, 'hodges university'), (2003, 'university of illinois at urbana champaign'), (2004, 'hodges university'), (2004, 'university of illinois at urbana champaign'), (2005, 'hodges university'), (2006, 'hodges university'), (2006, 'university of illinois at urbana champaign'), (2011, 'hodges university'), (2012, 'university of illinois at urbana champaign'), (2013, 'hodges university'), (2019, 'university of illinois at urbana champaign')) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : henrique +lastname : reis +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2007, 'university of illinois at urbana champaign'),) + +firstname : henrique +lastname : reis +middlename : l +year_range : (2014, 2015) +main_us_institutions_year : ((2014, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((2014, 'university of illinois at urbana champaign'), (2015, 'university of illinois at urbana champaign')) + +7/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kyaw +lastname : u +middlename : tha paw +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of california davis'),) +all_us_institutions_year : ((2000, 'university of california davis'),) + +firstname : kyaw +lastname : u +middlename : tha paw +year_range : (1975, 2021) +main_us_institutions_year : ((1975, 'massachusetts institute of technology'), (1979, 'yale university'), (1980, 'yale university'), (1982, 'purdue university'), (1983, 'purdue university'), (1984, 'purdue university'), (1985, 'purdue university'), (1986, 'university of california davis'), (1987, 'university of california davis'), (1988, 'university of california davis'), (1989, 'university of california davis'), (1992, 'university of california davis'), (1993, 'university of california davis'), (1995, 'university of california davis'), (1997, 'university of california'), (2000, 'university of california davis'), (2002, 'university of california davis'), (2003, 'university of california davis'), (2004, 'university of california'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2008, 'university of california davis'), (2009, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2012, 'university of california davis'), (2013, 'university of california davis'), (2014, 'university of california davis'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis'), (2020, 'university of california davis'), (2021, 'university of california davis')) +all_us_institutions_year : ((1975, 'massachusetts institute of technology'), (1979, 'yale university'), (1980, 'yale university'), (1982, 'purdue university'), (1983, 'purdue university'), (1984, 'purdue university'), (1985, 'purdue university'), (1986, 'university of california davis'), (1987, 'university of california davis'), (1988, 'university of california davis'), (1989, 'university of california davis'), (1992, 'university of california davis'), (1993, 'university of california davis'), (1995, 'university of california davis'), (1997, 'university of california'), (2000, 'university of california davis'), (2002, 'university of california davis'), (2003, 'university of california davis'), (2004, 'university of california'), (2004, 'university of california davis'), (2005, 'university of california davis'), (2006, 'university of california'), (2006, 'university of california davis'), (2007, 'university of california davis'), (2008, 'university of california davis'), (2009, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2012, 'university of california davis'), (2013, 'university of california davis'), (2014, 'university of california'), (2014, 'university of california davis'), (2015, 'university of california davis'), (2016, 'massachusetts institute of technology'), (2016, 'university of california'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'massachusetts institute of technology'), (2019, 'university of california davis'), (2020, 'university of california davis'), (2021, 'university of california davis')) + +7/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : l +lastname : popoleski +middlename : d timmie +year_range : (1997,) +main_us_institutions_year : ((1997, 'university of maryland baltimore county'),) +all_us_institutions_year : ((1997, 'university of maryland baltimore county'),) + +firstname : l +lastname : topoleski +middlename : d timmie +year_range : (1996, 2020) +main_us_institutions_year : ((1996, 'university of maryland baltimore county'), (1998, 'university of maryland baltimore county'), (1999, 'university of maryland baltimore county'), (2000, 'university of maryland baltimore county'), (2007, 'university of maryland baltimore county'), (2010, 'university of maryland baltimore county'), (2013, 'university of maryland baltimore county'), (2014, 'university of maryland baltimore county'), (2015, 'university of maryland baltimore county'), (2016, 'university of maryland baltimore county'), (2019, 'university of maryland baltimore county'), (2019, 'center for devices and radiological health'), (2020, 'university of maryland baltimore county')) +all_us_institutions_year : ((1996, 'university of maryland baltimore county'), (1998, 'university of maryland baltimore county'), (1999, 'university of maryland baltimore county'), (2000, 'university of maryland baltimore county'), (2007, 'university of maryland baltimore county'), (2010, 'university of maryland baltimore county'), (2011, 'university of maryland baltimore county'), (2012, 'university of maryland baltimore county'), (2013, 'center for devices and radiological health'), (2013, 'food and drug administration'), (2013, 'university of maryland baltimore county'), (2014, 'center for devices and radiological health'), (2014, 'university of maryland baltimore county'), (2015, 'center for devices and radiological health'), (2015, 'university of maryland baltimore county'), (2016, 'center for devices and radiological health'), (2016, 'food and drug administration'), (2016, 'university of maryland baltimore county'), (2017, 'university of maryland baltimore county'), (2019, 'center for devices and radiological health'), (2019, 'university of maryland baltimore county'), (2020, 'university of maryland baltimore county')) + +8/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : krishna +lastname : rajan +middlename : None +year_range : (1993,) +main_us_institutions_year : ((1993, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((1993, 'rensselaer polytechnic institute'),) + +firstname : krishna +lastname : rajana +middlename : m +year_range : (1994, 1995) +main_us_institutions_year : ((1994, 'pennsylvania state university'), (1995, 'pennsylvania state university')) +all_us_institutions_year : ((1993, 'pennsylvania state university'), (1994, 'pennsylvania state university'), (1995, 'pennsylvania state university'), (1996, 'pennsylvania state university')) + +9/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeffrey +lastname : lang +middlename : h +year_range : (2008,) +main_us_institutions_year : ((2008, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2008, 'massachusetts institute of technology'),) + +firstname : jeffrey +lastname : yang +middlename : None +year_range : (2013, 2019) +main_us_institutions_year : ((2014, 'university of washington'), (2016, 'massachusetts institute of technology'), (2016, 'university of washington'), (2017, 'massachusetts institute of technology'), (2017, 'university of washington'), (2018, 'massachusetts institute of technology'), (2018, 'pfizer'), (2019, 'university of washington')) +all_us_institutions_year : ((2014, 'university of washington'), (2016, 'massachusetts institute of technology'), (2016, 'university of washington'), (2017, 'massachusetts institute of technology'), (2017, 'university of washington'), (2018, 'massachusetts institute of technology'), (2018, 'pfizer'), (2019, 'university of washington')) + +9/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nandika +lastname : souza +middlename : anne d +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of north texas'),) +all_us_institutions_year : ((2000, 'university of north texas'),) + +firstname : nandika +lastname : dsouza +middlename : ann +year_range : (2005, 2020) +main_us_institutions_year : ((2005, 'university of north texas'), (2016, 'university of north texas'), (2020, 'university of north texas')) +all_us_institutions_year : ((2005, 'university of north texas'), (2016, 'university of north texas'), (2020, 'university of north texas')) + +9/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : weinberg +middlename : c +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of arizona'),) +all_us_institutions_year : ((1998, 'university of arizona'),) + +firstname : michael +lastname : wei +middlename : c +year_range : (2011, 2012) +main_us_institutions_year : ((2012, 'johns hopkins university applied physics laboratory'), (2012, 'johns hopkins university')) +all_us_institutions_year : ((2012, 'johns hopkins university'), (2012, 'johns hopkins university applied physics laboratory')) + +9/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : silverthorn +middlename : barbara +year_range : (2011,) +main_us_institutions_year : ((2011, 'marquette university'),) +all_us_institutions_year : ((2011, 'marquette university'),) + +firstname : m +lastname : silver +middlename : None +year_range : (2001, 2004) +main_us_institutions_year : ((2001, 'agilent technologies'), (2002, 'agilent technologies')) +all_us_institutions_year : ((2000, 'agilent technologies'), (2001, 'agilent technologies'), (2002, 'agilent technologies')) + +9/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : d +lastname : douglass +middlename : None +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of california los angeles'),) +all_us_institutions_year : ((1993, 'university of california los angeles'),) + +firstname : susanne +lastname : douglas +middlename : None +year_range : (2002, 2017) +main_us_institutions_year : ((2002, 'california institute of technology'), (2004, 'california institute of technology'), (2005, 'jet propulsion laboratory'), (2008, 'california institute of technology'), (2009, 'jet propulsion laboratory'), (2010, 'jet propulsion laboratory'), (2010, 'california institute of technology'), (2012, 'california institute of technology'), (2017, 'california institute of technology')) +all_us_institutions_year : ((2002, 'california institute of technology'), (2004, 'california institute of technology'), (2005, 'jet propulsion laboratory'), (2008, 'california institute of technology'), (2009, 'jet propulsion laboratory'), (2010, 'california institute of technology'), (2010, 'jet propulsion laboratory'), (2012, 'california institute of technology'), (2017, 'california institute of technology')) + +9/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christoph +lastname : beckermann +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of iowa'),) +all_us_institutions_year : ((2009, 'university of iowa'),) + +firstname : kevin +lastname : becker +middlename : g +year_range : (1991, 2020) +main_us_institutions_year : ((1991, 'vanderbilt university'), (1992, 'university of iowa'), (1993, 'national institutes of health'), (1994, 'vanderbilt university'), (1994, 'national institutes of health'), (1995, 'national institutes of health'), (1996, 'national institutes of health'), (1997, 'national institutes of health'), (1998, 'national institutes of health'), (1999, 'national institutes of health'), (2000, 'national institutes of health'), (2001, 'national institutes of health'), (2002, 'national institutes of health'), (2003, 'national institutes of health'), (2004, 'national institutes of health'), (2005, 'national institutes of health'), (2006, 'national institutes of health'), (2007, 'national institutes of health'), (2008, 'national institutes of health'), (2009, 'national institutes of health'), (2010, 'national institutes of health'), (2011, 'national institutes of health'), (2012, 'national institutes of health'), (2013, 'national institutes of health'), (2014, 'national institutes of health'), (2015, 'national institutes of health'), (2016, 'national institutes of health'), (2017, 'national institutes of health'), (2018, 'national institutes of health'), (2019, 'national institutes of health'), (2020, 'national institutes of health')) +all_us_institutions_year : ((1991, 'vanderbilt university'), (1992, 'university of iowa'), (1993, 'national institutes of health'), (1993, 'vanderbilt university'), (1994, 'national institutes of health'), (1994, 'vanderbilt university'), (1995, 'national institutes of health'), (1996, 'national institutes of health'), (1997, 'national institutes of health'), (1998, 'national institutes of health'), (1999, 'national institutes of health'), (2000, 'national institutes of health'), (2001, 'national institutes of health'), (2001, 'university of california irvine'), (2002, 'national institutes of health'), (2003, 'national institutes of health'), (2004, 'national institutes of health'), (2005, 'national institutes of health'), (2006, 'national institutes of health'), (2006, 'united states department of health and human services'), (2007, 'national institute on drug abuse'), (2007, 'national institutes of health'), (2008, 'national institutes of health'), (2009, 'national institutes of health'), (2010, 'national institute on drug abuse'), (2010, 'national institutes of health'), (2011, 'national institutes of health'), (2011, 'university of maryland baltimore'), (2012, 'national institutes of health'), (2012, 'united states department of health and human services'), (2013, 'national institutes of health'), (2014, 'national institutes of health'), (2015, 'national institutes of health'), (2016, 'national institutes of health'), (2017, 'national institutes of health'), (2018, 'national institutes of health'), (2019, 'national institutes of health'), (2020, 'national institutes of health')) + +9/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : y +lastname : lin +middlename : k +year_range : (1992,) +main_us_institutions_year : ((1992, 'florida atlantic university'),) +all_us_institutions_year : ((1992, 'florida atlantic university'),) + +firstname : y +lastname : li +middlename : None +year_range : (1997, 1999) +main_us_institutions_year : ((1997, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1997, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign')) + +9/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alexandridis +lastname : paschalis +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university at buffalo'),) +all_us_institutions_year : ((2008, 'university at buffalo'),) + +firstname : e +lastname : paschalis +middlename : p +year_range : (1994, 1995) +main_us_institutions_year : ((1994, 'university at buffalo'), (1995, 'university at buffalo')) +all_us_institutions_year : ((1994, 'university at buffalo'), (1995, 'university at buffalo')) + +9/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yi +lastname : li +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'polytechnic university'),) +all_us_institutions_year : ((2011, 'polytechnic university'),) + +firstname : yi +lastname : li +middlename : None +year_range : (2010, 2013) +main_us_institutions_year : ((2010, 'northeastern university'), (2011, 'northeastern university'), (2013, 'northeastern university')) +all_us_institutions_year : ((2010, 'northeastern university'), (2011, 'northeastern university'), (2013, 'northeastern university')) + +9/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elaine +lastname : charlson +middlename : m +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of missouri columbia'),) +all_us_institutions_year : ((1992, 'university of missouri columbia'),) + +firstname : e +lastname : charlson +middlename : j +year_range : (1990, 1996) +main_us_institutions_year : ((1992, 'university of missouri'), (1995, 'university of missouri'), (1996, 'university of missouri')) +all_us_institutions_year : ((1992, 'university of missouri'), (1995, 'university of missouri'), (1996, 'university of missouri')) + +9/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : mantovani +middlename : g +year_range : (1999,) +main_us_institutions_year : ((1999, 'florida institute of technology'),) +all_us_institutions_year : ((1999, 'florida institute of technology'),) + +firstname : j +lastname : mantovani +middlename : g +year_range : (1989, 1990) +main_us_institutions_year : ((1990, 'university of tennessee'),) +all_us_institutions_year : ((1990, 'university of tennessee'),) + +9/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jayant +lastname : kumar +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of massachusetts lowell'),) +all_us_institutions_year : ((2012, 'university of massachusetts lowell'),) + +firstname : jayant +lastname : kumar +middlename : None +year_range : (1986, 1989) +main_us_institutions_year : ((1986, 'university of southern california'), (1987, 'university of southern california'), (1988, 'university of southern california'), (1989, 'university of southern california')) +all_us_institutions_year : ((1986, 'university of southern california'), (1987, 'university of southern california'), (1988, 'university of southern california'), (1989, 'university of southern california')) + +9/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : ellison +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'clemson university'),) +all_us_institutions_year : ((2000, 'clemson university'),) + +firstname : michael +lastname : ellison +middlename : j +year_range : (1982, 2020) +main_us_institutions_year : ((1987, 'massachusetts institute of technology'), (1989, 'massachusetts institute of technology'), (1991, 'massachusetts institute of technology'), (1991, 'university of chicago')) +all_us_institutions_year : ((1987, 'massachusetts institute of technology'), (1989, 'massachusetts institute of technology'), (1991, 'massachusetts institute of technology'), (1991, 'university of chicago')) + +9/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : paul +lastname : barton +middlename : i +year_range : (2009,) +main_us_institutions_year : ((2009, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2009, 'massachusetts institute of technology'),) + +firstname : paul +lastname : barton +middlename : None +year_range : (2007, 2009) +main_us_institutions_year : ((2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan')) +all_us_institutions_year : ((2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan')) + +9/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : linda +lastname : katehi +middlename : p b +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of michigan'),) +all_us_institutions_year : ((2003, 'university of michigan'),) + +firstname : l +lastname : katchi +middlename : p b +year_range : (1995, 2002) +main_us_institutions_year : ((1995, 'university of michigan'), (2002, 'university of michigan')) +all_us_institutions_year : ((1995, 'university of michigan'), (2002, 'university of michigan')) + +9/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ji +lastname : lee +middlename : ung +year_range : (2014,) +main_us_institutions_year : ((2014, 'university at albany suny'),) +all_us_institutions_year : ((2014, 'university at albany suny'),) + +firstname : ji +lastname : lee +middlename : ung +year_range : (2009, 2010) +main_us_institutions_year : ((2009, 'state university of new york system'),) +all_us_institutions_year : ((2009, 'state university of new york system'),) + +10/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : w +lastname : sweitz +middlename : rudolf +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of new hampshire main campus'),) +all_us_institutions_year : ((2000, 'university of new hampshire main campus'),) + +firstname : w +lastname : seitz +middlename : rudolf +year_range : (1972, 2021) +main_us_institutions_year : ((1977, 'university of new hampshire'), (1982, 'university of new hampshire'), (1984, 'university of new hampshire'), (1985, 'university of new hampshire'), (1986, 'university of new hampshire'), (1987, 'university of new hampshire'), (1989, 'university of new hampshire'), (1990, 'university of new hampshire'), (1991, 'university of new hampshire'), (1993, 'university of new hampshire'), (1994, 'university of new hampshire'), (1995, 'university of new hampshire'), (1997, 'university of new hampshire'), (1999, 'university of new hampshire'), (2003, 'university of new hampshire'), (2011, 'university of new hampshire'), (2012, 'university of new hampshire'), (2018, 'university of new hampshire'), (2019, 'university of new hampshire'), (2020, 'university of new hampshire'), (2021, 'university of new hampshire')) +all_us_institutions_year : ((1977, 'university of new hampshire'), (1981, 'university of new hampshire'), (1982, 'university of new hampshire'), (1984, 'university of new hampshire'), (1985, 'university of new hampshire'), (1986, 'university of new hampshire'), (1987, 'university of new hampshire'), (1988, 'university of new hampshire'), (1989, 'university of new hampshire'), (1990, 'university of new hampshire'), (1991, 'university of new hampshire'), (1993, 'university of new hampshire'), (1994, 'university of new hampshire'), (1995, 'university of new hampshire'), (1997, 'university of new hampshire'), (1999, 'university of new hampshire'), (2003, 'university of new hampshire'), (2011, 'university of new hampshire'), (2012, 'university of new hampshire'), (2013, 'university of new hampshire'), (2018, 'university of new hampshire'), (2019, 'university of new hampshire'), (2020, 'university of new hampshire'), (2021, 'university of new hampshire')) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : k +lastname : hsia +middlename : jimmy +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2002, 'university of illinois at urbana champaign'),) + +firstname : k +lastname : hsieh +middlename : c +year_range : (1993, 2007) +main_us_institutions_year : ((1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1995, 'university of illinois at urbana champaign'), (1997, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign'), (2001, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2006, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1995, 'university of illinois at urbana champaign'), (1997, 'university of illinois at urbana champaign'), (1999, 'university of illinois at urbana champaign'), (2001, 'university of illinois at urbana champaign'), (2005, 'university of illinois at urbana champaign'), (2006, 'university of illinois at urbana champaign')) + +12/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lionel +lastname : kimerling +middlename : c +year_range : (2006,) +main_us_institutions_year : ((2006, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2006, 'massachusetts institute of technology'),) + +firstname : lionel +lastname : moh +middlename : c h +year_range : (2014, 2018) +main_us_institutions_year : ((2014, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology'), (2018, 'massachusetts institute of technology')) +all_us_institutions_year : ((2014, 'massachusetts institute of technology'), (2017, 'massachusetts institute of technology'), (2018, 'massachusetts institute of technology')) + +12/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : i +lastname : ume +middlename : charles +year_range : (1998,) +main_us_institutions_year : ((1998, 'georgia institute of technology'),) +all_us_institutions_year : ((1998, 'georgia institute of technology'),) + +firstname : i +lastname : dragomir +middlename : c +year_range : (2005, 2006) +main_us_institutions_year : ((2005, 'georgia institute of technology'), (2006, 'georgia institute of technology')) +all_us_institutions_year : ((2005, 'georgia institute of technology'), (2006, 'georgia institute of technology')) + +12/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : melissa +lastname : tate +middlename : l knothe +year_range : (2012,) +main_us_institutions_year : ((2012, 'case western reserve university'),) +all_us_institutions_year : ((2012, 'case western reserve university'),) + +firstname : m +lastname : tate +middlename : l knothe +year_range : (1998, 2003) +main_us_institutions_year : ((2003, 'cleveland clinic'),) +all_us_institutions_year : ((2003, 'cleveland clinic'), (2004, 'cleveland clinic')) + +12/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yongrak +lastname : kim +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of nebraska lincoln'),) +all_us_institutions_year : ((2007, 'university of nebraska lincoln'),) + +firstname : yong +lastname : kim +middlename : ho +year_range : (2003, 2021) +main_us_institutions_year : ((2008, 'university of southern california'), (2009, 'university of southern california'), (2010, 'university of southern california'), (2011, 'university of southern california'), (2012, 'university of southern california'), (2013, 'university of southern california'), (2014, 'university of north carolina at chapel hill'), (2015, 'research triangle park'), (2018, 'national research council'), (2019, 'national research council'), (2020, 'university of north carolina at chapel hill'), (2021, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((2008, 'university of southern california'), (2009, 'university of southern california'), (2010, 'university of southern california'), (2011, 'university of southern california'), (2012, 'university of southern california'), (2013, 'university of southern california'), (2014, 'university of north carolina at chapel hill'), (2015, 'research triangle park'), (2018, 'national research council'), (2018, 'research triangle park'), (2019, 'national research council'), (2020, 'university of north carolina at chapel hill'), (2021, 'university of north carolina at chapel hill'), (2021, 'university of southern california')) + +12/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1007.040619079272 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..e9e00f9 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_materials science_christoph_degree0_graduates_8515.log @@ -0,0 +1,612 @@ +Namespace(testing=False, verbose=1, field=['materials science'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [192562407] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0007550279299418131 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +Time elapsed: 96.09755960702896 minutes + +Starting active labeling... +firstname : yenny +lastname : chandra +middlename : None +year : 2012 +year_papertitle : ((2012, 'a lower bound on snap through instability of curved beams under thermomechanical loads'), (2012, 'numerical pathologies in snap through simulations'), (2013, 'a numerical investigation of snap through in a shallow arch like model'), (2013, 'characterizing dynamic transitions associated with snap through of clamped shallow arches'), (2013, 'transient behavior of curved structures'), (2015, 'a robust composite time integration scheme for snap through problems')) +keywords : frozenset({'structural engineering', 'control theory', 'mechanics', 'mechanical engineering', 'classical mechanics', 'control engineering'}) + +firstname : yenny +lastname : chandra +middlename : None +year : 2013 +year_papertitle : ((2013, 'transient behavior of curved structures'),) +keywords : frozenset({'mechanical engineering', 'materials science', 'civil engineering'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : alvin +lastname : loke +middlename : leng sun +year : 1996 +year_papertitle : ((1996, 'kinetics of copper drift in pecvd dielectrics'), (1997, 'effect of texture on the electromigration of cvd copper'), (1997, 'electrical extraction of the in plane dielectric constant of fluorinated polyimide'), (1998, 'barrier seed layer requirements for copper interconnects'), (1998, 'copper drift in low k polymer dielectrics for ulsi metallization'), (1998, 'electrical leakage at low k polyimide teos interface'), (1998, 'electrical reliability of cu and low k dielectric integration'), (1998, 'electromigration of submicron damascene copper interconnects'), (1998, 'microstructure and reliability of copper interconnects'), (1999, 'process integration issues of low permittivity dielectrics with copper for high performance interconnects')) +keywords : frozenset({'analytical chemistry', 'composite material', 'electrical engineering', 'electronic engineering', 'optoelectronics', 'engineering physics', 'metallurgy'}) + +firstname : alvin +lastname : loke +middlename : leng sun +year : 1999 +year_papertitle : ((1999, 'process integration issues of low permittivity dielectrics with copper for high performance interconnects'),) +keywords : frozenset({'electrical engineering', 'materials science'}) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yi +lastname : zheng +middlename : None +year : 2012 +year_papertitle : ((2012, 'biological agent sensing integrated circuit basic a new cmos magnetic biosensor system'), (2013, 'improved biological agent sensing integrated circuit basic')) +keywords : frozenset({'electrical engineering', 'electronic engineering', 'acoustics'}) + +firstname : yi +lastname : zheng +middlename : None +year : 2014 +year_papertitle : ((2014, 'biological agent sensing integrated circuit basic a new complementary metal oxide semiconductor cmos magnetic biosensor system'),) +keywords : frozenset({'biomedical engineering', 'electrical engineering', 'computer engineering'}) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ashim +lastname : gupta +middlename : None +year : 2010 +year_papertitle : ((2010, 'abstract 2563 upregulation of mir 155 is associated with cisplatin resistance in the head and neck cancer umscc 10b 15s cells'), (2011, 'detection of micrornas in cultured cells and paraffin embedded tissue specimens by in situ hybridization'), (2013, 'alterations in disc height foraminal height and foraminal width following one and two level axialifa ââ a ââ a radiological analysis'), (2013, 'sagittal lumbar alignment following axial lumbar interbody fusion with trans1'), (2013, 'single walled carbon nanotube composites for bone tissue engineering'), (2014, 'in vitro evaluation of three dimensional single walled carbon nanotube composites for bone tissue engineering'), (2014, 'surgical retrieval isolation and in vitro expansion of human anterior cruciate ligament derived cells for tissue engineering applications'), (2015, 'abstract 142 a microbiologic comparison of acellular dermal matrices as an aseptic reconstructive material and a scaffold for stem cell in growth'), (2015, 'biocompatibility of single walled carbon nanotube composites for bone regeneration'), (2015, 'bone graft substitutes for spine fusion a brief review')) +keywords : frozenset({'surgery', 'composite material', 'nuclear medicine', 'molecular biology', 'anatomy', 'cancer research', 'immunology', 'biomedical engineering'}) + +firstname : ashim +lastname : gupta +middlename : None +year : 2014 +year_papertitle : ((2014, 'evaluation of non functionalized single walled carbon nanotubes composites for bone tissue engineering'),) +keywords : frozenset({'molecular biology', 'materials science', 'biomedical engineering'}) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lewis +lastname : arco +middlename : gomez de +year : 2007 +year_papertitle : ((2007, 'optical properties of carbon nanotubes near infrared induced hyperthermia as therapy for brain tumors'), (2009, 'scalable chemical vapor deposition of single and few layer graphene'), (2009, 'synthesis and metal to semiconductor conversion of carbon nanotubes by light irradiation'), (2009, 'wafer scale fabrication of separated carbon nanotube thin film transistors for display applications'), (2010, '2 4 6 trinitrotoluene tnt chemical sensing based on aligned single walled carbon nanotubes and zno nanowires'), (2010, 'continuous highly flexible and transparent graphene films by chemical vapor deposition for organic photovoltaics'), (2010, 'synthesis and device applications of high density aligned carbon nanotubes using low pressure chemical vapor deposition and stacked multiple transfer')) +keywords : frozenset({'nanotechnology', 'chemical physics', 'chemical engineering', 'optoelectronics', 'biophysics'}) + +firstname : lewis +lastname : arco +middlename : mortimer gomez de +year : 2010 +year_papertitle : ((2010, 'graphene and carbon nanotubes synthesis characterization and applications for beyond silicon electronics'),) +keywords : frozenset({'physical chemistry', 'materials science', 'nanotechnology'}) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yung +lastname : wang +middlename : jui +year : 2009 +year_papertitle : ((2009, 'high resolution compton scattering as a probe of the fermi surface in the iron based superconductor lao1 xfxfeas'), (2009, 'momentum density fermi surface and directional compton profile in the iron based superconductor laofeas'), (2010, 'dynamical structure factor computations in extended momentum space in electron doped cuprates'), (2010, 'proposal to determine the fermi surface topology of a doped iron based superconductor using bulk sensitive fourier transform compton scattering'), (2011, 'imaging doped holes in a cuprate superconductor with high resolution compton scattering'), (2011, 'search for new topological insulators'), (2011, 'topological insulators in the quaternary chalcogenide compounds and ternary famatinite compounds'), (2012, 'persistence of covalent bonding in liquid silicon probed by inelastic x ray scattering')) +keywords : frozenset({'condensed matter physics', 'chemical physics', 'atomic physics', 'molecular physics', 'quantum mechanics'}) + +firstname : yung +lastname : wang +middlename : jui +year : 2013 +year_papertitle : ((2013, 'modeling electronic structure and spectroscopy in correlated materials and topological insulators'),) +keywords : frozenset({'condensed matter physics', 'materials science'}) + +5/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sun +lastname : kim +middlename : kyoung +year : 2002 +year_papertitle : ((2002, 'an experimental study on the thermoplastic filament winding process using commingled yarns'), (2002, 'an inverse method for estimating thermophysical properties of fluid flowing in a circulating duct'), (2002, 'determination of permeability of fibrous medium considering inertial effects'), (2002, 'solution of inverse heat conduction problems using maximum entropy method'), (2002, 'solving a nonlinear inverse convection problem using the sequential gradient method'), (2003, 'a solution method for a nonlinear three dimensional inverse heat conduction problem using the sequential gradient method combined with cubic spline function specification'), (2003, 'characterization of boundary conditions during thermoplastic composite tape lay up process using an inverse method'), (2003, 'determination of in plane permeability of fiber preforms by the gas flow method using pressure measurements'), (2003, 'determination of three dimensional permeability of fiber preforms by the inverse parameter estimation technique'), (2003, 'solution to inverse heat conduction problem in nanoscale using sequential method')) +keywords : frozenset({'mathematical optimization', 'composite material', 'geometry', 'applied mathematics', 'mechanics', 'mechanical engineering', 'thermodynamics', 'mathematical analysis'}) + +firstname : sun +lastname : kim +middlename : wook +year : 2013 +year_papertitle : ((2013, 'modulation of stem cell fate by electrical stimulation'),) +keywords : frozenset({'cellular biology', 'physiology', 'biomedical engineering'}) + +6/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ke +lastname : xu +middlename : None +year : 2005 +year_papertitle : ((2005, 'a nanosized y2o3 based catalytic chemiluminescent sensor for trimethylamine'), (2005, 'an energy transfer cataluminescence reaction on nanosized catalysts and its application to chemical sensors'), (2006, 'cover picture ground state equilibrium thermodynamics and switching kinetics of bistable 2 rotaxanes switched in solution polymer gels and molecular electronic devices chem eur j 1 2006'), (2006, 'ground state equilibrium thermodynamics and switching kinetics of bistable 2 rotaxanes switched in solution polymer gels and molecular electronic devices'), (2006, 'size dependent transport and thermoelectric properties of individual polycrystalline bismuth nanowires'), (2007, 'a 160 kilobit molecular electronic memory patterned at 10 11 bits per square centimetre'), (2007, 'the emergence of a coupled quantum dot array in a doped silicon nanowire gated by ultrahigh density top gate electrodes'), (2008, 'controlled fabrication and electrical properties of long quasi one dimensional superconducting nanowire arrays'), (2008, 'long highly ordered high temperature superconductor nanowire arrays')) +keywords : frozenset({'condensed matter physics', 'nanotechnology', 'photochemistry', 'analytical chemistry', 'stereochemistry', 'electrical engineering', 'optoelectronics', 'engineering physics', 'thermodynamics', 'physical chemistry', 'inorganic chemistry', 'crystallography'}) + +firstname : ke +lastname : xu +middlename : None +year : 2014 +year_papertitle : ((2014, 'graphene based nanostructures and dna based biomolecule sensors'),) +keywords : frozenset({'nanotechnology', 'electrical engineering', 'biomedical engineering'}) + +6/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : xu +lastname : lu +middlename : None +year : 2010 +year_papertitle : ((2010, 'ferroelectric properties and magnetoelectric effect in 1 x ni0 93co0 02cu0 05fe2o4 xpzt particulate composites'), (2010, 'ferroelectric thin film diaphragm resonators for bio detection'), (2010, 'piezoelectric membrane based biosensor platform'), (2011, 'piezoelectric biosensor platform based on zno micro membrane'), (2012, 'biosensor platform based on stress improved piezoelectric membrane'), (2013, 'transformation of vibration shapes in resonances of micromachined piezoelectric circular membrane'), (2016, 'nano clip based composites with a low percolation threshold and high dielectric constant'), (2017, 'piezoelectric excited membrane for liquids viscosity and mass density measurement'), (2017, 'structure evolution and exceptionally ultra low hysteresis unipolar electric field induced strain in 1 x nanbo 3 x batio 3 lead free ferroelectrics')) +keywords : frozenset({'condensed matter physics', 'analytical chemistry', 'structural engineering', 'composite material', 'optoelectronics', 'thermodynamics'}) + +firstname : xu +lastname : lu +middlename : None +year : 2014 +year_papertitle : ((2014, 'thermoelectric properties of natural mineral based tetrahedrite compounds'),) +keywords : frozenset({'condensed matter physics', 'materials science'}) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : qiang +lastname : zhu +middlename : None +year : 1994 +year_papertitle : ((1994, 'thermo mechanical processing and superplastic deformation of complex aluminum alloys by torsion testing'), (1996, 'deformation kinetics and microstructure during hot torsion of al99 9 al 11zn al 1mn al 5mg al 5mg 0 6mn al 5mg 0 6mn 1fe'), (1996, 'energy dissipation efficiency analysis and microstructural mechanisms in aluminum'), (1996, 'evolution of grain structure in hot torsion of al 5mg 0 7mn aa5083'), (2000, 'microstructural evolution of aluminium magnesium alloys during thermomechanical processing'), (2000, 'microstructural modelling of aluminium alloys during thermomechanical processing'), (2001, 'evolution of microbands in high purity aluminium 3 magnesium during hot deformation testing in tension compression'), (2001, 'mechanical properties of alumina glass dental composites')) +keywords : frozenset({'mechanics', 'metallurgy', 'thermodynamics', 'composite material'}) + +firstname : qiang +lastname : zhu +middlename : None +year : 2000 +year_papertitle : ((2000, 'features of nonlinear wave wave and wave body interactions'),) +keywords : frozenset({'ocean engineering', 'mechanics'}) + +7/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wei +lastname : wang +middlename : None +year : 2016 +year_papertitle : ((2016, 'adaptive surrogate model based fast path planning for spacecraft formation reconfiguration on libration point orbits'), (2016, 'adaptive surrogate model based multi objective transfer trajectory optimization between different libration points'), (2017, 'a fast multi objective optimization design method for emergency libration point orbits transfer between the sun earth and the earth moon systems'), (2017, 'an analytical study for global buckling of circular tubes under axial and oblique compression'), (2017, 'an improved two arcs deformational theoretical model of the expansion tubes'), (2018, 'analysis of the carrying capacity for tubes under oblique loading'), (2018, 'coupling of creases and shells'), (2019, 'ethephon improved stalk strength of maize zea mays l mainly through altering internode morphological traits to modulate mechanical properties under field conditions'), (2019, 'programmable self assembly of magnetic handshake materials'), (2020, 'surface electrochemical actuators for micron scale fluid pumping and autonomous swimming')) +keywords : frozenset({'mathematical optimization', 'composite material', 'geometry', 'structural engineering', 'horticulture', 'control theory', 'mechanics', 'optoelectronics', 'computer hardware'}) + +firstname : weicheng +lastname : wang +middlename : david +year : 1990 +year_papertitle : ((1990, 'ultrasonic evaluation of elastic and interfacial properties of composite materials and laminates'),) +keywords : frozenset({'mechanical engineering', 'mechanics'}) + +7/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : xianghui +lastname : xiao +middlename : None +year : 2015 +year_papertitle : ((2015, '4d model based iterative reconstruction from interlaced views'), (2015, 'the three dimensional morphology of growing dendrites'), (2015, 'timbir a method for time space reconstruction from interlaced views'), (2016, 'direct model based tomographic reconstruction of the complex refractive index'), (2017, 'solidification in 4d from dendrites to eutectics')) +keywords : frozenset({'artificial intelligence', 'algorithm', 'data science', 'biological system', 'computer vision', 'metallurgy'}) + +firstname : xiangyu +lastname : xiao +middlename : None +year : 2004 +year_papertitle : ((2004, 'a multiple sensors approach to wood defect detection'),) +keywords : frozenset({'wood', 'technology', 'electrical engineering', 'artificial intelligence'}) + +7/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : daniel +lastname : goldman +middlename : None +year : 2002 +year_papertitle : ((2002, 'an experiment based model of oxygen transport in capillary networks under normal and septic conditions'), (2002, 'calculations of oxygen transport by red blood cells and hemoglobin solutions in capillaries'), (2002, 'modeling the effect of biophysical parameters on mass transport in capillary networks'), (2003, 'simulations of capillary network oxygen transport during transient ischemia in the presence and absence of tissue myoglobin'), (2004, 'effect of sepsis on skeletal muscle oxygen consumption and tissue oxygenation interpreting capillary oxygen transport data using a mathematical model'), (2004, 'microvascular oxygen transport abnormalities in experimental sepsis interpreted using a computational model'), (2005, 'increased leukocyte transit times through capillaries contributes to maldistribution of flow'), (2006, 'a computational model of oxygen transport in skeletal muscle for sprouting and splitting modes of angiogenesis'), (2006, 'effect of decreased o2 supply on skeletal muscle oxygenation and o2 consumption during sepsis role of heterogeneous capillary spacing and blood flow'), (2006, 'impact of early sepsis on oxygen delivery in the microvasculature')) +keywords : frozenset({'analytical chemistry', 'surgery', 'internal medicine', 'cardiology', 'mechanics', 'anatomy', 'pathology', 'biomedical engineering', 'biophysics', 'immunology', 'simulation'}) + +firstname : daniel +lastname : mccarthy +middlename : joseph +year : 1994 +year_papertitle : ((1994, 'vibration based diagnostics of reciprocating machinery'),) +keywords : frozenset({'mechanical engineering', 'mechanics'}) + +7/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hunter +lastname : bachman +middlename : None +year : 2016 +year_papertitle : ((2016, 'acoustofluidic coating of particles and cells'), (2017, 'acoustic separation of nanoparticles in continuous flow'), (2017, 'acoustofluidic bacteria separation'), (2017, 'acoustofluidic waveguides for localized control of acoustic wavefront in microfluidics'), (2017, 'enriching nanoparticles via acoustofluidics'), (2017, 'separation acoustic separation of nanoparticles in continuous flow adv funct mater 14 2017'), (2018, 'a sharp edge based acoustofluidic chemical signal generator'), (2018, 'acoustic actuation of in situ fabricated artificial cilia'), (2018, 'acoustofluidic devices controlled by cell phones'), (2019, 'on chip stool liquefaction via acoustofluidics')) +keywords : frozenset({'nanotechnology', 'analytical chemistry', 'chemical engineering', 'optoelectronics', 'optics', 'biomedical engineering', 'computer hardware', 'biological system', 'acoustics'}) + +firstname : hunter +lastname : karmel +middlename : jason +year : 2015 +year_papertitle : ((2015, 'investigating the self assembly and nanopatterning characteristics of organic molecular adlayers on silicon and graphene via scanning tunneling microscopy'),) +keywords : frozenset({'materials science', 'chemistry'}) + +7/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : tammy +lastname : summers +middlename : s edgecumbe +year : 1999 +year_papertitle : ((1999, 'mechanical properties microstructure and corrosion performance of c 22 alloy aged at 260 c to 800 c'), (1999, 'phase stability and mechanical properties of c 22 alloy aged in the temperature range 590 to 760 c for 16 000 hours'), (2000, 'influence of aging on microstructure mechanical properties and corrosion resistance of a ni 22cr 13mo 3w alloy'), (2002, 'influence of thermal aging on the mechanical and corrosion properties of gtaw welds of alloy n06022'), (2004, 'identification and quantification of topologically close packed phases tcp in alloy 22 using electron backscatter diffraction ebsd analysis')) +keywords : frozenset({'metallurgy', 'optics', 'composite material'}) + +firstname : tammy +lastname : summers +middlename : suzanne edgecumbe +year : 1991 +year_papertitle : ((1991, 'improving fatigue life in near eutectic tin lead solders'),) +keywords : frozenset({'materials science'}) + +7/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chao +lastname : lian +middlename : None +year : 2012 +year_papertitle : ((2012, 'the structural and electronic properties of silicon nanoribbons on ag 110 a first principles study'), (2013, 'strain induced phase transitions in silicene bilayers a first principles and tight binding study'), (2013, 'strain modulated electronic properties of silicon nanoribbons with armchair edges'), (2015, 'raman scattering investigation of large positive magnetoresistance material wte2'), (2015, 'the effects of thermal and electric fields on the electronic structures of silicene'), (2016, 'ab initio evidence for nonthermal characteristics in ultrafast laser melting'), (2016, 'direct evidence of metallic bands in a monolayer boron sheet'), (2016, 'magnetic dirac fermions and chern insulator supported on pristine silicon surface'), (2017, 'band gap adjustment of sic honeycomb structure through hydrogenation and fluorination'), (2017, 'high thermopower and potential thermoelectric properties of crystalline lih and nah')) +keywords : frozenset({'condensed matter physics', 'nanotechnology', 'computational chemistry', 'optoelectronics', 'physical chemistry', 'molecular physics'}) + +firstname : chao +lastname : li +middlename : None +year : 2005 +year_papertitle : ((2005, 'synthesis electronic properties and applications of oxide nanowires'),) +keywords : frozenset({'electrical engineering', 'materials science'}) + +7/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : daniel +lastname : sanders +middlename : p +year : 2000 +year_papertitle : ((2000, 'improved thermal and mechanical properties of polybenzoxazines based on alkyl substituted aromatic amines'), (2000, 'regioselectivity and network structure of difunctional alkyl substituted aromatic amine based polybenzoxazines'), (2001, 'fluoropolymer resist materials for 157nm microlithography'), (2001, 'regioselectivity of the ring opening polymerization of monofunctional alkyl substituted aromatic amine based benzoxazines'), (2002, 'metal catalyzed vinyl addition polymers for 157 nm resist applications 2 fluorinated norbornenes synthesis polymerization and initial imaging results'), (2002, 'synthesis of symmetrical trisubstituted olefins by cross metathesis'), (2003, 'a general model for selectivity in olefin cross metathesis'), (2003, 'metal catalyzed addition polymers for 157 nm resist applications synthesis and polymerization of partially fluorinated ester functionalized tricyclo 4 2 1 02 5 non 7 enes'), (2003, 'vacuum uv influenced design of polymers and dissolution inhibitors for next generation photolithography'), (2005, 'prevention of undesirable isomerization during olefin metathesis')) +keywords : frozenset({'nanotechnology', 'chemical engineering', 'organic chemistry', 'polymer chemistry'}) + +firstname : daniel +lastname : sanders +middlename : gordon +year : 2008 +year_papertitle : ((2008, 'development of friction stir welding combined with superplastic forming processes for the fabrication of titanium structures'),) +keywords : frozenset({'mechanical engineering', 'aerospace materials', 'materials science'}) + +7/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jiddu +lastname : bezares +middlename : None +year : 2008 +year_papertitle : ((2008, 'macromolecular structure of the organic framework of nacre in haliotis rufescens implications for growth and mechanical behavior'), (2010, 'macromolecular structure of the organic framework of nacre in haliotis rufescens implications for mechanical response'), (2011, 'macromolecular structure and viscoelastic response of the organic framework of nacre in haliotis rufescens a perspective and overview'), (2011, 'non explosive simulated blast loading of balsa core sandwich composite beams'), (2012, 'core structure of aligned chitin fibers within the interlamellar framework extracted from haliotis rufescens nacre part i implications for growth and mechanical response'), (2012, 'indentation of nanotwinned fcc metals implications for nanotwin stability')) +keywords : frozenset({'nanotechnology', 'chemical engineering', 'composite material', 'structural engineering', 'anatomy', 'biological system', 'metallurgy'}) + +firstname : jiddu +lastname : bezareschavez +middlename : None +year : 2013 +year_papertitle : ((2013, 'the nanomechanics of biomineralized soft tissues and organic matrices'),) +keywords : frozenset({'materials science', 'civil engineering'}) + +7/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : calvin +lastname : chan +middlename : k +year : 2003 +year_papertitle : ((2003, 'lack of thermodynamic equilibrium in conjugated organic molecular thin films'), (2004, 'contact potential difference measurements of doped organic molecular thin films'), (2004, 'measurement of interface potential change and space charge region across metal organic metal structures using kelvin probe force microscopy'), (2005, 'direct determination of the hole density of states in undoped and doped amorphous organic films with high lateral resolution'), (2005, 'electron affinities of 1 1 diaryl 2 3 4 5 tetraphenylsiloles direct measurements and comparison with experimental and theoretical estimates'), (2005, 'enhancement of iridium based organic light emitting diodes by spatial doping of the hole transport layer'), (2005, 'how do electronic carriers cross si bound alkyl monolayers'), (2005, 'nanoscale measurements of electronic properties in organic thin film transistors'), (2005, 'polarization at the gold pentacene interface'), (2006, 'threshold voltage as a measure of molecular level shift in organic thin film transistors')) +keywords : frozenset({'nanotechnology', 'condensed matter physics', 'chemical physics', 'analytical chemistry', 'atomic physics', 'optoelectronics', 'physical chemistry'}) + +firstname : calvin +lastname : chan +middlename : karfai +year : 2008 +year_papertitle : ((2008, 'materials properties and applications of nitrogen doped organic semiconductors'),) +keywords : frozenset({'electrical engineering', 'materials science'}) + +8/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : angel +lastname : perez +middlename : r torrado +year : 2014 +year_papertitle : ((2014, 'erratum to fracture surface analysis of 3d printed tensile specimens of novel abs based materials'), (2014, 'fracture surface analysis of 3d printed tensile specimens of novel abs based materials'), (2014, 'novel abs based binary and ternary polymer blends for material extrusion 3d printing'), (2015, 'comparison of stress concentrator fabrication for 3d printed polymeric izod impact test specimens'), (2015, 'mechanical electromagnetic and x ray shielding characterization of a 3d printable tungsten polycarbonate polymer matrix composite for space based applications')) +keywords : frozenset({'composite material'}) + +firstname : angel +lastname : perez +middlename : ramon torrado +year : 2015 +year_papertitle : ((2015, 'defeating anisotropy in material extrusion 3d printing via materials development'),) +keywords : frozenset({'mechanical engineering', 'materials science'}) + +9/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chun +lastname : feng +middlename : None +year : 2020 +year_papertitle : ((2020, 'on line monitoring and analysis method of three dimensional fluorescence spectrum in urban domestic sewage treatment process'), (2021, 'artificial neural networks combined multi wavelength transmission spectrum feature extraction for sensitive identification of waterborne bacteria'), (2021, 'simultaneous determination of nitrate chemical oxygen demand and turbidity in water based on uv vis absorption spectrometry combined with interval analysis'), (2021, 'turbidity compensation method based on mie scattering theory for water chemical oxygen demand determination by uv vis spectrometry')) +keywords : frozenset({'analytical chemistry', 'pattern recognition', 'optics', 'artificial intelligence'}) + +firstname : chun +lastname : geng +middlename : None +year : 2014 +year_papertitle : ((2014, 'a new role for the cyt 18 n terminus and three dimensional dna crystals as vehicles for biocatalysis'),) +keywords : frozenset({'biomedical engineering', 'molecular biology', 'biochemistry'}) + +10/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : millette +middlename : r +year : 1980 +year_papertitle : ((1980, 'aggressive water assessing the extent of the problem'), (1980, 'concentration and size of asbestos in water supplies'), (1981, 'the need to control asbestos fibers in potable water supply systems'), (1983, 'accumulation of ingested asbestos fibers in rat tissues over time'), (1983, 'asbestos in water supplies of the united states'), (1983, 'epidemiology study of the use of asbestos cement pipe for the distribution of drinking water in escambia county florida'), (1983, 'summary of discussion sessions workshop on ingested asbestos'), (1983, 'use of quantitative analysis of urine to assess exposure to asbestos fibers in drinking water in the puget sound region'), (1985, 'x ray microanalysis of calcium potassium and phosphorus in liver mitochondria stressed by carbon tetrachloride'), (1987, 'occurrence and biological activity testing of particulates in drinking water')) +keywords : frozenset({'waste management', 'medical education', 'environmental health', 'radiochemistry', 'anatomy', 'mineralogy', 'pathology', 'biochemistry', 'bioinformatics', 'environmental protection', 'physiology', 'environmental chemistry', 'natural resource economics'}) + +firstname : james +lastname : miller +middlename : christopher +year : 2013 +year_papertitle : ((2013, 'investigation of trace uranium in biological matrices'),) +keywords : frozenset({'nuclear chemistry', 'nuclear engineering'}) + +10/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eric +lastname : hansen +middlename : w +year : 1981 +year_papertitle : ((1981, 'circular harmonic image reconstruction experiments'), (1981, 'laser scanning phase modulation microscope a'), (1981, 'space variant filtering for abel type transforms a'), (1982, 'fast hankel transform algorithm a'), (1982, 'nondetour phase computer generated holograms an improved variation'), (1982, 'progress in laser scanning phase modulation microscopy a'), (1982, 'rotation invariant optical processing'), (1984, 'abel inversion by kalman filtering'), (1985, 'analysis of polarization anomaly correction in a video enhanced microscope a'), (1985, 'recursive methods for computing the abel transform and its inverse')) +keywords : frozenset({'mathematical optimization', 'optoelectronics', 'algorithm', 'mathematical analysis', 'optics'}) + +firstname : eric +lastname : hansen +middlename : james +year : 2000 +year_papertitle : ((2000, 'a two surface anisotropic damage plasticity model for plain concrete'),) +keywords : frozenset({'materials science', 'civil engineering'}) + +10/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ying +lastname : liao +middlename : None +year : 2002 +year_papertitle : ((2002, 'large eddy simulations of a stirred tank using the lattice boltzmann method on a nonuniform grid'), (2004, 'simulation of bubble motion in liquids')) +keywords : frozenset({'computational physics', 'mechanics'}) + +firstname : ying +lastname : li +middlename : jun +year : 2007 +year_papertitle : ((2007, 'culture of human embryonic stem cells on peptide functionalized polymer networks'),) +keywords : frozenset({'biomedical engineering'}) + +10/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : julibeth +lastname : hoz +middlename : m martinez de la +year : 2009 +year_papertitle : ((2009, 'size effect on the stability of cu ag nanoalloys'), (2010, 'evolution of a pt 1 1 1 surface at high oxygen coverage in acid medium'), (2010, 'theoretical infrared and terahertz spectra of an rdx aluminum complex'), (2011, 'geometric and electronic confinement effects on catalysis'), (2011, 'molecular dynamics simulations of surface oxidation on pt 111 and pt ptco pt3co 111'), (2012, 'p n junction at the interface between metallic systems'), (2013, 'characterization of electronic states inside metallic nanopores'), (2013, 'dealloying of platinum based alloy catalysts kinetic monte carlo simulations'), (2013, 'local surface structure effect on reactivity of molecules confined between metallic surfaces'), (2013, 'vibrational spectra of an rdx film over an aluminum substrate from molecular dynamics simulations and density functional theory')) +keywords : frozenset({'condensed matter physics', 'nanotechnology', 'chemical physics', 'chemical engineering', 'analytical chemistry', 'atomic physics', 'computational chemistry', 'thermodynamics', 'physical chemistry', 'molecular physics', 'metallurgy'}) + +firstname : julibeth +lastname : hoz +middlename : milena martinez de la +year : 2014 +year_papertitle : ((2014, 'enhanced catalytic activities of nanostructured materials'),) +keywords : frozenset({'materials science'}) + +10/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jennifer +lastname : kyle +middlename : reiber +year : 2009 +year_papertitle : ((2009, 'heparin antagonism by polyvalent display of cationic motifs on virus like particles'), (2011, 'analysis of light scattering from human breast tissue using a custom dual optical scanning near field optical microscope'), (2011, 'electronic microarrays in dna computing'), (2011, 'graphene metrology centimeter scale high resolution metrology of entire cvd grown graphene sheets small 18 2011'), (2011, 'molecular absorption and photodesorption in pristine and functionalized large area graphene layers'), (2011, 'photo electrical effect of pristine and functionalized graphene grown by chemical vapor deposition'), (2011, 'rapid large scale characterization of cvd graphene layers on glass using fluorescence quenching microscopy'), (2012, 'industrial graphene metrology'), (2012, 'surface characterization non invasive high throughput metrology of functionalized graphene sheets adv funct mater 21 2012'), (2012, 'transmission near field scanning optical microscopy investigation on cellular uptake behavior of iron oxide nanoparticles')) +keywords : frozenset({'nanotechnology', 'analytical chemistry', 'chemical engineering', 'theoretical computer science', 'biophysics', 'biochemistry', 'computer hardware', 'optics'}) + +firstname : jennifer +lastname : kyle +middlename : lynn reiber +year : 2012 +year_papertitle : ((2012, 'multi scale optical metrology of biomaterials and nanomaterials for medical and industrial applications'),) +keywords : frozenset({'materials science', 'medical imaging', 'electrical engineering', 'optics'}) + +11/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : ballard +middlename : h +year : 2005 +year_papertitle : ((2005, 'characterization of a military training site containing 232thorium'), (2006, 'overview of multimethod geophysical system development for enhanced near surface target detection discrimination and characterization'), (2008, 'portable magnetic frequency domain electromagnetic induction sensor system development')) +keywords : frozenset({'geophysics', 'remote sensing', 'environmental engineering'}) + +firstname : john +lastname : ballard +middlename : robert +year : 2012 +year_papertitle : ((2012, 'refocusing of dual mode ultrasound arrays for optimal therapeutic gain'),) +keywords : frozenset({'electrical engineering', 'biomedical engineering'}) + +12/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hossam +lastname : munim +middlename : e abd el +year : 2006 +year_papertitle : ((2006, 'a variational approach for shapes registration using vector maps'), (2007, 'a new global registration approach of medical imaging using vector maps'), (2007, 'a new variational approach for 3d shape registration'), (2010, 'assessment of kidney function using dynamic contrast enhanced mri techniques'), (2013, 'a novel approach for lung nodules segmentation in chest ct using level sets')) +keywords : frozenset({'radiology', 'computer vision', 'pattern recognition', 'artificial intelligence'}) + +firstname : hossam +lastname : munim +middlename : el din hassan abd el +year : 2007 +year_papertitle : ((2007, 'implicit curve surface evolution with application to the image segmentation problem'),) +keywords : frozenset({'medical imaging', 'electrical engineering', 'biomedical engineering', 'biophysics'}) + +12/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 226.45181078910826 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_mathematics_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_mathematics_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..b2a1a53 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_mathematics_christoph_degree0_advisors_9015.log @@ -0,0 +1,722 @@ +Namespace(testing=False, verbose=1, field=['mathematics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [33923547] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.00048980712890625 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 84.94286736249924 minutes + +Starting active labeling... +firstname : runze +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'pennsylvania state university'),) +all_us_institutions_year : ((2012, 'pennsylvania state university'),) + +firstname : min +lastname : li +middlename : None +year_range : (2006, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2013, 'pennsylvania state university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : xiaofan +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'illinois institute of technology'),) +all_us_institutions_year : ((2012, 'illinois institute of technology'),) + +firstname : ling +lastname : li +middlename : None +year_range : (1998, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2009, 'illinois institute of technology'), (2009, 'university of michigan'), (2010, 'illinois institute of technology'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2016, 'georgia regents university'), (2016, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : runze +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'pennsylvania state university'),) +all_us_institutions_year : ((2012, 'pennsylvania state university'),) + +firstname : min +lastname : li +middlename : None +year_range : (2006, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2013, 'pennsylvania state university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : xiaofan +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'illinois institute of technology'),) +all_us_institutions_year : ((2012, 'illinois institute of technology'),) + +firstname : ling +lastname : li +middlename : None +year_range : (1998, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2009, 'illinois institute of technology'), (2009, 'university of michigan'), (2010, 'illinois institute of technology'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2016, 'georgia regents university'), (2016, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : x +lastname : li +middlename : rong +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of new orleans'),) +all_us_institutions_year : ((2013, 'university of new orleans'),) + +firstname : x +lastname : li +middlename : rong +year_range : (1992, 2021) +main_us_institutions_year : ((1992, 'university of connecticut'), (1996, 'university of new orleans'), (1998, 'university of new orleans'), (1999, 'university of new orleans'), (2001, 'university of new orleans'), (2002, 'university of new orleans'), (2005, 'university of new orleans'), (2006, 'university of new orleans'), (2007, 'university of new orleans'), (2008, 'university of new orleans'), (2009, 'university of new orleans'), (2010, 'university of new orleans'), (2011, 'university of new orleans'), (2012, 'university of new orleans'), (2013, 'university of new orleans'), (2014, 'university of new orleans'), (2015, 'university of new orleans'), (2016, 'university of new orleans'), (2017, 'university of new orleans'), (2018, 'university of new orleans'), (2019, 'university of new orleans'), (2020, 'university of new orleans'), (2021, 'university of new orleans')) +all_us_institutions_year : ((1992, 'university of connecticut'), (1996, 'university of new orleans'), (1997, 'university of new orleans'), (1998, 'university of new orleans'), (1999, 'university of new orleans'), (2000, 'university of new orleans'), (2001, 'university of new orleans'), (2002, 'university of new orleans'), (2003, 'university of new orleans'), (2005, 'university of new orleans'), (2006, 'university of new orleans'), (2007, 'university of new orleans'), (2008, 'university of new orleans'), (2009, 'university of new orleans'), (2010, 'university of new orleans'), (2011, 'university of new orleans'), (2012, 'university of new orleans'), (2013, 'university of new orleans'), (2014, 'university of new orleans'), (2015, 'university of new orleans'), (2016, 'university of new orleans'), (2017, 'university of new orleans'), (2018, 'university of new orleans'), (2019, 'university of new orleans'), (2020, 'university of new orleans'), (2021, 'university of new orleans')) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : brian +lastname : berry +middlename : j l +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of texas at dallas'),) +all_us_institutions_year : ((2001, 'university of texas at dallas'),) + +firstname : brian +lastname : berry +middlename : j l +year_range : (1957, 2020) +main_us_institutions_year : ((1958, 'university of washington'), (1959, 'university of chicago'), (1960, 'university of chicago'), (1962, 'university of chicago'), (1964, 'university of chicago'), (1967, 'university of chicago'), (1970, 'university of chicago'), (1971, 'university of chicago'), (1972, 'university of chicago'), (1974, 'university of chicago'), (1976, 'harvard university'), (1977, 'harvard university'), (1980, 'harvard university'), (1981, 'harvard university'), (1982, 'carnegie mellon university'), (1987, 'carnegie mellon university'), (1988, 'university of texas at dallas'), (1989, 'university of texas at dallas'), (1990, 'university of texas at dallas'), (1991, 'university of texas at dallas'), (1992, 'university of texas at dallas'), (1993, 'university of texas at dallas'), (1994, 'university of texas at dallas'), (1995, 'university of texas at dallas'), (1996, 'university of texas at dallas'), (1997, 'university of texas at dallas'), (1999, 'university of texas at dallas'), (2000, 'university of texas at dallas'), (2000, 'university of texas at austin'), (2001, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2002, 'university of texas at austin'), (2003, 'university of texas at austin'), (2004, 'university of texas at dallas'), (2008, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2013, 'university of texas at dallas'), (2013, 'university of texas at austin'), (2014, 'university of texas at dallas'), (2015, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2017, 'university of texas at dallas'), (2019, 'university of texas at austin'), (2020, 'university of texas at dallas')) +all_us_institutions_year : ((1958, 'university of washington'), (1959, 'university of chicago'), (1960, 'university of chicago'), (1962, 'university of chicago'), (1964, 'university of chicago'), (1967, 'university of chicago'), (1968, 'university of chicago'), (1970, 'university of chicago'), (1971, 'university of chicago'), (1972, 'university of chicago'), (1973, 'university of chicago'), (1974, 'university of chicago'), (1976, 'harvard university'), (1977, 'harvard university'), (1979, 'harvard university'), (1980, 'carnegie mellon university'), (1980, 'harvard university'), (1981, 'harvard university'), (1982, 'carnegie mellon university'), (1985, 'carnegie mellon university'), (1986, 'carnegie mellon university'), (1987, 'carnegie mellon university'), (1988, 'university of texas at dallas'), (1989, 'university of texas at dallas'), (1990, 'university of texas at dallas'), (1991, 'university of texas at dallas'), (1992, 'university of texas at dallas'), (1993, 'university of texas at dallas'), (1994, 'university of texas at dallas'), (1995, 'university of texas at dallas'), (1996, 'university of texas at dallas'), (1997, 'university of texas at dallas'), (1999, 'university of texas at dallas'), (2000, 'university of texas at austin'), (2000, 'university of texas at dallas'), (2001, 'university of texas at dallas'), (2002, 'university of texas at austin'), (2002, 'university of texas at dallas'), (2003, 'university of texas at austin'), (2004, 'university of texas at dallas'), (2005, 'university of chicago'), (2005, 'university of texas at dallas'), (2005, 'university of washington'), (2007, 'university of texas at austin'), (2008, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2013, 'university of texas at austin'), (2013, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2015, 'university of texas at dallas'), (2016, 'university of texas at austin'), (2016, 'university of texas at dallas'), (2017, 'university of texas at dallas'), (2019, 'university of texas at austin'), (2020, 'university of texas at dallas')) + +1/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : dick +middlename : p +year_range : (1994,) +main_us_institutions_year : ((1994, 'oregon state university'),) +all_us_institutions_year : ((1994, 'oregon state university'),) + +firstname : thomas +lastname : dick +middlename : p +year_range : (1988, 2020) +main_us_institutions_year : ((1988, 'oregon state university'), (1991, 'oregon state university'), (1992, 'oregon state university'), (1994, 'oregon state university'), (1997, 'oregon state university'), (2000, 'oregon state university'), (2003, 'oregon state university'), (2016, 'oregon state university')) +all_us_institutions_year : ((1988, 'oregon state university'), (1991, 'oregon state university'), (1992, 'oregon state university'), (1994, 'oregon state university'), (1997, 'oregon state university'), (2000, 'oregon state university'), (2003, 'oregon state university'), (2016, 'oregon state university'), (2018, 'oregon state university')) + +2/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dilip +lastname : madan +middlename : b +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of maryland college park'),) +all_us_institutions_year : ((1998, 'university of maryland college park'),) + +firstname : dilip +lastname : madan +middlename : b +year_range : (1980, 2021) +main_us_institutions_year : ((1989, 'university of maryland college park'), (1990, 'university of maryland college park'), (1991, 'university of maryland college park'), (1993, 'university of maryland college park'), (1994, 'university of maryland college park'), (1995, 'university of maryland college park'), (1997, 'university of maryland college park'), (1998, 'university of maryland college park'), (1999, 'university of maryland college park'), (2000, 'university of maryland college park'), (2001, 'university of maryland college park'), (2002, 'university of maryland college park'), (2003, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'university of maryland college park'), (2007, 'university of maryland college park'), (2008, 'university of maryland college park'), (2009, 'university of maryland college park'), (2010, 'university of maryland college park'), (2011, 'university of maryland college park'), (2012, 'university of maryland college park'), (2014, 'university of maryland college park'), (2015, 'university of maryland college park'), (2016, 'university of maryland college park'), (2017, 'university of maryland college park'), (2019, 'university of maryland college park'), (2020, 'university of maryland college park'), (2021, 'university of maryland college park')) +all_us_institutions_year : ((1989, 'university of maryland college park'), (1990, 'university of maryland college park'), (1991, 'university of maryland college park'), (1993, 'university of maryland college park'), (1994, 'university of maryland college park'), (1995, 'university of maryland college park'), (1996, 'university of maryland college park'), (1997, 'university of maryland college park'), (1998, 'university of maryland college park'), (1999, 'university of maryland college park'), (2000, 'university of maryland college park'), (2001, 'university of maryland college park'), (2002, 'university of maryland college park'), (2003, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'university of maryland college park'), (2007, 'university of maryland college park'), (2008, 'university of maryland college park'), (2009, 'university of maryland college park'), (2010, 'university of maryland college park'), (2011, 'university of maryland college park'), (2012, 'university of maryland college park'), (2013, 'university of maryland college park'), (2014, 'university of maryland college park'), (2015, 'university of maryland college park'), (2016, 'university of maryland college park'), (2017, 'university of maryland college park'), (2018, 'university of maryland college park'), (2019, 'university of maryland college park'), (2020, 'university of maryland college park'), (2021, 'university of maryland college park')) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : enright +middlename : j +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of california san diego'),) +all_us_institutions_year : ((1990, 'university of california san diego'),) + +firstname : thomas +lastname : enright +middlename : j +year_range : (1975, 2010) +main_us_institutions_year : ((1978, 'university of california san diego'), (1983, 'university of california san diego'), (1985, 'university of california san diego'), (1986, 'university of california san diego'), (1990, 'university of california san diego'), (1995, 'university of california san diego'), (1997, 'university of california san diego'), (2003, 'university of california san diego'), (2004, 'university of california san diego'), (2010, 'university of california san diego')) +all_us_institutions_year : ((1978, 'university of california san diego'), (1983, 'university of california san diego'), (1985, 'university of california san diego'), (1986, 'university of california san diego'), (1990, 'university of california san diego'), (1995, 'university of california san diego'), (1997, 'university of california san diego'), (2003, 'university of california san diego'), (2004, 'university of california san diego'), (2010, 'university of california san diego'), (2014, 'university of california san diego')) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : liu +middlename : s +year_range : (2002,) +main_us_institutions_year : ((2002, 'stanford university'),) +all_us_institutions_year : ((2002, 'stanford university'),) + +firstname : jun +lastname : li +middlename : None +year_range : (1994, 2021) +main_us_institutions_year : ((1997, 'california institute of technology'), (1998, 'stanford university'), (1999, 'california institute of technology'), (2001, 'university of michigan'), (2001, 'pfizer'), (2002, 'university of michigan'), (2002, 'pfizer'), (2002, 'stanford university'), (2003, 'stanford university'), (2004, 'stanford university'), (2005, 'stanford university'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) +all_us_institutions_year : ((1997, 'california institute of technology'), (1998, 'stanford university'), (1999, 'california institute of technology'), (2001, 'pfizer'), (2001, 'university of michigan'), (2002, 'pfizer'), (2002, 'stanford university'), (2002, 'university of michigan'), (2003, 'pfizer'), (2003, 'stanford university'), (2003, 'university of michigan'), (2004, 'harvard university'), (2004, 'stanford university'), (2005, 'incyte'), (2005, 'stanford university'), (2005, 'university of michigan'), (2006, 'stanford university'), (2007, 'stanford university'), (2008, 'stanford university'), (2008, 'university of california san francisco'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'stanford university'), (2010, 'university of michigan'), (2011, 'genentech'), (2011, 'harvard university'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'harvard university'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'harvard university'), (2016, 'university of michigan'), (2017, 'harvard university'), (2017, 'university of michigan'), (2018, 'stanford university'), (2018, 'university of michigan'), (2019, 'harvard university'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) + +5/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lewis +lastname : robertson +middlename : c +year_range : (1998,) +main_us_institutions_year : ((1998, 'wesleyan university'),) +all_us_institutions_year : ((1998, 'wesleyan university'),) + +firstname : lewis +lastname : roberts +middlename : c +year_range : (2004, 2020) +main_us_institutions_year : ((2008, 'jet propulsion laboratory'), (2009, 'california institute of technology'), (2010, 'jet propulsion laboratory'), (2010, 'california institute of technology'), (2011, 'jet propulsion laboratory'), (2012, 'jet propulsion laboratory'), (2013, 'jet propulsion laboratory'), (2016, 'jet propulsion laboratory')) +all_us_institutions_year : ((2008, 'jet propulsion laboratory'), (2009, 'california institute of technology'), (2009, 'jet propulsion laboratory'), (2010, 'california institute of technology'), (2010, 'jet propulsion laboratory'), (2011, 'jet propulsion laboratory'), (2012, 'jet propulsion laboratory'), (2013, 'jet propulsion laboratory'), (2016, 'jet propulsion laboratory'), (2018, 'jet propulsion laboratory'), (2019, 'jet propulsion laboratory')) + +5/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : viswanathan +lastname : ramakrishnan +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'medical university of south carolina'),) +all_us_institutions_year : ((2014, 'medical university of south carolina'),) + +firstname : viswanath +lastname : ramakrishna +middlename : None +year_range : (1999, 2003) +main_us_institutions_year : ((2000, 'university of texas at dallas'), (2001, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas')) +all_us_institutions_year : ((2000, 'university of texas at dallas'), (2001, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas')) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mariusz +lastname : urbanski +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of north texas'),) +all_us_institutions_year : ((2012, 'university of north texas'),) + +firstname : mariusz +lastname : urbański +middlename : None +year_range : (1997, 2020) +main_us_institutions_year : ((2000, 'university of north texas'), (2004, 'university of north texas'), (2007, 'university of north texas'), (2008, 'university of north texas'), (2009, 'university of north texas'), (2010, 'university of north texas'), (2014, 'university of north texas'), (2019, 'university of north texas'), (2020, 'university of north texas')) +all_us_institutions_year : ((2000, 'university of north texas'), (2004, 'university of north texas'), (2007, 'university of north texas'), (2008, 'university of north texas'), (2009, 'university of north texas'), (2010, 'university of north texas'), (2014, 'university of north texas'), (2019, 'university of north texas'), (2020, 'university of north texas')) + +5/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : may +middlename : peter +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of chicago'),) +all_us_institutions_year : ((2011, 'university of chicago'),) + +firstname : j +lastname : carlstrom +middlename : e +year_range : (1987, 2021) +main_us_institutions_year : ((1992, 'california institute of technology'), (1993, 'california institute of technology'), (1995, 'california institute of technology'), (1996, 'california institute of technology'), (1997, 'university of chicago'), (1998, 'university of chicago'), (1999, 'university of chicago'), (2000, 'university of chicago'), (2001, 'university of chicago'), (2002, 'university of chicago'), (2003, 'university of chicago'), (2004, 'university of chicago'), (2005, 'university of chicago'), (2006, 'university of chicago'), (2007, 'university of chicago'), (2008, 'university of chicago'), (2009, 'university of chicago'), (2010, 'university of chicago'), (2011, 'university of chicago'), (2012, 'university of chicago'), (2013, 'university of chicago'), (2013, 'argonne national laboratory'), (2014, 'university of chicago'), (2015, 'university of chicago'), (2016, 'university of chicago'), (2017, 'university of chicago'), (2018, 'university of chicago'), (2019, 'university of chicago'), (2020, 'university of chicago')) +all_us_institutions_year : ((1991, 'california institute of technology'), (1992, 'california institute of technology'), (1993, 'california institute of technology'), (1995, 'california institute of technology'), (1996, 'california institute of technology'), (1996, 'university of chicago'), (1997, 'california institute of technology'), (1997, 'university of chicago'), (1998, 'university of chicago'), (1999, 'university of chicago'), (2000, 'university of chicago'), (2001, 'university of chicago'), (2002, 'university of chicago'), (2003, 'university of chicago'), (2004, 'university of chicago'), (2005, 'university of chicago'), (2006, 'university of chicago'), (2007, 'university of chicago'), (2008, 'university of chicago'), (2009, 'university of chicago'), (2010, 'university of chicago'), (2011, 'argonne national laboratory'), (2011, 'university of chicago'), (2012, 'argonne national laboratory'), (2012, 'case western reserve university'), (2012, 'university of chicago'), (2013, 'argonne national laboratory'), (2013, 'university of arizona'), (2013, 'university of chicago'), (2014, 'argonne national laboratory'), (2014, 'university of chicago'), (2015, 'argonne national laboratory'), (2015, 'lawrence berkeley national laboratory'), (2015, 'university of chicago'), (2016, 'argonne national laboratory'), (2016, 'university of chicago'), (2017, 'argonne national laboratory'), (2017, 'university of chicago'), (2018, 'argonne national laboratory'), (2018, 'university of chicago'), (2019, 'argonne national laboratory'), (2019, 'university of chicago'), (2020, 'argonne national laboratory'), (2020, 'university of chicago')) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : hales +middlename : c +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of pittsburgh'),) +all_us_institutions_year : ((2004, 'university of pittsburgh'),) + +firstname : thomas +lastname : hales +middlename : c +year_range : (1992, 2020) +main_us_institutions_year : ((1992, 'university of chicago'), (1993, 'university of chicago'), (1994, 'university of michigan'), (1997, 'university of michigan'), (2001, 'university of michigan'), (2003, 'university of pittsburgh'), (2004, 'university of pittsburgh'), (2005, 'university of pittsburgh'), (2006, 'university of pittsburgh'), (2007, 'university of pittsburgh'), (2009, 'university of pittsburgh'), (2010, 'university of pittsburgh'), (2013, 'university of pittsburgh'), (2014, 'university of pittsburgh'), (2017, 'university of pittsburgh'), (2020, 'university of pittsburgh')) +all_us_institutions_year : ((1992, 'university of chicago'), (1993, 'university of chicago'), (1994, 'university of michigan'), (1997, 'university of michigan'), (1998, 'university of michigan'), (1999, 'university of michigan'), (2001, 'university of michigan'), (2002, 'university of pittsburgh'), (2003, 'university of pittsburgh'), (2004, 'university of pittsburgh'), (2005, 'university of pittsburgh'), (2006, 'university of pittsburgh'), (2007, 'university of pittsburgh'), (2009, 'university of pittsburgh'), (2010, 'university of pittsburgh'), (2011, 'university of pittsburgh'), (2012, 'university of pittsburgh'), (2013, 'university of pittsburgh'), (2014, 'university of pittsburgh'), (2015, 'university of pittsburgh'), (2016, 'university of pittsburgh'), (2017, 'university of pittsburgh'), (2020, 'university of pittsburgh')) + +6/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : vadim +lastname : markel +middlename : a +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of pennsylvania'),) +all_us_institutions_year : ((2014, 'university of pennsylvania'),) + +firstname : vadim +lastname : markel +middlename : a +year_range : (1988, 2020) +main_us_institutions_year : ((1994, 'new mexico state university'), (1995, 'new mexico state university'), (1996, 'new mexico state university'), (1997, 'new mexico state university'), (1998, 'new mexico state university'), (1999, 'new mexico state university'), (1999, 'university of georgia'), (2000, 'washington university in st louis'), (2001, 'washington university in st louis'), (2002, 'washington university in st louis'), (2003, 'washington university in st louis'), (2004, 'university of pennsylvania'), (2005, 'university of pennsylvania'), (2006, 'university of pennsylvania'), (2007, 'university of pennsylvania'), (2008, 'university of pennsylvania'), (2009, 'university of pennsylvania'), (2010, 'university of pennsylvania'), (2011, 'university of pennsylvania'), (2012, 'university of pennsylvania'), (2013, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2015, 'university of pennsylvania'), (2018, 'university of pennsylvania'), (2019, 'university of pennsylvania'), (2020, 'university of pennsylvania')) +all_us_institutions_year : ((1994, 'new mexico state university'), (1995, 'new mexico state university'), (1996, 'new mexico state university'), (1997, 'new mexico state university'), (1998, 'new mexico state university'), (1999, 'new mexico state university'), (1999, 'university of georgia'), (2000, 'washington university in st louis'), (2001, 'new mexico state university'), (2001, 'university of washington'), (2001, 'washington university in st louis'), (2002, 'university of washington'), (2002, 'washington university in st louis'), (2003, 'washington university in st louis'), (2004, 'university of pennsylvania'), (2005, 'university of pennsylvania'), (2006, 'university of pennsylvania'), (2007, 'university of pennsylvania'), (2008, 'university of pennsylvania'), (2009, 'university of pennsylvania'), (2010, 'university of pennsylvania'), (2011, 'university of pennsylvania'), (2012, 'university of pennsylvania'), (2013, 'university of pennsylvania'), (2014, 'university of pennsylvania'), (2015, 'university of pennsylvania'), (2017, 'university of pennsylvania'), (2018, 'university of pennsylvania'), (2019, 'university of pennsylvania'), (2020, 'university of pennsylvania')) + +7/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chjan +lastname : lim +middlename : c +year_range : (1995,) +main_us_institutions_year : ((1995, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((1995, 'rensselaer polytechnic institute'),) + +firstname : chjan +lastname : lim +middlename : c +year_range : (1986, 2020) +main_us_institutions_year : ((1988, 'syracuse university'), (1988, 'university of michigan'), (1989, 'university of michigan'), (1991, 'rensselaer polytechnic institute'), (1993, 'rensselaer polytechnic institute'), (1995, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1998, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2013, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2015, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2020, 'rensselaer polytechnic institute')) +all_us_institutions_year : ((1988, 'syracuse university'), (1988, 'university of michigan'), (1989, 'university of michigan'), (1991, 'rensselaer polytechnic institute'), (1993, 'rensselaer polytechnic institute'), (1995, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1998, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2005, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2013, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2015, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute'), (2017, 'rensselaer polytechnic institute'), (2020, 'rensselaer polytechnic institute')) + +8/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : liu +middlename : s +year_range : (2013,) +main_us_institutions_year : ((2013, 'harvard university'),) +all_us_institutions_year : ((2013, 'harvard university'),) + +firstname : jun +lastname : liu +middlename : s +year_range : (1987, 2021) +main_us_institutions_year : ((1990, 'harvard university'), (1991, 'harvard university'), (1993, 'harvard university'), (1994, 'harvard university'), (1995, 'stanford university'), (1996, 'stanford university'), (1997, 'stanford university'), (1998, 'stanford university'), (1999, 'stanford university'), (2000, 'stanford university'), (2001, 'harvard university'), (2002, 'harvard university'), (2003, 'harvard university'), (2004, 'harvard university'), (2005, 'harvard university'), (2006, 'harvard university'), (2007, 'harvard university'), (2008, 'harvard university'), (2009, 'harvard university'), (2010, 'harvard university'), (2011, 'harvard university'), (2012, 'harvard university'), (2013, 'harvard university'), (2014, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university'), (2017, 'harvard university'), (2018, 'harvard university'), (2019, 'harvard university'), (2020, 'harvard university'), (2021, 'harvard university')) +all_us_institutions_year : ((1990, 'harvard university'), (1991, 'harvard university'), (1991, 'stanford university'), (1993, 'harvard university'), (1994, 'harvard university'), (1995, 'harvard university'), (1995, 'stanford university'), (1996, 'stanford university'), (1997, 'stanford university'), (1998, 'stanford university'), (1999, 'stanford university'), (2000, 'stanford university'), (2001, 'harvard university'), (2002, 'harvard university'), (2003, 'harvard university'), (2004, 'harvard university'), (2005, 'duke university'), (2005, 'harvard university'), (2006, 'harvard university'), (2006, 'university of pennsylvania'), (2007, 'harvard university'), (2008, 'harvard university'), (2008, 'university of california los angeles'), (2008, 'university of pennsylvania'), (2009, 'harvard university'), (2010, 'harvard university'), (2011, 'harvard university'), (2011, 'pennsylvania state university'), (2012, 'harvard university'), (2013, 'harvard university'), (2014, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university'), (2017, 'harvard university'), (2018, 'harvard university'), (2019, 'harvard university'), (2019, 'stanford university'), (2020, 'harvard university'), (2020, 'stanford university'), (2021, 'harvard university')) + +9/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : avner +lastname : freidman +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of minnesota'),) +all_us_institutions_year : ((1995, 'university of minnesota'),) + +firstname : avner +lastname : friedman +middlename : None +year_range : (1957, 2021) +main_us_institutions_year : ((1960, 'university of minnesota'), (1960, 'university of california berkeley'), (1962, 'northwestern university'), (1962, 'university of minnesota'), (1962, 'stanford university'), (1963, 'northwestern university'), (1964, 'northwestern university'), (1965, 'northwestern university'), (1967, 'northwestern university'), (1968, 'northwestern university'), (1969, 'northwestern university'), (1970, 'northwestern university'), (1971, 'northwestern university'), (1972, 'northwestern university'), (1973, 'northwestern university'), (1974, 'northwestern university'), (1975, 'northwestern university'), (1976, 'northwestern university'), (1977, 'northwestern university'), (1978, 'northwestern university'), (1979, 'northwestern university'), (1980, 'northwestern university'), (1981, 'northwestern university'), (1982, 'northwestern university'), (1983, 'northwestern university'), (1984, 'northwestern university'), (1985, 'northwestern university'), (1986, 'northwestern university'), (1987, 'purdue university'), (1988, 'purdue university'), (1989, 'university of minnesota'), (1990, 'university of minnesota'), (1991, 'university of minnesota'), (1992, 'university of minnesota'), (1993, 'university of minnesota'), (1994, 'university of minnesota'), (1995, 'university of minnesota'), (1996, 'university of minnesota'), (1997, 'university of minnesota'), (1998, 'university of minnesota'), (1999, 'university of minnesota'), (2000, 'university of minnesota'), (2001, 'university of minnesota'), (2002, 'university of minnesota'), (2003, 'ohio state university'), (2004, 'ohio state university'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2008, 'ohio state university'), (2009, 'ohio state university'), (2010, 'ohio state university'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2013, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'ohio state university'), (2020, 'ohio state university'), (2021, 'mathematical biosciences institute'), (2021, 'ohio state university')) +all_us_institutions_year : ((1960, 'university of california berkeley'), (1960, 'university of minnesota'), (1962, 'northwestern university'), (1962, 'stanford university'), (1962, 'university of minnesota'), (1963, 'northwestern university'), (1964, 'northwestern university'), (1965, 'northwestern university'), (1967, 'northwestern university'), (1968, 'northwestern university'), (1969, 'northwestern university'), (1970, 'northwestern university'), (1971, 'northwestern university'), (1972, 'northwestern university'), (1973, 'northwestern university'), (1974, 'northwestern university'), (1975, 'northwestern university'), (1976, 'northwestern university'), (1977, 'northwestern university'), (1977, 'university of pittsburgh'), (1978, 'northwestern university'), (1978, 'university of minnesota'), (1979, 'northwestern university'), (1980, 'northwestern university'), (1981, 'northwestern university'), (1982, 'northwestern university'), (1983, 'northwestern university'), (1983, 'university of minnesota'), (1984, 'northwestern university'), (1985, 'northwestern university'), (1985, 'university of chicago'), (1986, 'northwestern university'), (1986, 'purdue university'), (1987, 'northwestern university'), (1987, 'purdue university'), (1987, 'university of minnesota'), (1988, 'northern illinois university'), (1988, 'purdue university'), (1988, 'university of minnesota'), (1989, 'university of minnesota'), (1990, 'university of minnesota'), (1991, 'university of minnesota'), (1992, 'university of minnesota'), (1992, 'university of notre dame'), (1993, 'university of minnesota'), (1994, 'university of minnesota'), (1995, 'university of minnesota'), (1996, 'university of minnesota'), (1997, 'university of minnesota'), (1998, 'university of minnesota'), (1999, 'university of minnesota'), (2000, 'university of minnesota'), (2001, 'university of minnesota'), (2002, 'ohio state university'), (2002, 'university of minnesota'), (2003, 'ohio state university'), (2003, 'university of minnesota'), (2004, 'ohio state university'), (2005, 'mathematical biosciences institute'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2008, 'mathematical biosciences institute'), (2008, 'ohio state university'), (2009, 'ohio state university'), (2010, 'mathematical biosciences institute'), (2010, 'ohio state university'), (2011, 'mathematical biosciences institute'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2013, 'mathematical biosciences institute'), (2013, 'ohio state university'), (2014, 'mathematical biosciences institute'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2016, 'southern illinois university edwardsville'), (2017, 'howard university'), (2017, 'mathematical biosciences institute'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'mathematical biosciences institute'), (2019, 'ohio state university'), (2020, 'mathematical biosciences institute'), (2020, 'ohio state university'), (2021, 'mathematical biosciences institute'), (2021, 'ohio state university')) + +10/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yuhong +lastname : yang +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of minnesota'),) +all_us_institutions_year : ((2008, 'university of minnesota'),) + +firstname : yuhong +lastname : yan +middlename : None +year_range : (2000, 2020) +main_us_institutions_year : ((2005, 'national research council'), (2006, 'national research council'), (2007, 'national research council')) +all_us_institutions_year : ((2004, 'national research council'), (2005, 'national research council'), (2006, 'national research council'), (2007, 'national research council'), (2009, 'national research council')) + +11/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hong +lastname : wang +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of south carolina'),) +all_us_institutions_year : ((2009, 'university of south carolina'),) + +firstname : hong +lastname : yan +middlename : None +year_range : (1988, 2021) +main_us_institutions_year : ((1988, 'michigan state university'), (1992, 'university of washington'), (1994, 'university of washington'), (1995, 'university of washington'), (2001, 'university of texas at austin'), (2006, 'u s securities and exchange commission'), (2006, 'university of texas at austin'), (2007, 'university of south carolina'), (2008, 'university of south carolina'), (2010, 'university of south carolina'), (2010, 'binghamton university'), (2011, 'binghamton university')) +all_us_institutions_year : ((1988, 'michigan state university'), (1990, 'dupont'), (1990, 'university of michigan'), (1991, 'university of michigan'), (1992, 'university of washington'), (1994, 'university of washington'), (1995, 'university of washington'), (2001, 'university of texas at austin'), (2003, 'university of texas at austin'), (2005, 'university of south carolina'), (2006, 'u s securities and exchange commission'), (2006, 'university of texas at austin'), (2007, 'university of south carolina'), (2008, 'university of south carolina'), (2009, 'university of south carolina'), (2010, 'binghamton university'), (2010, 'university of south carolina'), (2011, 'binghamton university'), (2011, 'university of south carolina'), (2012, 'binghamton university'), (2013, 'binghamton university')) + +11/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : franks +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'northwestern university'),) +all_us_institutions_year : ((1994, 'northwestern university'),) + +firstname : john +lastname : francis +middlename : None +year_range : (2000, 2021) +main_us_institutions_year : ((2010, 'northwestern university'), (2012, 'northwestern university'), (2013, 'northwestern university'), (2014, 'northwestern university'), (2017, 'northwestern university'), (2018, 'northwestern university'), (2019, 'northwestern university'), (2020, 'northwestern university')) +all_us_institutions_year : ((2008, 'northwestern university'), (2010, 'northwestern university'), (2011, 'northwestern university'), (2012, 'northwestern university'), (2013, 'northwestern university'), (2014, 'northwestern university'), (2015, 'northwestern university'), (2017, 'northwestern university'), (2018, 'northwestern university'), (2019, 'northwestern university'), (2020, 'northwestern university')) + +11/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yu +lastname : cheng +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of pittsburgh'),) +all_us_institutions_year : ((2015, 'university of pittsburgh'),) + +firstname : yu +lastname : chen +middlename : None +year_range : (2016, 2020) +main_us_institutions_year : ((2019, 'university of pennsylvania'), (2020, 'university of pennsylvania')) +all_us_institutions_year : ((2018, 'university of pennsylvania'), (2019, 'university of pennsylvania'), (2020, 'university of pennsylvania')) + +11/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : li +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of california riverside'),) +all_us_institutions_year : ((2012, 'university of california riverside'),) + +firstname : jun +lastname : liu +middlename : None +year_range : (2011, 2020) +main_us_institutions_year : ((2011, 'chinese academy of sciences'), (2013, 'chinese academy of sciences')) +all_us_institutions_year : ((2011, 'chinese academy of sciences'), (2013, 'chinese academy of sciences')) + +11/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dennis +lastname : lin +middlename : k j +year_range : (2015,) +main_us_institutions_year : ((2015, 'pennsylvania state university'),) +all_us_institutions_year : ((2015, 'pennsylvania state university'),) + +firstname : j +lastname : li +middlename : k j +year_range : (1979, 2020) +main_us_institutions_year : ((1979, 'university of pennsylvania'), (1980, 'university of pennsylvania'), (1983, 'rutgers university'), (1984, 'rutgers university'), (1985, 'rutgers university'), (1986, 'rutgers university'), (1988, 'rutgers university'), (1989, 'rutgers university'), (1990, 'rutgers university'), (1991, 'rutgers university'), (1992, 'rutgers university'), (1993, 'rutgers university'), (1994, 'rutgers university'), (1995, 'rutgers university'), (1996, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2012, 'rutgers university'), (2013, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university'), (2018, 'rutgers university'), (2019, 'rutgers university'), (2020, 'rutgers university')) +all_us_institutions_year : ((1977, 'university of pennsylvania'), (1979, 'university of pennsylvania'), (1980, 'university of pennsylvania'), (1983, 'rutgers university'), (1984, 'rutgers university'), (1985, 'rutgers university'), (1986, 'rutgers university'), (1988, 'rutgers university'), (1989, 'rutgers university'), (1990, 'rutgers university'), (1991, 'rutgers university'), (1992, 'rutgers university'), (1993, 'rutgers university'), (1994, 'rutgers university'), (1995, 'rutgers university'), (1996, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2004, 'university of pennsylvania'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2012, 'rutgers university'), (2013, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2016, 'university of pennsylvania'), (2017, 'rutgers university'), (2018, 'rutgers university'), (2018, 'university of pennsylvania'), (2019, 'rutgers university'), (2020, 'rutgers university')) + +11/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : warner +middlename : robert +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of maryland college park'),) +all_us_institutions_year : ((1998, 'university of maryland college park'),) + +firstname : christopher +lastname : warner +middlename : m +year_range : (2011, 2019) +main_us_institutions_year : ((2011, 'engineer research and development center'), (2012, 'engineer research and development center'), (2014, 'engineer research and development center'), (2015, 'engineer research and development center'), (2017, 'engineer research and development center'), (2018, 'engineer research and development center'), (2019, 'engineer research and development center')) +all_us_institutions_year : ((2011, 'engineer research and development center'), (2012, 'engineer research and development center'), (2014, 'engineer research and development center'), (2015, 'engineer research and development center'), (2016, 'engineer research and development center'), (2017, 'engineer research and development center'), (2018, 'engineer research and development center'), (2019, 'engineer research and development center'), (2019, 'united states army corps of engineers')) + +11/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alan +lastname : weinstein +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of california berkeley'),) +all_us_institutions_year : ((1990, 'university of california berkeley'),) + +firstname : alan +lastname : weinstein +middlename : c +year_range : (1980, 2020) +main_us_institutions_year : ((1987, 'washington university in st louis'), (2009, 'cleveland state university')) +all_us_institutions_year : ((1987, 'washington university in st louis'), (1990, 'cleveland state university'), (1999, 'cleveland state university'), (2000, 'cleveland state university'), (2005, 'cleveland state university'), (2006, 'cleveland state university'), (2007, 'cleveland marshall college of law'), (2008, 'cleveland marshall college of law'), (2008, 'cleveland state university'), (2009, 'cleveland state university'), (2010, 'cleveland marshall college of law'), (2011, 'cleveland marshall college of law'), (2012, 'cleveland marshall college of law'), (2015, 'cleveland marshall college of law'), (2016, 'cleveland marshall college of law')) + +11/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : visarath +lastname : in +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'claremont graduate university'),) +all_us_institutions_year : ((2011, 'claremont graduate university'),) + +firstname : visarath +lastname : in +middlename : None +year_range : (1995, 2018) +main_us_institutions_year : ((1995, 'georgia institute of technology'), (1997, 'naval surface warfare center'), (1998, 'florida atlantic university'), (1998, 'naval surface warfare center'), (2000, 'georgia institute of technology'), (2000, 'naval surface warfare center')) +all_us_institutions_year : ((1995, 'georgia institute of technology'), (1997, 'georgia institute of technology'), (1997, 'naval surface warfare center'), (1998, 'florida atlantic university'), (1998, 'naval surface warfare center'), (2000, 'georgia institute of technology'), (2000, 'naval surface warfare center')) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : maarten +lastname : hoop +middlename : v de +year_range : (2012,) +main_us_institutions_year : ((2012, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2012, 'massachusetts institute of technology'),) + +firstname : maarten +lastname : hoop +middlename : v de +year_range : (2013, 2015) +main_us_institutions_year : ((2013, 'purdue university'), (2015, 'purdue university')) +all_us_institutions_year : ((2013, 'purdue university'), (2015, 'purdue university')) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : saltman +middlename : j +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of texas at austin'),) +all_us_institutions_year : ((2008, 'university of texas at austin'),) + +firstname : david +lastname : saltman +middlename : j +year_range : (2001, 2010) +main_us_institutions_year : ((2010, 'princeton university'),) +all_us_institutions_year : ((2010, 'princeton university'),) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : waiyuan +lastname : tan +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'memphis state university'),) +all_us_institutions_year : ((1991, 'memphis state university'),) + +firstname : w +lastname : tan +middlename : y +year_range : (1981, 1988) +main_us_institutions_year : ((1981, 'university of memphis'), (1982, 'university of memphis'), (1983, 'university of memphis'), (1984, 'university of memphis'), (1988, 'university of memphis')) +all_us_institutions_year : ((1981, 'university of memphis'), (1982, 'university of memphis'), (1983, 'university of memphis'), (1984, 'university of memphis'), (1988, 'university of memphis')) + +12/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jon +lastname : mccammond +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of california santa barbara'),) +all_us_institutions_year : ((2008, 'university of california santa barbara'),) + +firstname : jonathan +lastname : mccammond +middlename : p +year_range : (1991, 1992) +main_us_institutions_year : ((1991, 'university of california berkeley'), (1992, 'university of california berkeley')) +all_us_institutions_year : ((1991, 'university of california berkeley'), (1992, 'university of california berkeley')) + +13/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hamid +lastname : krim +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'north carolina state university'),) +all_us_institutions_year : ((2012, 'north carolina state university'),) + +firstname : h +lastname : krim +middlename : None +year_range : (1990, 1996) +main_us_institutions_year : ((1990, 'northeastern university'), (1991, 'northeastern university'), (1993, 'northeastern university'), (1994, 'northeastern university'), (1995, 'northeastern university'), (1996, 'northeastern university')) +all_us_institutions_year : ((1990, 'northeastern university'), (1991, 'northeastern university'), (1993, 'northeastern university'), (1994, 'northeastern university'), (1995, 'northeastern university'), (1996, 'northeastern university')) + +13/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wei +lastname : zhu +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'stony brook university'),) +all_us_institutions_year : ((2011, 'stony brook university'),) + +firstname : wei +lastname : zhu +middlename : None +year_range : (2005, 2020) +main_us_institutions_year : ((2005, 'new york university'), (2006, 'new york university'), (2006, 'university of california los angeles'), (2007, 'courant institute of mathematical sciences'), (2009, 'new york university'), (2010, 'courant institute of mathematical sciences'), (2011, 'university of alabama'), (2013, 'university of alabama'), (2015, 'university of alabama'), (2019, 'university of alabama'), (2020, 'university of alabama')) +all_us_institutions_year : ((2005, 'new york university'), (2006, 'new york university'), (2006, 'university of california los angeles'), (2007, 'courant institute of mathematical sciences'), (2009, 'new york university'), (2010, 'courant institute of mathematical sciences'), (2011, 'university of alabama'), (2013, 'university of alabama'), (2014, 'university of alabama'), (2015, 'university of alabama'), (2019, 'university of alabama'), (2020, 'university of alabama')) + +13/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 288.05011426210405 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..3137922 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_advisors_9015.log @@ -0,0 +1,688 @@ +Namespace(testing=False, verbose=1, field=['medicine'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [71924100] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008391976356506347 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 1392.6933927973112 minutes + +Starting active labeling... +firstname : wen +lastname : kao +middlename : hong linda +year_range : (2008,) +main_us_institutions_year : ((2008, 'johns hopkins university'),) +all_us_institutions_year : ((2008, 'johns hopkins university'),) + +firstname : wen +lastname : kao +middlename : hong linda +year_range : (1999, 2017) +main_us_institutions_year : ((1999, 'johns hopkins university'), (2000, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university'), (2017, 'johns hopkins university school of medicine'), (2017, 'johns hopkins university')) +all_us_institutions_year : ((1999, 'johns hopkins university'), (2000, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university'), (2015, 'johns hopkins university'), (2015, 'university of texas at austin'), (2016, 'johns hopkins university'), (2017, 'johns hopkins university'), (2017, 'johns hopkins university school of medicine')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : ronaldo +lastname : costa +middlename : c da +year_range : (2014,) +main_us_institutions_year : ((2014, 'ohio state university'),) +all_us_institutions_year : ((2014, 'ohio state university'),) + +firstname : ronaldo +lastname : costa +middlename : c da +year_range : (1969, 2020) +main_us_institutions_year : ((2010, 'ohio state university'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'ohio state university'), (2020, 'ohio state university')) +all_us_institutions_year : ((2010, 'ohio state university'), (2011, 'ohio state university'), (2012, 'ohio state university'), (2014, 'ohio state university'), (2015, 'ohio state university'), (2016, 'ohio state university'), (2017, 'ohio state university'), (2018, 'ohio state university'), (2019, 'ohio state university'), (2020, 'ohio state university')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : allen +lastname : gao +middlename : c +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california davis'),) +all_us_institutions_year : ((2014, 'university of california davis'),) + +firstname : allen +lastname : gao +middlename : c +year_range : (1996, 2021) +main_us_institutions_year : ((1996, 'johns hopkins university'), (1997, 'johns hopkins university school of medicine'), (1998, 'johns hopkins university'), (1999, 'university of pittsburgh'), (2000, 'university of pittsburgh'), (2002, 'university of pittsburgh'), (2003, 'roswell park cancer institute'), (2004, 'roswell park cancer institute'), (2005, 'roswell park cancer institute'), (2006, 'roswell park cancer institute'), (2007, 'roswell park cancer institute'), (2008, 'roswell park cancer institute'), (2009, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2012, 'university of california davis'), (2013, 'university of california davis'), (2014, 'university of california davis'), (2015, 'university of california davis'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis'), (2020, 'university of california davis'), (2021, 'university of california davis')) +all_us_institutions_year : ((1996, 'johns hopkins university'), (1997, 'johns hopkins university school of medicine'), (1998, 'johns hopkins university'), (1999, 'johns hopkins university'), (1999, 'university of pittsburgh'), (2000, 'university of pittsburgh'), (2002, 'university of pittsburgh'), (2003, 'roswell park cancer institute'), (2003, 'university of pittsburgh'), (2004, 'roswell park cancer institute'), (2005, 'roswell park cancer institute'), (2006, 'roswell park cancer institute'), (2006, 'university at buffalo'), (2007, 'roswell park cancer institute'), (2008, 'roswell park cancer institute'), (2008, 'university of california davis'), (2009, 'university of california davis'), (2010, 'university of california davis'), (2011, 'university of california davis'), (2012, 'university of california davis'), (2012, 'veterans health administration'), (2013, 'university of california davis'), (2014, 'university of california'), (2014, 'university of california davis'), (2015, 'university of california davis'), (2016, 'university of california'), (2016, 'university of california davis'), (2017, 'university of california davis'), (2018, 'university of california davis'), (2019, 'university of california davis'), (2020, 'university of california'), (2020, 'university of california davis'), (2021, 'university of california davis')) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ellen +lastname : robey +middlename : None +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of california berkeley'),) +all_us_institutions_year : ((2003, 'university of california berkeley'),) + +firstname : ellen +lastname : robey +middlename : a +year_range : (1984, 2021) +main_us_institutions_year : ((1990, 'howard hughes medical institute'), (1991, 'howard hughes medical institute'), (1992, 'howard hughes medical institute'), (1994, 'university of california berkeley'), (1995, 'university of california berkeley'), (1996, 'university of california berkeley'), (1997, 'university of california berkeley'), (1998, 'university of california berkeley'), (1999, 'university of california berkeley'), (2000, 'university of california berkeley'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2007, 'university of california berkeley'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of california berkeley'), (2011, 'university of california berkeley'), (2012, 'university of california berkeley'), (2013, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley'), (2021, 'university of california berkeley')) +all_us_institutions_year : ((1990, 'howard hughes medical institute'), (1991, 'howard hughes medical institute'), (1991, 'icahn school of medicine at mount sinai'), (1992, 'howard hughes medical institute'), (1994, 'university of california berkeley'), (1995, 'university of california berkeley'), (1996, 'johns hopkins university'), (1996, 'university of california berkeley'), (1997, 'university of california berkeley'), (1998, 'university of california berkeley'), (1999, 'university of california berkeley'), (2000, 'university of california berkeley'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2007, 'university of california berkeley'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of california berkeley'), (2011, 'university of california berkeley'), (2012, 'university of california berkeley'), (2013, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2018, 'university of california berkeley'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley'), (2021, 'university of california berkeley')) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : penn +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'vanderbilt university'),) +all_us_institutions_year : ((2015, 'vanderbilt university'),) + +firstname : john +lastname : penn +middlename : s +year_range : (1985, 2021) +main_us_institutions_year : ((1987, 'baylor college of medicine'), (1988, 'baylor college of medicine'), (1989, 'baylor college of medicine'), (1990, 'baylor college of medicine'), (1990, 'university of arkansas for medical sciences'), (1991, 'university of arkansas for medical sciences'), (1992, 'university of arkansas for medical sciences'), (1993, 'university of arkansas for medical sciences'), (1994, 'university of arkansas for medical sciences'), (1995, 'university of arkansas for medical sciences'), (1996, 'university of arkansas for medical sciences'), (1997, 'university of arkansas for medical sciences'), (1998, 'university of arkansas for medical sciences'), (1998, 'wayne state university'), (2000, 'vanderbilt university'), (2001, 'vanderbilt university'), (2002, 'vanderbilt university'), (2003, 'vanderbilt university'), (2004, 'vanderbilt university'), (2005, 'vanderbilt university'), (2006, 'vanderbilt university'), (2007, 'vanderbilt university'), (2008, 'vanderbilt university'), (2009, 'vanderbilt university'), (2010, 'vanderbilt university'), (2011, 'vanderbilt university'), (2012, 'vanderbilt university'), (2013, 'vanderbilt university'), (2014, 'vanderbilt university'), (2015, 'vanderbilt university'), (2016, 'vanderbilt university'), (2017, 'vanderbilt university'), (2018, 'vanderbilt university'), (2019, 'vanderbilt university'), (2020, 'vanderbilt university medical center'), (2020, 'vanderbilt university'), (2021, 'vanderbilt university')) +all_us_institutions_year : ((1987, 'baylor college of medicine'), (1988, 'baylor college of medicine'), (1989, 'baylor college of medicine'), (1990, 'baylor college of medicine'), (1990, 'university of arkansas for medical sciences'), (1991, 'university of arkansas for medical sciences'), (1992, 'university of arkansas for medical sciences'), (1993, 'university of arkansas for medical sciences'), (1994, 'university of arkansas for medical sciences'), (1995, 'university of arkansas for medical sciences'), (1996, 'university of arkansas for medical sciences'), (1997, 'university of arkansas for medical sciences'), (1998, 'university of arkansas for medical sciences'), (1998, 'wayne state university'), (2000, 'vanderbilt university'), (2001, 'vanderbilt university'), (2002, 'vanderbilt university'), (2003, 'vanderbilt university'), (2004, 'vanderbilt university'), (2004, 'vanderbilt university medical center'), (2005, 'vanderbilt university'), (2006, 'vanderbilt university'), (2007, 'vanderbilt university'), (2008, 'vanderbilt university'), (2009, 'vanderbilt university'), (2010, 'vanderbilt university'), (2010, 'vanderbilt university medical center'), (2011, 'vanderbilt university'), (2011, 'vanderbilt university medical center'), (2012, 'vanderbilt university'), (2012, 'vanderbilt university medical center'), (2013, 'vanderbilt university'), (2013, 'vanderbilt university medical center'), (2014, 'vanderbilt university'), (2014, 'vanderbilt university medical center'), (2015, 'vanderbilt university'), (2016, 'vanderbilt university'), (2016, 'vanderbilt university medical center'), (2017, 'vanderbilt university'), (2017, 'vanderbilt university medical center'), (2018, 'vanderbilt university'), (2018, 'vanderbilt university medical center'), (2019, 'vanderbilt university'), (2019, 'vanderbilt university medical center'), (2020, 'vanderbilt university'), (2020, 'vanderbilt university medical center'), (2021, 'vanderbilt university'), (2021, 'vanderbilt university medical center')) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : garciamartinez +middlename : victor +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2015, 'university of north carolina at chapel hill'),) + +firstname : j +lastname : garcia +middlename : victor +year_range : (1999, 2017) +main_us_institutions_year : ((1999, 'university of texas southwestern medical center'), (2000, 'university of texas southwestern medical center'), (2001, 'university of texas southwestern medical center'), (2002, 'university of texas southwestern medical center'), (2003, 'university of texas southwestern medical center'), (2004, 'university of texas southwestern medical center'), (2005, 'university of texas southwestern medical center'), (2006, 'university of texas southwestern medical center'), (2007, 'university of texas southwestern medical center'), (2008, 'university of texas southwestern medical center'), (2009, 'university of texas southwestern medical center'), (2010, 'university of texas southwestern medical center'), (2012, 'university of north carolina at chapel hill'), (2013, 'university of north carolina at chapel hill'), (2014, 'university of north carolina at chapel hill'), (2016, 'university of north carolina at chapel hill'), (2017, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((1999, 'university of tennessee health science center'), (1999, 'university of texas southwestern medical center'), (2000, 'university of texas southwestern medical center'), (2001, 'university of texas southwestern medical center'), (2002, 'university of texas southwestern medical center'), (2003, 'university of texas southwestern medical center'), (2004, 'university of texas southwestern medical center'), (2005, 'university of texas southwestern medical center'), (2006, 'university of texas southwestern medical center'), (2007, 'university of texas southwestern medical center'), (2008, 'university of texas southwestern medical center'), (2009, 'university of texas southwestern medical center'), (2010, 'university of texas southwestern medical center'), (2012, 'university of north carolina at chapel hill'), (2013, 'university of north carolina at chapel hill'), (2014, 'university of north carolina at chapel hill'), (2016, 'university of north carolina at chapel hill'), (2017, 'university of north carolina at chapel hill')) + +5/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : liu +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'massachusetts institute of technology'),) +all_us_institutions_year : ((1999, 'massachusetts institute of technology'),) + +firstname : jun +lastname : li +middlename : None +year_range : (2006, 2014) +main_us_institutions_year : None +all_us_institutions_year : ((2011, 'chinese academy of sciences'),) + +6/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : arthur +lastname : weiss +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of california san francisco'),) +all_us_institutions_year : ((2015, 'university of california san francisco'),) + +firstname : arthur +lastname : weissman +middlename : d +year_range : (1987, 2011) +main_us_institutions_year : ((1987, 'national institute on drug abuse'), (1988, 'national institute on drug abuse'), (1989, 'national institute on drug abuse'), (1990, 'national institute on drug abuse'), (1991, 'national institute on drug abuse'), (1993, 'national institute on drug abuse'), (1994, 'national institute on drug abuse'), (2011, 'national institute on drug abuse')) +all_us_institutions_year : ((1987, 'national institute on drug abuse'), (1988, 'national institute on drug abuse'), (1989, 'national institute on drug abuse'), (1990, 'national institute on drug abuse'), (1991, 'national institute on drug abuse'), (1993, 'national institute on drug abuse'), (1994, 'national institute on drug abuse'), (2011, 'national institute on drug abuse')) + +6/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : barbara +lastname : osborne +middlename : a +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of massachusetts amherst'),) +all_us_institutions_year : ((2002, 'university of massachusetts amherst'),) + +firstname : barbara +lastname : osborn +middlename : h +year_range : (1992, 2010) +main_us_institutions_year : ((1992, 'duke university'), (1999, 'duke university'), (2002, 'duke university')) +all_us_institutions_year : ((1992, 'duke university'), (1999, 'duke university'), (2002, 'duke university')) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : green +middlename : i +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of pennsylvania'),) +all_us_institutions_year : ((1998, 'university of pennsylvania'),) + +firstname : mark +lastname : greenwood +middlename : j +year_range : (2000, 2010) +main_us_institutions_year : ((2000, 'spectrum health'), (2005, 'spectrum health'), (2006, 'spectrum health'), (2007, 'spectrum health'), (2009, 'spectrum health'), (2010, 'spectrum health')) +all_us_institutions_year : ((2000, 'spectrum health'), (2005, 'spectrum health'), (2006, 'spectrum health'), (2007, 'spectrum health'), (2009, 'spectrum health'), (2010, 'spectrum health')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wesley +lastname : voorhies +middlename : c van +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of washington'),) +all_us_institutions_year : ((2011, 'university of washington'),) + +firstname : wesley +lastname : voorhis +middlename : c van +year_range : (1992, 2021) +main_us_institutions_year : ((1992, 'university of washington'), (1993, 'university of washington'), (1996, 'university of washington'), (1997, 'university of washington'), (1998, 'university of washington'), (1999, 'university of washington'), (2000, 'university of washington'), (2001, 'university of washington'), (2002, 'university of washington'), (2003, 'university of washington'), (2004, 'university of washington'), (2005, 'university of washington'), (2006, 'university of washington'), (2007, 'university of washington'), (2008, 'university of washington'), (2009, 'university of washington'), (2010, 'university of washington'), (2011, 'university of washington'), (2012, 'university of washington'), (2013, 'university of washington'), (2014, 'university of washington'), (2015, 'university of washington'), (2016, 'university of washington'), (2017, 'university of washington'), (2018, 'university of washington'), (2019, 'university of washington'), (2020, 'university of washington'), (2021, 'university of washington')) +all_us_institutions_year : ((1992, 'university of washington'), (1993, 'university of washington'), (1996, 'university of washington'), (1997, 'university of washington'), (1998, 'university of washington'), (1999, 'university of washington'), (2000, 'university of washington'), (2001, 'university of washington'), (2002, 'university of washington'), (2002, 'yale university'), (2003, 'university of washington'), (2004, 'university of washington'), (2005, 'university of washington'), (2006, 'university of washington'), (2007, 'university of washington'), (2008, 'university of washington'), (2009, 'university of washington'), (2010, 'university of washington'), (2011, 'university of washington'), (2012, 'university of washington'), (2013, 'university of washington'), (2014, 'university of washington'), (2015, 'university of washington'), (2016, 'university of washington'), (2017, 'university of washington'), (2018, 'university of washington'), (2019, 'university of washington'), (2020, 'university of washington'), (2021, 'university of washington')) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : indira +lastname : raman +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'northwestern university'),) +all_us_institutions_year : ((2012, 'northwestern university'),) + +firstname : v +lastname : ram +middlename : j +year_range : (1979, 1992) +main_us_institutions_year : ((1982, 'northeastern university'), (1983, 'northeastern university'), (1984, 'mclean hospital'), (1984, 'harvard university'), (1984, 'northeastern university'), (1987, 'northeastern university'), (1991, 'northeastern university')) +all_us_institutions_year : ((1982, 'northeastern university'), (1983, 'northeastern university'), (1984, 'harvard university'), (1984, 'mclean hospital'), (1984, 'northeastern university'), (1987, 'northeastern university'), (1991, 'northeastern university')) + +7/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : collins +middlename : d +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of california los angeles'),) +all_us_institutions_year : ((1999, 'university of california los angeles'),) + +firstname : michael +lastname : cohen +middlename : d +year_range : (2003, 2021) +main_us_institutions_year : ((2003, 'university of california los angeles'), (2021, 'university of california los angeles')) +all_us_institutions_year : ((2003, 'university of california los angeles'), (2021, 'university of california los angeles')) + +7/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rebecca +lastname : fry +middlename : c +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2013, 'university of north carolina at chapel hill'),) + +firstname : rebecca +lastname : holmberg +middlename : c +year_range : (1999, 2020) +main_us_institutions_year : ((1999, 'university of north carolina at chapel hill'), (2003, 'university of north carolina at chapel hill'), (2004, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((1999, 'university of north carolina at chapel hill'), (2003, 'university of north carolina at chapel hill'), (2004, 'university of north carolina at chapel hill')) + +7/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : gill +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'stanford university'),) +all_us_institutions_year : ((2015, 'stanford university'),) + +firstname : john +lastname : gillen +middlename : None +year_range : (1994, 1998) +main_us_institutions_year : ((1994, 'henry ford health system'),) +all_us_institutions_year : ((1994, 'henry ford health system'),) + +7/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gang +lastname : li +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'university of california los angeles'),) +all_us_institutions_year : ((2005, 'university of california los angeles'),) + +firstname : gang +lastname : liu +middlename : None +year_range : (2018, 2020) +main_us_institutions_year : ((2018, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university')) +all_us_institutions_year : ((2018, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university')) + +7/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : zink +middlename : chris +year_range : (2008,) +main_us_institutions_year : ((2008, 'johns hopkins university'),) +all_us_institutions_year : ((2008, 'johns hopkins university'),) + +firstname : christine +lastname : zink +middlename : None +year_range : (1989, 2021) +main_us_institutions_year : ((1989, 'johns hopkins university'), (1999, 'johns hopkins university'), (2008, 'johns hopkins university'), (2021, 'johns hopkins university school of medicine'), (2021, 'johns hopkins university')) +all_us_institutions_year : ((1989, 'johns hopkins university'), (1999, 'johns hopkins university'), (2008, 'johns hopkins university'), (2020, 'johns hopkins university'), (2020, 'johns hopkins university school of medicine'), (2021, 'johns hopkins university'), (2021, 'johns hopkins university school of medicine')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeffrey +lastname : siegel +middlename : a +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of texas at austin'),) +all_us_institutions_year : ((2010, 'university of texas at austin'),) + +firstname : jeffry +lastname : siegel +middlename : a +year_range : (2004, 2005) +main_us_institutions_year : ((2004, 'wellington management company'), (2005, 'wellington management company')) +all_us_institutions_year : ((2004, 'wellington management company'), (2005, 'wellington management company')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jianxing +lastname : ma +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of oklahoma health sciences center'),) +all_us_institutions_year : ((2014, 'university of oklahoma health sciences center'),) + +firstname : jianxin +lastname : ma +middlename : None +year_range : (2020, 2021) +main_us_institutions_year : ((2020, 'centers for disease control and prevention'), (2021, 'centers for disease control and prevention')) +all_us_institutions_year : ((2020, 'centers for disease control and prevention'), (2021, 'centers for disease control and prevention')) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ying +lastname : chen +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'southern illinois university at carbondale'),) +all_us_institutions_year : ((2014, 'southern illinois university at carbondale'),) + +firstname : yi +lastname : chen +middlename : hsing +year_range : (2016, 2018) +main_us_institutions_year : ((2016, 'memorial hospital of south bend'), (2018, 'memorial hospital of south bend')) +all_us_institutions_year : ((2016, 'memorial hospital of south bend'), (2018, 'memorial hospital of south bend')) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rakesh +lastname : jain +middlename : k +year_range : (1998,) +main_us_institutions_year : ((1998, 'massachusetts institute of technology'),) +all_us_institutions_year : ((1998, 'massachusetts institute of technology'),) + +firstname : r +lastname : jain +middlename : k +year_range : (1990, 1991) +main_us_institutions_year : ((1990, 'georgetown university'),) +all_us_institutions_year : ((1990, 'georgetown university'),) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : melissa +lastname : mahoney +middlename : j +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of colorado at boulder'),) +all_us_institutions_year : ((2010, 'university of colorado at boulder'),) + +firstname : m +lastname : mahoney +middlename : j +year_range : (1973, 2000) +main_us_institutions_year : ((1975, 'yale university'), (1976, 'yale university'), (1982, 'yale university'), (1985, 'yale university'), (1986, 'yale university'), (1987, 'yale university'), (1988, 'yale university'), (1991, 'yale university'), (1992, 'yale university'), (1994, 'yale university'), (1995, 'yale university'), (1999, 'yale university'), (2000, 'yale university')) +all_us_institutions_year : ((1975, 'yale university'), (1976, 'yale university'), (1982, 'yale university'), (1985, 'university of california san francisco'), (1985, 'university of connecticut'), (1985, 'yale university'), (1986, 'yale university'), (1987, 'yale university'), (1988, 'yale university'), (1991, 'yale university'), (1992, 'yale university'), (1994, 'yale university'), (1995, 'yale university'), (1999, 'yale university'), (2000, 'yale university')) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : satariano +middlename : a +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of california berkeley'),) +all_us_institutions_year : ((1998, 'university of california berkeley'),) + +firstname : william +lastname : satariano +middlename : None +year_range : (2005, 2010) +main_us_institutions_year : ((2005, 'university of california san francisco'), (2009, 'university of california san francisco'), (2010, 'university of california san francisco')) +all_us_institutions_year : ((2005, 'university of california san francisco'), (2009, 'university of california san francisco'), (2010, 'university of california san francisco')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peggy +lastname : vesser +middlename : ingram +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of tennessee health science center'),) +all_us_institutions_year : ((2002, 'university of tennessee health science center'),) + +firstname : peggy +lastname : veeser +middlename : ingram +year_range : (1999, 2007) +main_us_institutions_year : ((1999, 'university of tennessee'), (2007, 'university of tennessee')) +all_us_institutions_year : ((1999, 'university of tennessee'), (2007, 'university of tennessee')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : livesey +middlename : c +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of the pacific'),) +all_us_institutions_year : ((2004, 'university of the pacific'),) + +firstname : john +lastname : livesey +middlename : c +year_range : (1984, 1996) +main_us_institutions_year : ((1984, 'oregon state university'), (1984, 'university of washington'), (1986, 'university of washington'), (1988, 'university of washington'), (1989, 'university of washington'), (1990, 'university of washington'), (1992, 'university of washington'), (1994, 'university of washington'), (1995, 'university of washington'), (1996, 'university of washington')) +all_us_institutions_year : ((1984, 'oregon state university'), (1984, 'university of washington'), (1986, 'university of washington'), (1988, 'university of washington'), (1989, 'university of washington'), (1990, 'university of washington'), (1991, 'university of washington medical center'), (1992, 'university of washington'), (1992, 'university of washington medical center'), (1993, 'university of washington'), (1993, 'university of washington medical center'), (1994, 'university of washington'), (1995, 'university of washington'), (1996, 'university of washington')) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : webb +middlename : clinton +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of michigan'),) +all_us_institutions_year : ((1995, 'university of michigan'),) + +firstname : r +lastname : webb +middlename : clinton +year_range : (2010, 2016) +main_us_institutions_year : ((2011, 'georgia regents university'), (2016, 'georgia regents university')) +all_us_institutions_year : ((2011, 'georgia regents university'), (2016, 'georgia regents university')) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : l +lastname : jones +middlename : colette +year_range : (1998,) +main_us_institutions_year : ((1998, 'university of san diego'),) +all_us_institutions_year : ((1998, 'university of san diego'),) + +firstname : l +lastname : jones +middlename : colette +year_range : (1986, 1990) +main_us_institutions_year : ((1990, 'university of nebraska omaha'),) +all_us_institutions_year : ((1990, 'university of nebraska omaha'),) + +8/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : macrina +middlename : m +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of alabama at birmingham'),) +all_us_institutions_year : ((2004, 'university of alabama at birmingham'),) + +firstname : david +lastname : macrina +middlename : m +year_range : (1981, 1995) +main_us_institutions_year : ((1981, 'university of illinois at urbana champaign'), (1985, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'university of illinois at urbana champaign')) +all_us_institutions_year : ((1981, 'university of illinois at urbana champaign'), (1985, 'university of illinois at urbana champaign'), (1986, 'university of illinois at urbana champaign'), (1987, 'university of illinois at urbana champaign'), (1988, 'university of illinois at urbana champaign')) + +8/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : madeleine +lastname : hellman +middlename : a +year_range : (2011,) +main_us_institutions_year : ((2011, 'nova southeastern university'),) +all_us_institutions_year : ((2011, 'nova southeastern university'),) + +firstname : m +lastname : hellman +middlename : None +year_range : (2010, 2020) +main_us_institutions_year : ((2014, 'nova southeastern university'), (2015, 'nova southeastern university'), (2017, 'nova southeastern university'), (2018, 'nova southeastern university'), (2020, 'nova southeastern university')) +all_us_institutions_year : ((2014, 'nova southeastern university'), (2015, 'nova southeastern university'), (2017, 'nova southeastern university'), (2018, 'nova southeastern university'), (2020, 'nova southeastern university')) + +8/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : h +lastname : ananthaswamy +middlename : n +year_range : (1994,) +main_us_institutions_year : ((1994, 'university of texas graduate school of biomedical sciences at houston'),) +all_us_institutions_year : ((1994, 'university of texas graduate school of biomedical sciences at houston'),) + +firstname : honnavara +lastname : ananthaswamy +middlename : n +year_range : (1981, 2010) +main_us_institutions_year : ((1989, 'university of texas md anderson cancer center'), (1990, 'university of texas md anderson cancer center'), (1991, 'university of texas md anderson cancer center'), (1994, 'university of texas md anderson cancer center'), (1995, 'university of texas md anderson cancer center'), (1996, 'university of texas md anderson cancer center'), (1997, 'university of texas md anderson cancer center'), (1998, 'university of texas md anderson cancer center'), (1999, 'university of texas md anderson cancer center'), (2000, 'university of texas md anderson cancer center'), (2001, 'university of texas md anderson cancer center'), (2002, 'university of texas md anderson cancer center'), (2003, 'university of texas md anderson cancer center'), (2004, 'university of texas md anderson cancer center'), (2005, 'university of texas md anderson cancer center'), (2006, 'university of texas md anderson cancer center'), (2007, 'university of texas md anderson cancer center'), (2008, 'university of texas md anderson cancer center'), (2010, 'university of texas md anderson cancer center')) +all_us_institutions_year : ((1989, 'university of texas md anderson cancer center'), (1990, 'university of texas md anderson cancer center'), (1991, 'university of texas md anderson cancer center'), (1994, 'university of texas md anderson cancer center'), (1995, 'university of texas md anderson cancer center'), (1996, 'university of texas md anderson cancer center'), (1997, 'university of texas md anderson cancer center'), (1998, 'university of texas md anderson cancer center'), (1999, 'university of texas md anderson cancer center'), (2000, 'university of texas md anderson cancer center'), (2001, 'university of texas md anderson cancer center'), (2002, 'university of texas md anderson cancer center'), (2003, 'university of texas md anderson cancer center'), (2004, 'university of texas md anderson cancer center'), (2005, 'university of texas md anderson cancer center'), (2006, 'university of texas md anderson cancer center'), (2007, 'university of texas md anderson cancer center'), (2008, 'university of texas md anderson cancer center'), (2010, 'university of texas md anderson cancer center'), (2017, 'university of texas md anderson cancer center')) + +9/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : leona +lastname : vandevusse +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'marquette university'),) +all_us_institutions_year : ((2010, 'marquette university'),) + +firstname : leona +lastname : vandevusse +middlename : None +year_range : (1997, 2019) +main_us_institutions_year : ((1997, 'marquette university college of nursing'), (1999, 'marquette university college of nursing'), (2000, 'marquette university college of nursing'), (2006, 'marquette university college of nursing'), (2007, 'marquette university college of nursing'), (2009, 'marquette university college of nursing'), (2010, 'marquette university college of nursing'), (2019, 'marquette university college of nursing')) +all_us_institutions_year : ((1997, 'marquette university college of nursing'), (1999, 'marquette university college of nursing'), (2000, 'marquette university college of nursing'), (2006, 'marquette university college of nursing'), (2007, 'marquette university college of nursing'), (2009, 'marquette university college of nursing'), (2010, 'marquette university college of nursing'), (2019, 'marquette university college of nursing')) + +10/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : english +middlename : william +year_range : (2005,) +main_us_institutions_year : ((2005, 'florida state university'),) +all_us_institutions_year : ((2005, 'florida state university'),) + +firstname : robert +lastname : english +middlename : a +year_range : (2006, 2019) +main_us_institutions_year : ((2012, 'university of kentucky'), (2013, 'university of kentucky'), (2014, 'university of kentucky'), (2015, 'university of florida'), (2016, 'university of kentucky'), (2017, 'university of kentucky'), (2018, 'university of kentucky'), (2019, 'university of kentucky')) +all_us_institutions_year : ((2012, 'university of kentucky'), (2013, 'university of kentucky'), (2014, 'university of kentucky'), (2015, 'university of florida'), (2016, 'university of kentucky'), (2017, 'university of kentucky'), (2018, 'university of kentucky'), (2019, 'university of kentucky')) + +11/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 1481.138816777865 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..790e56b --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_medicine_christoph_degree0_graduates_8515.log @@ -0,0 +1,594 @@ +Namespace(testing=False, verbose=1, field=['medicine'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [71924100] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0007546861966451009 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +Time elapsed: 677.535956176122 minutes + +Starting active labeling... +firstname : kristin +lastname : hollister +middlename : None +year : 2013 +year_papertitle : ((2013, 'insights into the role of bcl6 in follicular th cells using a new conditional mutant mouse model'), (2013, 'role of bcl6 and pd 1 in cd4 memory t cell development p1165'), (2013, 'the transcription factor twist1 limits t helper 17 and t follicular helper cell development by repressing the gene encoding the interleukin 6 receptor α chain'), (2014, 'a negative feedback loop mediated by the bcl6 cullin 3 complex limits tfh cell differentiation'), (2014, 'elucidating the role of bcl6 in helper t cell activation proliferation and differentiation'), (2014, 'the role of follicular helper t cells and the germinal center in hiv 1 gp120 dna prime and gp120 protein boost vaccination'), (2014, 'virus encoded ectopic cd74 enhances poxvirus vaccine efficacy'), (2015, 'clues to follicular helper t cell function from bcl6 target genes irm15p 457'), (2017, 'bcl6 promotes follicular helper t cell differentiation and pd 1 expression in a blimp1 independent manner in mice'), (2017, 'levels of circulating follicular helper t cells and the prognostic significance of cd40 ligand on survival in patients with alcoholic liver disease')) +keywords : frozenset({'virology', 'neuroscience', 'cell biology', 'immunology', 'genetics', 'molecular biology'}) + +firstname : kristin +lastname : hollister +middlename : n +year : 2014 +year_papertitle : ((2014, 'elucidating the role of bcl6 in helper t cell activation proliferation and differentiation'),) +keywords : frozenset({'immunology', 'microbiology'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : minoli +lastname : perera +middlename : a +year : 1998 +year_papertitle : ((1998, 'immunohistochemical localization of darpp32 in striatal projection neurons and striatal interneurons in pigeons'), (2002, 'novel nonsteroidal ligands with high binding affinity and potent functional activity for the androgen receptor'), (2003, 'key structural features of nonsteroidal ligands for binding and activation of the androgen receptor'), (2003, 'the pharmacology pharmacokinetics and metabolism of a novel nonsteroidal selective androgen receptor modulator')) +keywords : frozenset({'bioinformatics', 'pharmacology', 'neuroscience', 'cell biology', 'stereochemistry'}) + +firstname : minoli +lastname : perera +middlename : a +year : 2003 +year_papertitle : ((2003, 'the pharmacology pharmacokinetics and metabolism of a novel nonsteroidal selective androgen receptor modulator'),) +keywords : frozenset({'pharmacology', 'pharmaceuticals'}) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : julie +lastname : nair +middlename : mcculloh +year : 2015 +year_papertitle : ((2015, 'alcohol misuse among nursing students'), (2015, 'substance abuse policy among nursing students a scoping review'), (2016, 'alcohol use misuse and abuse among nursing students a photovoice study'), (2018, 'measures of wellness in young adult college students an integrative review'), (2020, 'increasing nursing research capacity the roles and contributions of nurse scientists within healthcare systems in the greater philadelphia region'), (2020, 'novice nurse support group a pilot study'), (2021, 'facilitators and challenges in the adoption of a virtual nurse visit in the home health setting')) +keywords : frozenset({'medical education', 'nursing'}) + +firstname : julie +lastname : nair +middlename : mcculloh +year : 2014 +year_papertitle : ((2014, 'alcohol use misuse and abuse among nursing students a photovoice study'),) +keywords : frozenset({'health education', 'nursing'}) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : upendra +lastname : dahal +middlename : p +year : 2009 +year_papertitle : ((2009, 'acid catalyzed conjugate additions to 3 fluorobutenone'), (2011, 'small molecule quantification by liquid chromatography mass spectrometry for metabolites of drugs and drug candidates'), (2011, 'the kinetic mechanism for cytochrome p450 metabolism of type ii binding compounds evidence supporting direct reduction'), (2012, 'comparative study of the affinity and metabolism of type i and type ii binding quinoline carboxamide analogues by cytochrome p450 3a4'), (2012, 'effect of iron nitrogen coordination type ii binding on drug metabolism by cytochrome p450 studies to understand the kinetics metabolic stability and regioselectivity'), (2013, 'benchmarking in vitro covalent binding burden as a tool to assess potential toxicity caused by nonspecific covalent binding of covalent drugs'), (2014, 'chemical and computational methods for the characterization of covalent reactive groups for the prospective design of irreversible inhibitors'), (2015, 'a tag free collisionally induced fragmentation approach to detect drug adducted proteins by mass spectrometry'), (2016, 'intrinsic reactivity profile of electrophilic moieties to guide covalent drug design n α acetyl l lysine as an amine nucleophile'), (2016, 'the role of protein protein and protein membrane interactions on p450 function')) +keywords : frozenset({'combinatorial chemistry', 'pharmacology', 'biochemistry', 'medicinal chemistry', 'cell biology', 'chromatography', 'stereochemistry', 'organic chemistry'}) + +firstname : upendra +lastname : dahal +middlename : purush +year : 2012 +year_papertitle : ((2012, 'effect of iron nitrogen coordination type ii binding on drug metabolism by cytochrome p450 studies to understand the kinetics metabolic stability and regioselectivity'),) +keywords : frozenset({'pharmacology', 'biochemistry', 'organic chemistry'}) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jiayu +lastname : liao +middlename : None +year : 1996 +year_papertitle : ((1996, 'enhancement of antiproliferative activity of gamma interferon by the specific inhibition of tyrosine dephosphorylation of stat1'), (1997, 'specific inhibition of stat3 signal transduction by pias3'), (1998, 'inhibition of stat1 mediated gene activation by pias1'), (2000, 'distinct roles of the nh2 and cooh terminal domains of the protein inhibitor of activated signal transducer and activator of transcription stat 1 pias1 in cytokine induced pias1 stat1 interaction'), (2000, 'protein inhibitor of activated stat 1 signal transducer and activator of transcription 1 is a nuclear receptor coregulator expressed in human testis'), (2003, 'sphingosine 1 phosphate pathway therapeutics a lipid ligand receptor paradigm'), (2003, 'three sweet receptor genes are clustered in human chromosome 1')) +keywords : frozenset({'biochemistry', 'cell biology', 'computational biology', 'genetics', 'molecular biology', 'cancer research'}) + +firstname : jiayu +lastname : liao +middlename : None +year : 1999 +year_papertitle : ((1999, 'protein inhibitors of activated stats pias in cytokine signaling'),) +keywords : frozenset({'molecular biology', 'immunology'}) + +4/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : danielle +lastname : teel +middlename : f w +year : 2008 +year_papertitle : ((2008, 'design and validation of an infrared badal optometer for laser speckle'), (2008, 'longitudinal chromatic aberration of the human infant eye'), (2010, 'accommodation and vergence responses to hyperopic demands during infancy and childhood'), (2012, 'the influence of accommodation and vergence coupling during visual development'), (2014, 'differences between wavefront and subjective refraction for infrared light')) +keywords : frozenset({'artificial intelligence', 'computer vision', 'optics', 'classical mechanics', 'optometry'}) + +firstname : danielle +lastname : teel +middlename : f w +year : 2013 +year_papertitle : ((2013, 'the role of interactions between accommodation and vergence in human visual development'),) +keywords : frozenset({'optics', 'ophthalmology'}) + +5/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : min +lastname : li +middlename : None +year : 2002 +year_papertitle : ((2002, 'effects of upright and supine orientation on tongue position during silence'), (2002, 'motion based post processing of deformable contours'), (2004, 'a general framework for 2d multiframe and 3d surface to surface motion estimation'), (2004, 'spline based motion recovery for 3d surfaces using nonrigid shape properties'), (2005, 'automatic contour tracking in ultrasound images'), (2006, 'adaptive appearance based face recognition'), (2007, 'comparison of speech production in upright and supine position')) +keywords : frozenset({'artificial intelligence', 'audiology', 'acoustics', 'computer vision', 'orthodontics', 'communication', 'mathematical analysis'}) + +firstname : min +lastname : liu +middlename : None +year : 2006 +year_papertitle : ((2006, 'speaking the unspeakable chinese women s condom use communication'),) +keywords : frozenset({'communication', 'public health', 'womens studies'}) + +6/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : edward +lastname : meyers +middlename : None +year : 1981 +year_papertitle : ((1981, 'case report gallium study showing a rare form of multiple myeloma'), (1981, 'unusual tc 99m pipida images'), (1983, 'renal and liver scans showing polycystic disease')) +keywords : frozenset({'nuclear magnetic resonance', 'radiology', 'nuclear medicine', 'pathology'}) + +firstname : edward +lastname : meyer +middlename : None +year : 2014 +year_papertitle : ((2014, 'behavioral biochemical and physiological components of stress enhanced fear learning in an animal model of post traumatic stress disorder'),) +keywords : frozenset({'molecular biology', 'neurosciences', 'physiology'}) + +6/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : pamela +lastname : mayberry +middlename : s +year : 1999 +year_papertitle : ((1999, 'what we didn t learn because of who would not talk to us'), (2001, 'caring too much cultural lag in african americans perceptions of filial responsibilities')) +keywords : frozenset({'medical education', 'social psychology', 'developmental psychology'}) + +firstname : pamela +lastname : may +middlename : e +year : 2015 +year_papertitle : ((2015, 'engagement in activities and cognitive functioning among older adults in the health and retirement study'),) +keywords : frozenset({'psychology', 'gerontology'}) + +6/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dawn +lastname : cecil +middlename : k +year : 2000 +year_papertitle : ((2000, 'the effectiveness of adult basic education and life skills programs in reducing recidivism a review'), (2000, 'the effectiveness of adult basic education and life skills programs in reducing recidivism a review and assessment of the research'), (2001, 'marital rape a student assessment of rape laws and the marital exemption'), (2006, 'violence privilege and power images of female delinquents in film'), (2007, 'looking beyond caged heat media images of women in prison')) +keywords : frozenset({'clinical psychology', 'social psychology', 'applied psychology', 'computer security', 'media studies'}) + +firstname : dawn +lastname : mechanichamilton +middlename : j +year : 2010 +year_papertitle : ((2010, 'neural plasticity following anterior temporal lobectomy'),) +keywords : frozenset({'clinical psychology', 'neurosciences'}) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jing +lastname : feng +middlename : None +year : 2005 +year_papertitle : ((2005, 'life threatening blood loss from scratching provoked by pruritus in the bulky perineal nevocytoma variant of giant congenital melanocytic nevus in a child'), (2006, 'fatal childhood calciphylaxis in a 10 year old and literature review')) +keywords : frozenset({'dermatology', 'surgery'}) + +firstname : hengsheng +lastname : feng +middlename : None +year : 2008 +year_papertitle : ((2008, 'in vitro determination of drug diffusion coefficients in viscous media using pulsatile microdialysis theory and method development'),) +keywords : frozenset({'pharmacology', 'pharmacy sciences'}) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hao +lastname : li +middlename : None +year : 2018 +year_papertitle : ((2018, 'impact of hiv 1 crf55_01b infection on cd4 counts and viral load in men who have sex with men naive to antiretroviral treatment'), (2020, 'viral rna level serum antibody responses and transmission risk in recovered covid 19 patients with recurrent positive sars cov 2 rna test results a population based observational cohort study'), (2021, 'molecular surveillance of hiv 1 newly diagnosed infections in shenzhen china from 2011 to 2018'), (2021, 'trends of hiv syphilis hsv 2 seropositive rate and factors associated with hsv 2 infection in men who have sex with men in shenzhen china a retrospective study')) +keywords : frozenset({'gastroenterology', 'demography', 'internal medicine'}) + +firstname : yanen +lastname : li +middlename : None +year : 2007 +year_papertitle : ((2007, 'medicaid risk adjustment model with diagnosis and pharmacy based adjusters does it work'),) +keywords : frozenset({'health care', 'gerontology'}) + +6/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michel +lastname : taylor +middlename : a +year : 2018 +year_papertitle : ((2018, 'improved reduction of the tibiofibular syndesmosis with tightrope compared to screw fixation results of a randomized controlled study'), (2018, 'increased early revision rate with the infinity total ankle prosthesis'), (2018, 'keeping it in the fairway golf handicap following total ankle arthroplasty'), (2018, 'optimizing outpatient total ankle replacement from clinic to pain management'), (2018, 'preoperative picture improves postoperative satisfaction in bunion surgery'), (2018, 'total ankle arthroplasty with simultaneous versus secondary hindfoot arthrodesis'), (2019, 'a comparison of cyst formation and management in mobile bearing and fixed bearing total ankle arthroplasty'), (2019, 'hindfoot arthrodesis screw position and trajectory effect on talus subsidence when performed with total ankle arthroplasty'), (2019, 'risk factors for failure of total ankle arthroplasty with a minimum five years of follow up')) +keywords : frozenset({'orthodontics', 'physical therapy', 'surgery'}) + +firstname : michele +lastname : taylor +middlename : marie +year : 2012 +year_papertitle : ((2012, 'characterization of impulsive like behavior produced by developmental deltamethrin exposure role of dopaminergic dysfunction'),) +keywords : frozenset({'neurosciences', 'surgery', 'toxicology'}) + +6/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sharon +lastname : alford +middlename : hensley +year : 2019 +year_papertitle : ((2019, 'cost of leukopenia and neutropenia in metastatic breast cancer within last 12 36 and 60 months using a curated disease model'), (2019, 'impact of using real world outcomes versus clinical evidence and list prices on value assessments'), (2020, 'lenalidomide use in multiple myeloma'), (2020, 'trends in alk inhibitors for non small cell lung cancer')) +keywords : frozenset({'internal medicine', 'intensive care medicine', 'actuarial science', 'cancer research', 'oncology'}) + +firstname : sharon +lastname : alford +middlename : m hensley +year : 2009 +year_papertitle : ((2009, 'molecular epidemiology racial ethnic differences and chemoprevention of breast cancer population based studies from metropolitan detroit'),) +keywords : frozenset({'molecular biology', 'epidemiology', 'public health'}) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : huifang +lastname : li +middlename : None +year : 2010 +year_papertitle : ((2010, 'cdk5 nuclear localization is p27 dependent in nerve cells implications for cell cycle suppression and caspase 3 activation'), (2010, 'cdk5 suppresses the neuronal cell cycle by disrupting the e2f1 dp1 complex'), (2010, 'nucleocytoplasmic cdk5 links cell cycle and cell death in post mitotic neurons in alzheimer s disease'), (2012, 'cdk5 levels oscillate during the neuronal cell cycle cdh1 ubiquitination triggers proteosome dependent degradation during s phase'), (2012, 'neuronal cell cycle regulation of cdk5 in alzheimer s disease'), (2014, 'cdk5 activator protein p25 preferentially binds and activates gsk3β'), (2015, 'correction cdk5 activator protein p25 preferentially binds and activates gsk3β'), (2015, 'cyclin dependent kinase 5 decreases in gastric cancer and its nuclear accumulation suppresses gastric tumorigenesis'), (2015, 'the roles of cdk5 mediated subcellular localization of foxo1 in neuronal death'), (2016, 'quercetin stabilizes apolipoprotein e and reduces brain aβ levels in amyloid model mice')) +keywords : frozenset({'endocrinology', 'neuroscience', 'pathology', 'cell biology', 'molecular biology', 'cancer research', 'internal medicine'}) + +firstname : huifang +lastname : li +middlename : None +year : 1997 +year_papertitle : ((1997, 'mechanisms of acetylcholine modulation of layer ii iii cell excitability and synaptic transmission in rat somatosensory neocortex'),) +keywords : frozenset({'pharmacology', 'neurology'}) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jun +lastname : xu +middlename : None +year : 2004 +year_papertitle : ((2004, 'a novel epitope of n cam defines precursors of human adherent nk cells'), (2007, 'essential role of the tnf tnfr2 cognate interaction in mouse dendritic cell natural killer cell crosstalk'), (2008, 'p1 369 the impaired suppression function of peripheral blood cd4 cd25hi regulatory t cells in patients with alzheimer s disease'), (2009, 'indirubin 3 monoxime inhibits β amyloid induced neurotoxicity in neuroblastoma sh sy5y cells'), (2010, 'intracerebroventricular injection of tnf alpha antibody reduces amyloid plaques and neurofibrillary tangles in aged app ps1 mice via rapid recruitment of peripheral dendritic cells'), (2011, 'anti tnf α reduces amyloid plaques and tau phosphorylation and induces cd11c positive dendritic like cell in the app ps1 transgenic mouse brains'), (2011, 'cognitive improvement with intrathecal administration of infliximab in a woman with alzheimer s disease'), (2011, 'short amyloid beta immunogens show strong immunogenicity and avoid stimulating pro inflammatory pathways in bone marrow derived dendritic cells from c57bl 6j mice in vitro')) +keywords : frozenset({'pediatrics', 'pharmacology', 'pathology', 'cell biology', 'immunology', 'surgery', 'cancer research'}) + +firstname : jun +lastname : xu +middlename : None +year : 2004 +year_papertitle : ((2004, 'characterization of peripheral and hepatic insulin sensitivity by metabolic flux analysis'),) +keywords : frozenset({'molecular biology', 'physiology'}) + +6/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : nancy +lastname : bush +middlename : ann oberheim +year : 2013 +year_papertitle : ((2013, 'neuro medical oncology'), (2015, 'the longitudinal evaluation of quality of life in patients with newly diagnosed malignant glioma p4 213'), (2016, 'pseudoprogression in neuro oncology'), (2016, 'pseudoprogression in neuro oncology overview pathophysiology and interpretation'), (2016, 'treatment strategies for low grade glioma in adults'), (2017, 'current and future strategies for treatment of glioma'), (2017, 'diffuse low grade gliomas'), (2017, 'diffuse non midline glioma with h3f3a k27m mutation a prognostic and treatment dilemma'), (2017, 'do evolutionary changes in astrocytes contribute to the computational power of the hominid brain'), (2017, 'the effect of molecular diagnostics on the treatment of glioma')) +keywords : frozenset({'medical physics', 'radiology', 'physical therapy', 'psychoanalysis', 'intensive care medicine', 'neuroscience', 'pathology', 'surgery', 'cancer research'}) + +firstname : nancy +lastname : bush +middlename : ann oberheim +year : 2008 +year_papertitle : ((2008, 'the functional organization of astrocytes in normal and epileptic brain'),) +keywords : frozenset({'neurosciences'}) + +6/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : patricia +lastname : stein +middlename : None +year : 2010 +year_papertitle : ((2010, 'managing the patient receiving moderate sedation analgesia'), (2010, 'silence can hurt patients'), (2010, 'the busy manager s guide to delegation'), (2011, 'clinical nursing pocket guide'), (2012, 'complications in surgery'), (2013, 'care of the older adult in surgery'), (2013, 'perioperative medication safety practices')) +keywords : frozenset({'management', 'anesthesia', 'psychoanalysis', 'intensive care medicine', 'nursing', 'surgery', 'general surgery', 'family medicine'}) + +firstname : patricia +lastname : stein +middlename : reeber +year : 1991 +year_papertitle : ((1991, 'life events self esteem and powerlessness among adolescents'),) +keywords : frozenset({'developmental psychology', 'nursing'}) + +7/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : amy +lastname : griffin +middlename : l +year : 2000 +year_papertitle : ((2000, 'developing a geographic visualization tool to support earth science learning'), (2000, 'medial septal microinfusion of scopolamine disrupts hippocampal activity and trace jaw movement conditioning'), (2001, 'feeling it out the use of haptic visualization for exploratory geographic analysis'), (2002, 'reversible septal inactivation disrupts hippocampal slow wave and unit activity and impairs trace conditioning in rabbits oryctolagus cuniculus'), (2004, 'differential mastication kinematics of the rabbit in response to food and water implications for conditioned movement'), (2004, 'inactivation of the anterior cingulate cortex impairs extinction of rabbit jaw movement conditioning and prevents extinction related inhibition of hippocampal activity'), (2004, 'theta contingent trial presentation accelerates learning rate and enhances hippocampal plasticity during trace eyeblink conditioning'), (2005, 'nonpharmacological amelioration of age related learning deficits the impact of hippocampal θ triggered training'), (2006, 'strapping of the hemiplegic upper limb delays onset of shoulder pain a randomised controlled trial'), (2006, 'strapping the hemiplegic shoulder prevents development of pain during rehabilitation a randomized controlled trial')) +keywords : frozenset({'anesthesia', 'multimedia', 'endocrinology', 'anatomy', 'neuroscience', 'data science', 'knowledge management', 'physical medicine and rehabilitation', 'human computer interaction', 'physical therapy', 'internal medicine'}) + +firstname : amy +lastname : griffin +middlename : louise +year : 2004 +year_papertitle : ((2004, 'understanding how scientists use data display devices for interactive visual computing with geographical models'),) +keywords : frozenset({'public health', 'geography', 'computer science'}) + +7/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : janet +lastname : santo +middlename : abboud dal +year : 2009 +year_papertitle : ((2009, 'characteristics of teens with and without work permits'), (2010, 'effects of work permits on illegal employment among youth workers findings of a school based survey on child labor violations')) +keywords : frozenset({'demographic economics', 'pediatrics', 'gerontology'}) + +firstname : janet +lastname : santo +middlename : abboud dal +year : 1993 +year_papertitle : ((1993, 'childhood unintentional injuries factors predicting common injuries among preschoolers'),) +keywords : frozenset({'public health'}) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : omar +lastname : duramad +middlename : None +year : 2000 +year_papertitle : ((2000, 'erk5 is a novel type of mitogen activated protein kinase containing a transcriptional activation domain'), (2000, 'the tnf receptor family member cd30 is not essential for negative selection'), (2003, 'il 10 regulates plasmacytoid dendritic cell response to cpg containing immunostimulatory sequences'), (2005, 'inhibitors of tlr 9 act on multiple cell subsets in mouse and man in vitro and prevent death in vivo from systemic inflammation'), (2005, 'nucleic acids of mammalian origin can act as endogenous ligands for toll like receptors and may promote systemic lupus erythematosus'), (2005, 'tslp activated dendritic cells induce an inflammatory t helper type 2 cell response through ox40 ligand'), (2006, 'ox40 ligand shuts down il 10 producing regulatory t cells'), (2006, 'specialization kinetics and repertoire of type 1 interferon responses by human plasmacytoid predendritic cells'), (2007, 'mekk3 is essential for lipopolysaccharide induced interleukin 6 and granulocyte macrophage colony stimulating factor production in macrophages'), (2007, 'the adaptor protein card9 is required for innate immune responses to intracellular pathogens')) +keywords : frozenset({'cell biology', 'immunology', 'molecular biology'}) + +firstname : omar +lastname : duramad +middlename : None +year : 2007 +year_papertitle : ((2007, 'characterization of tumor associated foxp3 regulatory t cells and de novo induction by the tumor microenvironment'),) +keywords : frozenset({'immunology'}) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : maria +lastname : aylwin +middlename : de la luz +year : 1997 +year_papertitle : ((1997, 'nmda receptors contribute to primary visceral afferent transmission in the nucleus of the solitary tract'), (1998, 'non nmda and nmda receptors in the synaptic pathway between area postrema and nucleus tractus solitarius')) +keywords : frozenset({'anatomy', 'neuroscience'}) + +firstname : maria +lastname : aylwin +middlename : de la luz +year : 1994 +year_papertitle : ((1994, 'effects of mutations at the agonist binding site on the gating of the mouse nicotinic acetylcholine receptor'),) +keywords : frozenset({'neurosciences', 'physiology', 'biophysics'}) + +9/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : vernon +lastname : grant +middlename : m +year : 2016 +year_papertitle : ((2016, 'assessing adult and child correlates of physical activity in the healthy children strong families intervention 2693 board 216 june 3 11'), (2017, 'healthy children strong families 2 a randomized controlled trial of a healthy lifestyle intervention for american indian families designed using community based approaches'), (2018, 'overnight sleep duration and obesity in 2 5 year old american indian children'), (2018, 'understanding correlates of physical activity in american indian families the healthy children strong families 2 study'), (2020, 'sleep and physical activity patterns in urban american indian children')) +keywords : frozenset({'gerontology', 'physical therapy', 'demography'}) + +firstname : vernon +lastname : grant +middlename : matthew +year : 2015 +year_papertitle : ((2015, 'developing and pilot testing community based strategies for increasing physical activity in children in the 3 sup rd sup 4 sup th sup 5 sup th sup and 6 sup th sup grade on an american indian reservation'),) +keywords : frozenset({'physical education', 'health education', 'public health', 'native american studies'}) + +9/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : khoa +lastname : tran +middlename : d +year : 1998 +year_papertitle : ((1998, 'detection of an interleukin 1 intracellular receptor antagonist mrna variant'), (2001, 'volume specific cutoffs are necessary for reproducible application of prostate specific antigen density of the transition zone in prostate cancer detection'), (2002, 'characterization of a fission yeast subunit of an rna polymerase i essential transcription initiation factor sprrn7h taf i 68 that bridges yeast and mammals association with sprrn11h and the core ribosomal rna gene promoter'), (2004, 'myringotomy and tympanostomy tube placement in children with sickle cell disease'), (2005, 'child behavior and quality of life in pediatric obstructive sleep apnea')) +keywords : frozenset({'pediatrics', 'pathology', 'genetics', 'molecular biology', 'physical therapy', 'urology'}) + +firstname : khoa +lastname : tran +middlename : dang +year : 2010 +year_papertitle : ((2010, 'transcriptional regulation of early progenitor competence in the i drosophila i central nervous system'),) +keywords : frozenset({'biology', 'neurosciences'}) + +10/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : harriet +lastname : wichowski +middlename : conley +year : 1988 +year_papertitle : ((1988, 'control theory measuring drug performance with a new optimization algorithm'), (1992, 'identification and validation of a new nursing diagnosis sick role conflict'), (1995, 'how nurses react to and cope with the uncertainty of unfamiliar technology validation for continuing education')) +keywords : frozenset({'clinical psychology', 'psychiatry', 'mathematical optimization', 'nursing'}) + +firstname : harriet +lastname : wichowski +middlename : joyce conley +year : 1991 +year_papertitle : ((1991, 'professional adaptation the case of nursing'),) +keywords : frozenset({'nursing', 'labor relations', 'sociology'}) + +10/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jennifer +lastname : cohen +middlename : d +year : 2009 +year_papertitle : ((2009, 'cardiac magnetic resonance imaging versus transesophageal echocardiography for the evaluation of mitral valve pathology prior to surgical intervention the magnasound study'), (2012, 'determining the risks of magnetic resonance imaging at 1 5 tesla for patients with pacemakers and implantable cardioverter defibrillators')) +keywords : frozenset({'radiology', 'cardiology', 'internal medicine'}) + +firstname : jennifer +lastname : cohen +middlename : diane +year : 2009 +year_papertitle : ((2009, 'engagement of map kinase and mtor signaling by the tsc 2 tumor suppressor in renal cancer'),) +keywords : frozenset({'pharmacology', 'oncology', 'surgery', 'toxicology'}) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ronald +lastname : andrews +middlename : p +year : 1988 +year_papertitle : ((1988, 'a comparison of cyclobenzaprine and placebo in the management of fibrositis'), (1995, 'american college of rheumatology guidelines for performing office synovial fluid examinations')) +keywords : frozenset({'general surgery', 'physical therapy', 'anesthesia', 'internal medicine'}) + +firstname : ronald +lastname : andrews +middlename : paul +year : 2001 +year_papertitle : ((2001, 'regulation of the very late antigen 4 mediated adhesive activity of normal and non releaser basophils roles for src syk and phosphatidylinositol 3 kinase'),) +keywords : frozenset({'cellular biology', 'pathology'}) + +11/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 3120.620472387473 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..cfcb7f0 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_advisors_9015.log @@ -0,0 +1,704 @@ +Namespace(testing=False, verbose=1, field=['philosophy'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [138885662] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008432308832804362 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 29.384467128912608 minutes + +Starting active labeling... +firstname : bruce +lastname : marshall +middlename : d +year_range : (2012,) +main_us_institutions_year : ((2012, 'southern methodist university'),) +all_us_institutions_year : ((2012, 'southern methodist university'),) + +firstname : bruce +lastname : marsh +middlename : d +year_range : (1973, 2020) +main_us_institutions_year : ((1975, 'johns hopkins university'), (1978, 'johns hopkins university'), (1981, 'johns hopkins university'), (1983, 'johns hopkins university'), (1984, 'johns hopkins university'), (1985, 'johns hopkins university'), (1986, 'johns hopkins university'), (1987, 'johns hopkins university'), (1988, 'johns hopkins university'), (1989, 'johns hopkins university'), (1990, 'johns hopkins university'), (1991, 'johns hopkins university'), (1993, 'johns hopkins university'), (1995, 'johns hopkins university'), (1996, 'johns hopkins university'), (1998, 'johns hopkins university'), (1999, 'johns hopkins university'), (2002, 'johns hopkins university'), (2003, 'johns hopkins university'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university'), (2018, 'johns hopkins university'), (2019, 'johns hopkins university'), (2020, 'johns hopkins university')) +all_us_institutions_year : ((1975, 'johns hopkins university'), (1978, 'johns hopkins university'), (1981, 'johns hopkins university'), (1983, 'johns hopkins university'), (1984, 'johns hopkins university'), (1985, 'johns hopkins university'), (1986, 'california institute of technology'), (1986, 'johns hopkins university'), (1987, 'johns hopkins university'), (1988, 'johns hopkins university'), (1989, 'johns hopkins university'), (1990, 'johns hopkins university'), (1991, 'johns hopkins university'), (1993, 'johns hopkins university'), (1995, 'johns hopkins university'), (1996, 'johns hopkins university'), (1998, 'johns hopkins university'), (1999, 'johns hopkins university'), (2002, 'johns hopkins university'), (2003, 'johns hopkins university'), (2004, 'johns hopkins university'), (2005, 'johns hopkins university'), (2006, 'johns hopkins university'), (2007, 'johns hopkins university'), (2008, 'johns hopkins university'), (2009, 'johns hopkins university'), (2010, 'johns hopkins university'), (2011, 'johns hopkins university'), (2012, 'johns hopkins university'), (2013, 'johns hopkins university'), (2014, 'johns hopkins university'), (2015, 'johns hopkins university'), (2016, 'johns hopkins university'), (2018, 'johns hopkins university'), (2019, 'johns hopkins university'), (2020, 'johns hopkins university')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : james +lastname : child +middlename : w +year_range : (2001,) +main_us_institutions_year : ((2001, 'bowling green state university'),) +all_us_institutions_year : ((2001, 'bowling green state university'),) + +firstname : james +lastname : childs +middlename : m +year_range : (1987, 2020) +main_us_institutions_year : ((1987, 'trinity lutheran seminary'), (2004, 'trinity lutheran seminary'), (2009, 'trinity lutheran seminary'), (2011, 'trinity lutheran seminary'), (2012, 'trinity lutheran seminary'), (2013, 'trinity lutheran seminary'), (2014, 'trinity lutheran seminary'), (2018, 'trinity lutheran seminary'), (2019, 'trinity lutheran seminary'), (2020, 'trinity lutheran seminary')) +all_us_institutions_year : ((1987, 'trinity lutheran seminary'), (2004, 'trinity lutheran seminary'), (2009, 'trinity lutheran seminary'), (2011, 'trinity lutheran seminary'), (2012, 'trinity lutheran seminary'), (2013, 'trinity lutheran seminary'), (2014, 'trinity lutheran seminary'), (2018, 'trinity lutheran seminary'), (2019, 'trinity lutheran seminary'), (2020, 'trinity lutheran seminary')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : frank +lastname : newman +middlename : c +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of california berkeley'),) +all_us_institutions_year : ((1991, 'university of california berkeley'),) + +firstname : frank +lastname : neumann +middlename : None +year_range : (1924, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2007, 'rockefeller university'), (2012, 'rockefeller university'), (2017, 'rockefeller university')) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : arthur +lastname : green +middlename : None +year_range : (2001,) +main_us_institutions_year : ((2001, 'brandeis university'),) +all_us_institutions_year : ((2001, 'brandeis university'),) + +firstname : arthur +lastname : greenberg +middlename : None +year_range : (1974, 2020) +main_us_institutions_year : ((1980, 'university of pennsylvania'), (1982, 'hospital of the university of pennsylvania'), (1984, 'university of pittsburgh'), (1985, 'university of pittsburgh'), (1986, 'university of pittsburgh'), (1988, 'university of pittsburgh'), (1989, 'university of pittsburgh'), (1990, 'university of pittsburgh'), (1992, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'university of pittsburgh'), (1998, 'duke university'), (2000, 'university of pittsburgh'), (2001, 'duke university'), (2002, 'university of pittsburgh'), (2005, 'duke university'), (2006, 'duke university'), (2007, 'duke university'), (2008, 'duke university'), (2009, 'duke university'), (2011, 'duke university'), (2012, 'duke university'), (2013, 'duke university'), (2015, 'duke university'), (2016, 'duke university'), (2017, 'duke university'), (2020, 'duke university')) +all_us_institutions_year : ((1980, 'university of pennsylvania'), (1982, 'hospital of the university of pennsylvania'), (1984, 'university of pittsburgh'), (1985, 'university of pittsburgh'), (1986, 'university of pittsburgh'), (1988, 'university of pittsburgh'), (1989, 'university of pittsburgh'), (1990, 'university of pittsburgh'), (1992, 'university of pittsburgh'), (1996, 'university of pittsburgh'), (1997, 'mercy medical center'), (1997, 'university of pittsburgh'), (1998, 'duke university'), (2000, 'university of pittsburgh'), (2001, 'duke university'), (2002, 'university of pittsburgh'), (2005, 'duke university'), (2006, 'duke university'), (2007, 'duke university'), (2008, 'duke university'), (2009, 'duke university'), (2011, 'duke university'), (2012, 'duke university'), (2013, 'duke university'), (2015, 'duke university'), (2016, 'duke university'), (2017, 'duke university'), (2020, 'duke university')) + +0/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : mcneill +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'depaul university'),) +all_us_institutions_year : ((2006, 'depaul university'),) + +firstname : d +lastname : mcneil +middlename : c +year_range : (1992, 2017) +main_us_institutions_year : ((1992, 'depaul university'), (1994, 'depaul university'), (1996, 'depaul university'), (1997, 'depaul university'), (1998, 'depaul university'), (2005, 'depaul university'), (2012, 'depaul university')) +all_us_institutions_year : ((1992, 'depaul university'), (1994, 'depaul university'), (1996, 'depaul university'), (1997, 'depaul university'), (1998, 'depaul university'), (2005, 'depaul university'), (2012, 'depaul university')) + +0/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : friedman +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of illinois chicago'),) +all_us_institutions_year : ((1990, 'university of illinois chicago'),) + +firstname : richard +lastname : fried +middlename : m +year_range : (1976, 2010) +main_us_institutions_year : ((1980, 'university of illinois at chicago'), (1981, 'university of illinois at chicago'), (1983, 'university of illinois at chicago'), (1986, 'university of illinois at chicago'), (1991, 'university of illinois at chicago'), (1993, 'university of illinois at chicago'), (1997, 'university of illinois at chicago'), (2003, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2009, 'university of illinois at chicago'), (2010, 'university of illinois at chicago')) +all_us_institutions_year : ((1980, 'university of illinois at chicago'), (1981, 'university of illinois at chicago'), (1983, 'university of illinois at chicago'), (1986, 'university of illinois at chicago'), (1991, 'university of illinois at chicago'), (1993, 'university of illinois at chicago'), (1997, 'university of illinois at chicago'), (2003, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2009, 'university of illinois at chicago'), (2010, 'university of illinois at chicago')) + +1/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : williamson +middlename : l +year_range : (2001,) +main_us_institutions_year : ((2001, 'baylor university'),) +all_us_institutions_year : ((2001, 'baylor university'),) + +firstname : will +lastname : williams +middlename : None +year_range : (2009, 2013) +main_us_institutions_year : ((2009, 'baylor university'), (2012, 'baylor university'), (2013, 'baylor university')) +all_us_institutions_year : ((2009, 'baylor university'), (2010, 'baylor university'), (2012, 'baylor university'), (2013, 'baylor university')) + +1/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : alexander +middlename : j g +year_range : (1999,) +main_us_institutions_year : ((1999, 'new york university'),) +all_us_institutions_year : ((1999, 'new york university'),) + +firstname : jonathan +lastname : alexander +middlename : j g +year_range : (1987, 2003) +main_us_institutions_year : ((1990, 'new york university'), (1991, 'new york university'), (1994, 'new york university'), (1998, 'new york university'), (2002, 'new york university')) +all_us_institutions_year : ((1990, 'new york university'), (1991, 'new york university'), (1994, 'new york university'), (1998, 'new york university'), (2002, 'new york university')) + +1/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : cyril +lastname : regan +middlename : j o +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of notre dame'),) +all_us_institutions_year : ((2014, 'university of notre dame'),) + +firstname : cyril +lastname : oregan +middlename : None +year_range : (1994, 2020) +main_us_institutions_year : ((2006, 'university of notre dame'), (2007, 'university of notre dame'), (2008, 'university of notre dame'), (2010, 'university of notre dame'), (2012, 'university of notre dame'), (2014, 'university of notre dame'), (2015, 'university of notre dame'), (2016, 'university of notre dame'), (2018, 'university of notre dame'), (2019, 'university of notre dame'), (2020, 'university of notre dame')) +all_us_institutions_year : ((2006, 'university of notre dame'), (2007, 'university of notre dame'), (2008, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame'), (2012, 'university of notre dame'), (2014, 'university of notre dame'), (2015, 'university of notre dame'), (2016, 'university of notre dame'), (2017, 'university of notre dame'), (2018, 'university of notre dame'), (2019, 'university of notre dame'), (2020, 'university of notre dame')) + +2/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : rocco +middlename : della +year_range : (2013,) +main_us_institutions_year : ((2013, 'yale university'),) +all_us_institutions_year : ((2013, 'yale university'),) + +firstname : michael +lastname : rocca +middlename : della +year_range : (1993, 2021) +main_us_institutions_year : ((1993, 'yale university'), (1996, 'yale university'), (1998, 'yale university'), (2003, 'yale university'), (2005, 'yale university'), (2007, 'yale university'), (2011, 'yale university'), (2012, 'yale university'), (2014, 'yale university')) +all_us_institutions_year : ((1993, 'yale university'), (1996, 'yale university'), (1998, 'yale university'), (2003, 'yale university'), (2005, 'yale university'), (2007, 'yale university'), (2011, 'yale university'), (2012, 'yale university'), (2014, 'yale university')) + +3/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peter +lastname : godfreysmith +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'harvard university'),) +all_us_institutions_year : ((2010, 'harvard university'),) + +firstname : peter +lastname : gordon +middlename : e +year_range : (1999, 2021) +main_us_institutions_year : ((2004, 'harvard university'), (2005, 'harvard university'), (2006, 'harvard university'), (2008, 'harvard university'), (2011, 'harvard university'), (2012, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university'), (2019, 'harvard university'), (2021, 'harvard university')) +all_us_institutions_year : ((2004, 'harvard university'), (2005, 'harvard university'), (2006, 'harvard university'), (2008, 'harvard university'), (2011, 'harvard university'), (2012, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university'), (2019, 'harvard university'), (2021, 'harvard university')) + +4/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : warren +lastname : goldfarb +middlename : None +year_range : (2005,) +main_us_institutions_year : ((2005, 'harvard university'),) +all_us_institutions_year : ((2005, 'harvard university'),) + +firstname : warren +lastname : goldstein +middlename : s +year_range : (2001, 2021) +main_us_institutions_year : ((2005, 'university of central florida'), (2006, 'university of central florida'), (2010, 'harvard university'), (2011, 'harvard university'), (2012, 'harvard university'), (2014, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university')) +all_us_institutions_year : ((2005, 'university of central florida'), (2006, 'university of central florida'), (2010, 'harvard university'), (2011, 'harvard university'), (2012, 'harvard university'), (2014, 'harvard university'), (2015, 'harvard university'), (2016, 'harvard university')) + +4/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jonathan +lastname : kim +middlename : h +year_range : (2013,) +main_us_institutions_year : ((2013, 'biola university'),) +all_us_institutions_year : ((2013, 'biola university'),) + +firstname : jonathan +lastname : kimmelman +middlename : None +year_range : (1994, 2021) +main_us_institutions_year : ((1994, 'yale university'), (1999, 'yale university'), (2000, 'yale university')) +all_us_institutions_year : ((1994, 'yale university'), (1999, 'yale university'), (2000, 'yale university')) + +4/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : lieb +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of illinois chicago'),) +all_us_institutions_year : ((2008, 'university of illinois chicago'),) + +firstname : michael +lastname : liebrenz +middlename : None +year_range : (2007, 2021) +main_us_institutions_year : ((2014, 'columbia university medical center'), (2015, 'columbia university medical center')) +all_us_institutions_year : ((2014, 'columbia university medical center'), (2015, 'columbia university medical center')) + +4/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : schiffer +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'city university of new york'),) +all_us_institutions_year : ((1994, 'city university of new york'),) + +firstname : stephen +lastname : schiffer +middlename : None +year_range : (1986, 1988) +main_us_institutions_year : ((1986, 'university of arizona'), (1988, 'university of arizona')) +all_us_institutions_year : ((1986, 'university of arizona'), (1988, 'university of arizona')) + +4/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : reiner +lastname : schurmann +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'new school for social research'),) +all_us_institutions_year : ((1992, 'new school for social research'),) + +firstname : reiner +lastname : schurmann +middlename : None +year_range : (1973, 2019) +main_us_institutions_year : ((1973, 'duquesne university'), (1978, 'the new school'), (1980, 'the new school'), (1983, 'the new school')) +all_us_institutions_year : ((1973, 'duquesne university'), (1978, 'the new school'), (1979, 'the new school'), (1980, 'the new school'), (1983, 'the new school'), (1984, 'the new school')) + +4/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kaisa +lastname : puhakka +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'california institute of integral studies'),) +all_us_institutions_year : ((2015, 'california institute of integral studies'),) + +firstname : kaisa +lastname : puhakka +middlename : None +year_range : (1988, 1995) +main_us_institutions_year : ((1988, 'university of toledo medical center'), (1990, 'university of west georgia'), (1994, 'university of west georgia'), (1995, 'university of west georgia')) +all_us_institutions_year : ((1985, 'university of toledo medical center'), (1988, 'university of toledo medical center'), (1990, 'university of west georgia'), (1994, 'university of west georgia'), (1995, 'university of west georgia')) + +5/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : alexander +lastname : rosenberg +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'university of california riverside'),) +all_us_institutions_year : ((1994, 'university of california riverside'),) + +firstname : alex +lastname : rosenberg +middlename : None +year_range : (1952, 1985) +main_us_institutions_year : ((1962, 'massachusetts institute of technology'), (1966, 'cornell university'), (1970, 'cornell university'), (1972, 'cornell university'), (1973, 'cornell university'), (1976, 'cornell university'), (1978, 'cornell university'), (1978, 'pennsylvania state university'), (1980, 'cornell university'), (1982, 'cornell university'), (1985, 'cornell university')) +all_us_institutions_year : ((1962, 'massachusetts institute of technology'), (1966, 'cornell university'), (1970, 'cornell university'), (1972, 'cornell university'), (1973, 'cornell university'), (1976, 'cornell university'), (1978, 'cornell university'), (1978, 'pennsylvania state university'), (1980, 'cornell university'), (1982, 'cornell university'), (1985, 'cornell university')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robin +lastname : young +middlename : darling +year_range : (2006,) +main_us_institutions_year : ((2006, 'catholic university of america'),) +all_us_institutions_year : ((2006, 'catholic university of america'),) + +firstname : robin +lastname : young +middlename : darling +year_range : (1990, 2016) +main_us_institutions_year : ((2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame')) +all_us_institutions_year : ((2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robin +lastname : young +middlename : darling +year_range : (2003,) +main_us_institutions_year : ((2003, 'catholic university of america'),) +all_us_institutions_year : ((2003, 'catholic university of america'),) + +firstname : robin +lastname : young +middlename : darling +year_range : (1990, 2016) +main_us_institutions_year : ((2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame')) +all_us_institutions_year : ((2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame')) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : rosenthal +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'city university of new york'),) +all_us_institutions_year : ((2007, 'city university of new york'),) + +firstname : david +lastname : rosenthal +middlename : m +year_range : (1961, 2020) +main_us_institutions_year : ((2004, 'university of georgia'), (2005, 'university of georgia'), (2006, 'university of georgia'), (2007, 'portland state university'), (2008, 'portland state university'), (2009, 'university of illinois at urbana champaign'), (2009, 'agricultural research service'), (2009, 'university of georgia'), (2010, 'university of georgia'), (2011, 'united states department of agriculture'), (2012, 'university of illinois at urbana champaign'), (2012, 'united states department of agriculture'), (2013, 'ohio university'), (2014, 'ohio university'), (2015, 'ohio university'), (2016, 'ohio university'), (2017, 'university of illinois at urbana champaign'), (2017, 'ohio university'), (2018, 'ohio university'), (2019, 'ohio university'), (2020, 'ohio university')) +all_us_institutions_year : ((2004, 'university of georgia'), (2005, 'university of georgia'), (2006, 'university of georgia'), (2007, 'portland state university'), (2007, 'university of georgia'), (2008, 'portland state university'), (2009, 'agricultural research service'), (2009, 'university of georgia'), (2009, 'university of illinois at urbana champaign'), (2010, 'university of georgia'), (2011, 'united states department of agriculture'), (2012, 'united states department of agriculture'), (2012, 'university of illinois at urbana champaign'), (2013, 'ohio university'), (2013, 'united states department of agriculture'), (2014, 'ohio university'), (2015, 'ohio university'), (2016, 'ohio university'), (2016, 'portland state university'), (2016, 'university of illinois at urbana champaign'), (2017, 'ohio university'), (2017, 'university of illinois at urbana champaign'), (2018, 'ohio university'), (2019, 'ohio university'), (2020, 'ohio university')) + +7/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : russell +lastname : west +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'asbury theological seminary'),) +all_us_institutions_year : ((2007, 'asbury theological seminary'),) + +firstname : r +lastname : west +middlename : a +year_range : (1974, 2021) +main_us_institutions_year : ((1974, 'united states geological survey'), (1974, 'california institute of technology'), (1979, 'university of colorado boulder'), (1980, 'university of colorado boulder'), (1982, 'university of colorado boulder'), (1983, 'university of colorado boulder'), (1985, 'university of colorado boulder'), (1986, 'california institute of technology'), (1988, 'california institute of technology'), (1989, 'california institute of technology'), (1990, 'california institute of technology'), (1991, 'california institute of technology'), (1992, 'california institute of technology'), (1994, 'california institute of technology'), (1995, 'california institute of technology'), (1997, 'california institute of technology'), (1998, 'california institute of technology'), (1999, 'california institute of technology'), (2000, 'california institute of technology'), (2001, 'california institute of technology'), (2003, 'california institute of technology'), (2004, 'california institute of technology'), (2006, 'california institute of technology'), (2007, 'california institute of technology'), (2008, 'california institute of technology'), (2009, 'california institute of technology'), (2010, 'california institute of technology'), (2011, 'california institute of technology'), (2012, 'california institute of technology'), (2013, 'california institute of technology'), (2014, 'california institute of technology'), (2015, 'california institute of technology'), (2016, 'california institute of technology'), (2017, 'california institute of technology'), (2018, 'california institute of technology'), (2019, 'california institute of technology'), (2021, 'california institute of technology')) +all_us_institutions_year : ((1974, 'california institute of technology'), (1974, 'united states geological survey'), (1979, 'university of colorado boulder'), (1980, 'university of colorado boulder'), (1982, 'university of colorado boulder'), (1983, 'university of colorado boulder'), (1985, 'university of colorado boulder'), (1986, 'california institute of technology'), (1987, 'california institute of technology'), (1988, 'california institute of technology'), (1989, 'california institute of technology'), (1990, 'california institute of technology'), (1991, 'california institute of technology'), (1992, 'california institute of technology'), (1994, 'california institute of technology'), (1995, 'california institute of technology'), (1996, 'california institute of technology'), (1997, 'california institute of technology'), (1997, 'university of colorado boulder'), (1998, 'california institute of technology'), (1999, 'california institute of technology'), (2000, 'california institute of technology'), (2001, 'california institute of technology'), (2003, 'california institute of technology'), (2004, 'california institute of technology'), (2006, 'california institute of technology'), (2007, 'california institute of technology'), (2008, 'california institute of technology'), (2009, 'california institute of technology'), (2010, 'california institute of technology'), (2011, 'california institute of technology'), (2012, 'california institute of technology'), (2013, 'california institute of technology'), (2014, 'california institute of technology'), (2015, 'california institute of technology'), (2016, 'california institute of technology'), (2017, 'california institute of technology'), (2018, 'california institute of technology'), (2018, 'jet propulsion laboratory'), (2019, 'california institute of technology'), (2021, 'california institute of technology')) + +7/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : karen +lastname : tucker +middlename : b westerfield +year_range : (2015,) +main_us_institutions_year : ((2015, 'boston university'),) +all_us_institutions_year : ((2015, 'boston university'),) + +firstname : karen +lastname : tucker +middlename : b westerfield +year_range : (1995, 2018) +main_us_institutions_year : ((2000, 'duke university'), (2001, 'duke university'), (2003, 'duke university')) +all_us_institutions_year : ((2000, 'duke university'), (2001, 'duke university'), (2003, 'duke university'), (2011, 'boston university'), (2012, 'boston university')) + +7/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : darlene +lastname : weaver +middlename : fozard +year_range : (2015,) +main_us_institutions_year : ((2015, 'duquesne university'),) +all_us_institutions_year : ((2015, 'duquesne university'),) + +firstname : darlene +lastname : weaver +middlename : fozard +year_range : (1998, 2020) +main_us_institutions_year : ((2001, 'villanova university'), (2002, 'university of pennsylvania'), (2003, 'villanova university'), (2006, 'villanova university')) +all_us_institutions_year : ((2001, 'villanova university'), (2002, 'university of pennsylvania'), (2003, 'villanova university'), (2006, 'villanova university'), (2007, 'villanova university')) + +8/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kevin +lastname : sharpe +middlename : None +year_range : (1998,) +main_us_institutions_year : ((1998, 'union institute'),) +all_us_institutions_year : ((1998, 'union institute'),) + +firstname : kevin +lastname : sharpe +middlename : w +year_range : (2015, 2017) +main_us_institutions_year : ((2015, 'st cloud state university'), (2017, 'st cloud state university')) +all_us_institutions_year : ((2015, 'st cloud state university'), (2017, 'st cloud state university')) + +9/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : salah +lastname : hassan +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'michigan state university'),) +all_us_institutions_year : ((2008, 'michigan state university'),) + +firstname : salah +lastname : hassan +middlename : m +year_range : (1992, 2018) +main_us_institutions_year : ((2008, 'ithaca college'),) +all_us_institutions_year : ((2008, 'ithaca college'),) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ian +lastname : markham +middlename : None +year_range : (2004,) +main_us_institutions_year : ((2004, 'hartford seminary'),) +all_us_institutions_year : ((2004, 'hartford seminary'),) + +firstname : ian +lastname : markham +middlename : None +year_range : (1988, 2019) +main_us_institutions_year : ((2008, 'virginia theological seminary'), (2010, 'virginia theological seminary'), (2012, 'virginia theological seminary'), (2014, 'virginia theological seminary'), (2015, 'virginia theological seminary'), (2019, 'virginia theological seminary')) +all_us_institutions_year : ((2008, 'virginia theological seminary'), (2010, 'virginia theological seminary'), (2012, 'virginia theological seminary'), (2014, 'virginia theological seminary'), (2015, 'virginia theological seminary'), (2019, 'virginia theological seminary')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : martin +middlename : None +year_range : (1997,) +main_us_institutions_year : ((1997, 'boston university'),) +all_us_institutions_year : ((1997, 'boston university'),) + +firstname : michael +lastname : martin +middlename : t +year_range : (1980, 2019) +main_us_institutions_year : ((2004, 'bowling green state university'),) +all_us_institutions_year : ((2004, 'bowling green state university'),) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : christine +lastname : downing +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'pacifica graduate institute'),) +all_us_institutions_year : ((2011, 'pacifica graduate institute'),) + +firstname : christine +lastname : downing +middlename : None +year_range : (1964, 2005) +main_us_institutions_year : ((1975, 'american academy of religion'), (1976, 'san diego state university')) +all_us_institutions_year : ((1975, 'american academy of religion'), (1976, 'san diego state university')) + +9/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : gray +middlename : t +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of washington'),) +all_us_institutions_year : ((2002, 'university of washington'),) + +firstname : richard +lastname : gray +middlename : None +year_range : (1960, 2021) +main_us_institutions_year : ((1987, 'university of california'), (1992, 'medstar washington hospital center'), (1994, 'medstar washington hospital center'), (1995, 'medstar washington hospital center'), (1998, 'new york medical college'), (1998, 'medstar washington hospital center'), (1999, 'medstar washington hospital center'), (2001, 'medstar washington hospital center'), (2002, 'medstar washington hospital center'), (2004, 'mayo clinic'), (2005, 'mayo clinic'), (2006, 'mayo clinic'), (2010, 'mayo clinic'), (2011, 'mayo clinic'), (2012, 'mayo clinic'), (2013, 'mayo clinic'), (2014, 'mayo clinic'), (2015, 'mayo clinic'), (2017, 'mayo clinic')) +all_us_institutions_year : ((1987, 'university of california'), (1992, 'medstar washington hospital center'), (1994, 'medstar washington hospital center'), (1995, 'medstar washington hospital center'), (1996, 'medstar washington hospital center'), (1997, 'medstar washington hospital center'), (1998, 'medstar washington hospital center'), (1998, 'new york medical college'), (1999, 'medstar washington hospital center'), (1999, 'new york medical college'), (2000, 'mayo clinic'), (2000, 'medstar washington hospital center'), (2000, 'tutor com'), (2001, 'medstar washington hospital center'), (2002, 'mayo clinic'), (2002, 'medstar washington hospital center'), (2003, 'mayo clinic'), (2004, 'mayo clinic'), (2005, 'mayo clinic'), (2006, 'mayo clinic'), (2007, 'mayo clinic'), (2007, 'medstar washington hospital center'), (2008, 'mayo clinic'), (2009, 'cleveland clinic'), (2009, 'mayo clinic'), (2010, 'mayo clinic'), (2011, 'mayo clinic'), (2012, 'mayo clinic'), (2013, 'mayo clinic'), (2014, 'mayo clinic'), (2014, 'qatar airways'), (2015, 'mayo clinic'), (2016, 'mayo clinic'), (2017, 'mayo clinic'), (2018, 'mayo clinic'), (2018, 'memorial sloan kettering cancer center'), (2019, 'mayo clinic'), (2020, 'mayo clinic'), (2021, 'mayo clinic')) + +9/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : judith +lastname : lochhead +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'stony brook university'),) +all_us_institutions_year : ((2015, 'stony brook university'),) + +firstname : judy +lastname : lochhead +middlename : None +year_range : (1996, 2019) +main_us_institutions_year : ((2014, 'stony brook university'), (2019, 'stony brook university')) +all_us_institutions_year : ((2014, 'stony brook university'), (2019, 'stony brook university'), (2020, 'stony brook university')) + +10/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : peter +lastname : ludlow +middlename : None +year_range : (1990,) +main_us_institutions_year : ((1990, 'stony brook university'),) +all_us_institutions_year : ((1990, 'stony brook university'),) + +firstname : peter +lastname : ludlow +middlename : None +year_range : (2001, 2003) +main_us_institutions_year : ((2003, 'washington jefferson college'),) +all_us_institutions_year : ((2003, 'washington jefferson college'),) + +11/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : reed +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'dallas theological seminary'),) +all_us_institutions_year : ((1994, 'dallas theological seminary'),) + +firstname : john +lastname : reed +middlename : shelton +year_range : (1969, 2017) +main_us_institutions_year : ((1969, 'university of north carolina at chapel hill'), (1973, 'university of north carolina at chapel hill'), (1980, 'university of north carolina at chapel hill'), (1982, 'university of north carolina at chapel hill'), (1983, 'university of north carolina at chapel hill'), (1984, 'university of north carolina at chapel hill'), (1986, 'university of north carolina at chapel hill'), (1987, 'university of north carolina at chapel hill'), (1989, 'university of north carolina at chapel hill'), (1990, 'university of north carolina at chapel hill'), (1991, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((1969, 'university of north carolina at chapel hill'), (1973, 'university of north carolina at chapel hill'), (1980, 'university of north carolina at chapel hill'), (1982, 'university of north carolina at chapel hill'), (1983, 'university of north carolina at chapel hill'), (1984, 'university of north carolina at chapel hill'), (1986, 'university of north carolina at chapel hill'), (1987, 'university of north carolina at chapel hill'), (1989, 'university of north carolina at chapel hill'), (1990, 'university of north carolina at chapel hill'), (1991, 'university of north carolina at chapel hill')) + +11/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 217.61115560531616 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_graduates_8515.log b/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_graduates_8515.log new file mode 100644 index 0000000..96aea1c --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_philosophy_christoph_degree0_graduates_8515.log @@ -0,0 +1,614 @@ +Namespace(testing=False, verbose=1, field=['philosophy'], train_name='christoph_degree0', startyear=1985, endyear=2015, loadstartyear=1985, loadendyear=2015, mergemode='1:1', recall=0.9, institution='False', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='True', retrain='True', linking_type='graduates', samplesize=50000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [138885662] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0004653771718343099 minutes + + + SELECT goid + , year + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , coauthors + , year_papertitle + FROM ( + SELECT goid + , degree_year AS year + , fullname + , SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) AS firstname + , REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(fullname, length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(fullname) AS l_fullname + , length(SUBSTR(TRIM(fullname),1,instr(trim(fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(fullname, RTRIM(fullname, REPLACE(fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + , degree_year || "//" || thesistitle as year_papertitle + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + , advisors as coauthors + FROm pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1985 and year <= 2015 AND length(firstname) > 1 + + + + SELECT f.AuthorId + , f.year + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.year_papertitle + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , institutions as institution + , main_us_institutions_career + , coauthors + , keywords + , year_papertitle + FROM author_info_linking + ) AS g USING(AuthorId) + WHERE length(firstname) > 1 AND year >= 1985 - 5 AND year <= 2015 + 5 + -- ## use this to condition on people that have at least at some point their main affiliation in the US + AND g.main_us_institutions_career IS NOT NULL + AND g.institution != "chinese academy of sciences" + + +INFO:dedupe.canopy_index:Removing stop word ar +INFO:dedupe.canopy_index:Removing stop word in +INFO:dedupe.canopy_index:Removing stop word en +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word ma +INFO:dedupe.canopy_index:Removing stop word ha +INFO:dedupe.canopy_index:Removing stop word al +INFO:dedupe.canopy_index:Removing stop word on +INFO:dedupe.canopy_index:Removing stop word ll +INFO:dedupe.canopy_index:Removing stop word st +INFO:dedupe.canopy_index:Removing stop word el +INFO:dedupe.canopy_index:Removing stop word re +INFO:dedupe.canopy_index:Removing stop word ra +INFO:dedupe.canopy_index:Removing stop word er +INFO:dedupe.canopy_index:Removing stop word ch +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word an +INFO:dedupe.canopy_index:Removing stop word ri +INFO:dedupe.canopy_index:Removing stop word ne +INFO:dedupe.canopy_index:Removing stop word le +INFO:dedupe.canopy_index:Removing stop word la +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:SimplePredicate: (suffixArray, middlename) +Time elapsed: 7.893500101566315 minutes + +Starting active labeling... +firstname : thomas +lastname : hofweber +middlename : None +year : 1999 +year_papertitle : ((1999, 'ontology and objectivity'), (2000, 'empty names fiction and the puzzles of non existence'), (2000, 'proof theoretic reduction as a philosopher s tool'), (2001, 'a subject with no object strategies for nominalistic interpretation of mathematics'), (2005, 'a puzzle about ontology'), (2005, 'conceptions of truth'), (2005, 'number determiners numbers and arithmetic'), (2005, 'supervenience and object dependant properties'), (2006, 'le ambigue virtu della forma logica'), (2006, 'schiffer s new theory of propositions')) +keywords : frozenset({'calculus', 'linguistics', 'epistemology', 'arithmetic', 'humanities'}) + +firstname : thomas +lastname : hofweber +middlename : None +year : 1999 +year_papertitle : ((1999, 'ontology and objectivity'),) +keywords : frozenset({'philosophy', 'linguistics'}) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : lauren +lastname : ashwell +middlename : None +year : 2013 +year_papertitle : ((2009, 'desires and dispositions'), (2013, 'deep dark or transparent knowing our desires'), (2014, 'the metaphysics of desire and dispositions'), (2018, 'possibilities of misidentification')) +keywords : frozenset({'social science', 'epistemology'}) + +firstname : lauren +lastname : ashwell +middlename : None +year : 2009 +year_papertitle : ((2009, 'desires and dispositions'),) +keywords : frozenset({'philosophy', 'epistemology'}) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:TfidfNGramSearchPredicate: (0.8, lastname) +firstname : marc +lastname : brettler +middlename : zvi +year : 1989 +year_papertitle : ((1989, 'god is king understanding an israelite metaphor'), (1989, 'jud 1 1 2 10 from appendix to prologue'), (1989, 'the book of judges literature as politics'), (1990, 'murder and difference gender genre and scholarship on sisera s death and death dissymmetry the politics of coherence in the book of judges review'), (1990, 'rabbi simeon ben lakish at the gladiator s banquet rabbinic observations on the roman arena'), (1990, 'samuel and the deuteronomist a literary study of the deuteronomic history 1 samuel robert polzin'), (1990, 'the confessions of jeremiah their interpretation and role in chapters 1 25 kathleen m o connor'), (1990, 'the first historians the hebrew bible and history baruch halpern'), (1991, 'the structure of 1 kings 1 11'), (1992, 'john day molech a god of human sacrifice in the old testament university of cambridge oriental publications 41 cambridge cambridge university press 1989 ix 115 pp')) +keywords : frozenset({'ancient history', 'classics', 'linguistics', 'law', 'theology', 'epistemology', 'archaeology', 'literature', 'religious studies', 'humanities'}) + +firstname : marc +lastname : brettler +middlename : zvi +year : 1987 +year_papertitle : ((1987, 'god is king understanding an israelite metaphor enthronement biblical religion'),) +keywords : frozenset({'theology'}) + +2/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:SimplePredicate: (wholeFieldPredicate, lastname) +firstname : maria +lastname : garcia +middlename : del carmen +year : 1982 +year_papertitle : ((1982, 'slow inward calcium currents have no obvious role in muscle excitation contraction coupling'),) +keywords : frozenset({'biophysics'}) + +firstname : maria +lastname : garciapadilla +middlename : del carmen +year : 1993 +year_papertitle : ((1993, 'doing philosophy and the education of teachers'),) +keywords : frozenset({'education philosophy', 'teacher education', 'philosophy'}) + +3/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : keller +middlename : h +year : 1995 +year_papertitle : ((1995, 'public access to the internet'), (1997, 'coordinating the internet'), (1999, 'the first 100 feet options for internet and broadband access')) +keywords : frozenset({'marketing', 'telecommunications', 'management', 'public relations'}) + +firstname : james +lastname : kellerman +middlename : allen +year : 1996 +year_papertitle : ((1996, 'the dramatic prologue of plato s symposium as introduction to the dialogue s philosophy'),) +keywords : frozenset({'classical literature', 'philosophy', 'classical studies'}) + +3/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mahon +lastname : obrien +middlename : None +year : 2010 +year_papertitle : ((2010, 're assessing the affair the heidegger controversy revisited'), (2011, 'heidegger and authenticity from resoluteness to releasement'), (2014, 'leaping ahead of heidegger subjectivity and intersubjectivity in being and time'), (2014, 'martin heidegger the event reviewed by'), (2015, 'heidegger history and the holocaust'), (2016, 'the time of revolution kairos and chronos in heidegger')) +keywords : frozenset({'ancient history', 'theology', 'epistemology', 'art history', 'cognitive science'}) + +firstname : mahon +lastname : brien +middlename : james o +year : 2010 +year_papertitle : ((2010, 'authenticity from resoluteness to releasement'),) +keywords : frozenset({'philosophy'}) + +3/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : sizgorich +middlename : None +year : 2009 +year_papertitle : ((2009, 'for christian eyes only the intended audience of the martyrdom of antony rawḥ'), (2009, 'sanctified violence monotheist militancy as the tie that bound christian rome and islam'), (2010, 'muhammad s grave death rites and the making of islamic society by leor halevi'), (2011, 'riot in alexandria tradition and group dynamics in late antique pagan and christian communities by edward j watts the transformation of the classical heritage 46 berkeley university of california press 2010 xv 290 pp 55 00 cloth')) +keywords : frozenset({'religious studies', 'theology', 'literature', 'classics'}) + +firstname : thomas +lastname : song +middlename : byunghyun +year : 1997 +year_papertitle : ((1997, 'the loftiness of god the humility of man and restoration in isaiah 57 14 21 a text linguistic analysis of their convergence'),) +keywords : frozenset({'theology', 'biblical studies'}) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:SimplePredicate: (suffixArray, middlename) +INFO:dedupe.training:SimplePredicate: (suffixArray, lastname) +firstname : scott +lastname : cook +middlename : None +year : 1997 +year_papertitle : ((1995, 'unity and diversity in the musical thought of warring states china'), (1997, 'xun zi on ritual and music'), (1997, 'zhuang zi and his carving of the confucian ox'), (2002, 'the lushi chunqiu and the resolution of philosophical dissonance'), (2003, 'hiding the world in the world uneven discourses on the zhuangzi'), (2004, 'the debate over coercive rulership and the human way in light of recently excavated warring states texts')) +keywords : frozenset({'social psychology', 'law', 'genealogy', 'remote sensing', 'aesthetics', 'literature', 'art history', 'humanities'}) + +firstname : scott +lastname : cook +middlename : bradley +year : 1995 +year_papertitle : ((1995, 'unity and diversity in the musical thought of warring states china'),) +keywords : frozenset({'music', 'philosophy', 'history'}) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : caie +middlename : None +year : 2012 +year_papertitle : ((2012, 'belief and indeterminacy'), (2012, 'vagueness and semantic indiscriminability'), (2013, 'rational probabilistic incoherence'), (2014, 'calibration and probabilism'), (2014, 'metasemantics and metaphysical indeterminacy'), (2015, 'credence in the image of chance'), (2016, 'agreement theorems for self locating belief'), (2018, 'a problem for credal consequentialism'), (2018, 'agreement and updating for self locating belief'), (2018, 'benardete s paradox and the logic of counterfactuals')) +keywords : frozenset({'mathematical economics', 'calculus', 'discrete mathematics', 'epistemology', 'remote sensing', 'algorithm', 'artificial intelligence', 'positive economics'}) + +firstname : michael +lastname : caie +middlename : None +year : 2011 +year_papertitle : ((2011, 'paradox and belief'),) +keywords : frozenset({'philosophy', 'epistemology'}) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:LevenshteinSearchPredicate: (1, lastname) +firstname : juejun +lastname : hu +middlename : None +year : 2006 +year_papertitle : ((2006, 'fabrication of single crystalline silicon nanowires by scratching a silicon surface with catalytic metal particles'), (2006, 'metal particle induced highly localized site specific etching of si and formation of single crystalline si nanowires in aqueous fluoride solution'), (2006, 'multispectral 1 d photonic crystal photodetector'), (2006, 'multispectral photonic crystal photo sensor'), (2006, 'multispectral pixel performance using a one dimensional photonic crystal design'), (2007, 'fabrication and testing of planar chalcogenide waveguide integrated microfluidic sensor'), (2007, 'low loss integrated planar chalcogenide waveguides for microfluidic chemical sensing'), (2007, 'refractive index modifications in chalcogenide films induced by sub bandgap near ir femtosecond pulses'), (2007, 'si cmos compatible lift off fabrication of low loss planar chalcogenide waveguides'), (2007, 'studies on structural electrical and optical properties of cu doped as se te chalcogenide glasses')) +keywords : frozenset({'inorganic chemistry', 'analytical chemistry', 'optoelectronics', 'optics', 'nanotechnology'}) + +firstname : haibin +lastname : hu +middlename : None +year : 1996 +year_papertitle : ((1996, 'a study of parental information seeking and implications for school choice'),) +keywords : frozenset({'education philosophy', 'secondary education', 'educational administration'}) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : bridget +lastname : clarke +middlename : None +year : 2010 +year_papertitle : ((2010, 'moral reflection william ransome'), (2010, 'virtue and disagreement'), (2012, 'sabina lovibond iris murdoch gender and philosophy reviewed by'), (2014, 'political emotions why love matters for justice by nussbaum martha c')) +keywords : frozenset({'law', 'art history', 'optics', 'epistemology'}) + +firstname : justin +lastname : clarkedoane +middlename : None +year : 2011 +year_papertitle : ((2011, 'morality and mathematics'),) +keywords : frozenset({'ethics', 'philosophy', 'epistemology', 'mathematics'}) + +6/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : wongi +lastname : park +middlename : None +year : 2015 +year_papertitle : ((2015, 'book review experiencing irony in the first gospel suspense surprise and curiosity written by karl mcdaniel new york bloomsbury 2013 vi 195'), (2016, 'studies in matthew s gospel literary design intertextuality and social setting written by wim j c weren 2014')) +keywords : frozenset({'art history', 'literature'}) + +firstname : wonbin +lastname : park +middlename : None +year : 2006 +year_papertitle : ((2006, 'emmanuel levinas s ethic of the other i kenosis i and the theodicy questions'),) +keywords : frozenset({'theology', 'philosophy'}) + +6/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jane +lastname : jakobsen +middlename : r +year : 1981 +year_papertitle : ((1981, 'the development of teaching programs in geriatric dentistry in the united states from 1974 to 1979'), (1983, 'a low cost method for conducting a statewide survey of oral health'), (1983, 'suicide statistics of dentists in iowa 1968 to 1980'), (1983, 'transportation problems and dental care of nursing home residents'), (1984, 'removable prosthodontic treatment needs a survey'), (1984, 'teaching programs in geriatric dentistry'), (1985, 'dental disease prevalence in a prison population'), (1985, 'dentofacial relationships in persons with unoperated clefts comparisons between three cleft types'), (1985, 'longitudinal changes in three normal facial types'), (1986, 'dentists who practice with spouses or relatives a pilot study of practitioners in iowa')) +keywords : frozenset({'orthodontics', 'medical education', 'family medicine', 'dentistry', 'demography', 'medical emergency', 'nursing'}) + +firstname : janet +lastname : jakobsen +middlename : ruth +year : 1992 +year_papertitle : ((1992, 'the gendered division of moral labor and the possibilities for a responsible feminist ethic'),) +keywords : frozenset({'religion', 'womens studies', 'philosophy'}) + +6/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gary +lastname : aylesworth +middlename : e +year : 1992 +year_papertitle : ((1992, 'stephen k white political theory and postmodernism'), (1992, 'stephen k white political theory and postmodernism reviewed by'), (1995, 'r philip buckley husserl heidegger and the crisis of philosophical responsibility reviewed by')) +keywords : frozenset({'religious studies', 'epistemology'}) + +firstname : gary +lastname : aylesworth +middlename : eben +year : 1986 +year_papertitle : ((1986, 'from grounds to play a comparative analysis of wittgenstein and heidegger'),) +keywords : frozenset({'philosophy'}) + +6/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : astrida +lastname : tantillo +middlename : orle +year : 1998 +year_papertitle : ((1998, 'goethe s botany and his philosophy of gender'), (1998, 'goethes romane aufbruch in die moderne'), (2001, 'a new reading of werther as goethe s critique of rousseau'), (2002, 'goethe s elective affinities and the critics'), (2002, 'the will to create'), (2003, 'falsche tendenzen der staatsdiener goethe und der dichter'), (2004, 'northern dreams of the south imagining italy in the eighteenth century')) +keywords : frozenset({'economic history', 'ancient history', 'classics', 'anthropology', 'literature', 'art history'}) + +firstname : astrida +lastname : tantillo +middlename : orle +year : 1994 +year_papertitle : ((1994, 'nature s artistry goethe s science and die wahlverwandtschaften'),) +keywords : frozenset({'philosophy', 'science history', 'germanic literature'}) + +7/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : kennedy +middlename : d +year : 1990 +year_papertitle : ((1990, 'the political meaning of christianity an interpretation by glenn tinder baton rouge and london louisiana state university press 1989 29 95 isbn 0 8071 1510 x'),) +keywords : frozenset({'theology'}) + +firstname : thomas +lastname : kennedy +middlename : allen +year : 1997 +year_papertitle : ((1997, 'project starfish a church and community based public school mentoring program'),) +keywords : frozenset({'public policy', 'school counseling', 'theology', 'secondary education', 'religion'}) + +8/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : andrew +lastname : davis +middlename : r +year : 2013 +year_papertitle : ((2013, 'tel dan in its northern cultic context'), (2013, 'the literary effect of gender discord in the book of ruth'), (2013, 'translating חנם in job 1 9 and 2 3 on the relationship between job s piety and his interiority'), (2015, 'wrestling jacob in the book of genesis and august wilson s fences'), (2016, 'rereading 1 kings 17 21 in light of ancient medical texts'), (2017, 'a near eastern treaty parallel to ezekiel s dry bones')) +keywords : frozenset({'environmental ethics', 'social psychology', 'ancient history', 'linguistics', 'psychoanalysis', 'anthropology', 'literature'}) + +firstname : andrew +lastname : davis +middlename : alexander +year : 2009 +year_papertitle : ((2009, 'living method from the regulative to the constitutive idea in hegel s logic'),) +keywords : frozenset({'philosophy'}) + +8/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chris +lastname : eliasmith +middlename : None +year : 1996 +year_papertitle : ((1996, 'the third contender a critical examination of the dynamicist theory of cognition'), (1997, 'computation and dynamical models of mind'), (1997, 'waves particles and explanatory coherence'), (1998, 'dynamical models and van gelder s dynamicism two different things'), (1998, 'the metaphysics of science an account of modern science in terms of principles laws and theories craig dilworth boston studies in the philosophy of science vol 173 dordrecht kluwer academic publishers 1995 x 235 pp 98 00'), (1999, 'developing and applying a toolkit from a general neurocomputational framework'), (2000, 'rethinking central pattern generators a general approach'), (2001, 'attractive and in discrete'), (2001, 'beyond bumps spiking networks that store sets of functions'), (2001, 'integrating structure and meaning a distributed model of analogical mapping')) +keywords : frozenset({'epistemology', 'applied mathematics', 'machine learning', 'algorithm', 'control theory', 'artificial intelligence', 'cognitive science'}) + +firstname : christopher +lastname : eliasmith +middlename : david +year : 2000 +year_papertitle : ((2000, 'how neurons mean a neurocomputational theory of representational content'),) +keywords : frozenset({'neurology', 'philosophy', 'cognitive therapy'}) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : anthony +lastname : matteo +middlename : m +year : 1988 +year_papertitle : ((1988, 'can belief in god be basic'), (1988, 'preserving life public policy and the life not worth living by richard sherlock chicago loyola university press 1987 xiv 332 pages 15 95')) +keywords : frozenset({'economic history', 'epistemology'}) + +firstname : anthony +lastname : matteo +middlename : michael +year : 1987 +year_papertitle : ((1987, 'joseph marechal and the transcendental turn in catholic thought'),) +keywords : frozenset({'philosophy'}) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : chris +lastname : eliasmith +middlename : None +year : 1996 +year_papertitle : ((1996, 'the third contender a critical examination of the dynamicist theory of cognition'), (1997, 'computation and dynamical models of mind'), (1997, 'waves particles and explanatory coherence'), (1998, 'dynamical models and van gelder s dynamicism two different things'), (1998, 'the metaphysics of science an account of modern science in terms of principles laws and theories craig dilworth boston studies in the philosophy of science vol 173 dordrecht kluwer academic publishers 1995 x 235 pp 98 00'), (1999, 'developing and applying a toolkit from a general neurocomputational framework'), (2000, 'rethinking central pattern generators a general approach'), (2001, 'attractive and in discrete'), (2001, 'beyond bumps spiking networks that store sets of functions'), (2001, 'integrating structure and meaning a distributed model of analogical mapping')) +keywords : frozenset({'epistemology', 'applied mathematics', 'machine learning', 'algorithm', 'control theory', 'artificial intelligence', 'cognitive science'}) + +firstname : christopher +lastname : eliasmith +middlename : david +year : 2000 +year_papertitle : ((2000, 'how neurons mean a neurocomputational theory of representational content'),) +keywords : frozenset({'neurology', 'philosophy', 'cognitive therapy'}) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : anthony +lastname : matteo +middlename : m +year : 1988 +year_papertitle : ((1988, 'can belief in god be basic'), (1988, 'preserving life public policy and the life not worth living by richard sherlock chicago loyola university press 1987 xiv 332 pages 15 95')) +keywords : frozenset({'economic history', 'epistemology'}) + +firstname : anthony +lastname : matteo +middlename : michael +year : 1987 +year_papertitle : ((1987, 'joseph marechal and the transcendental turn in catholic thought'),) +keywords : frozenset({'philosophy'}) + +9/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : blochschulman +middlename : None +year : 2010 +year_papertitle : ((2010, 'who s afraid of politics on the need to teach political engagement'), (2012, 'argumentation step by step learning critical thinking through deliberate practice'), (2015, 'asking another question democratic thinking inside and outside the classroom a forthcoming interview with elizabeth minnich and si kahn'), (2015, 'i am not trying to be defiant i am trying to be your partner how to help students navigate educational institutions that do not value democratic practice'), (2015, 'what kind of community an inquiry into teaching practices that move beyond exclusion'), (2016, 'a critique of methods in the scholarship of teaching and learning in philosophy'), (2016, 'asking bigger questions an invitation to further conversation'), (2016, 'beyond add teaching and learning and stir epistemologies of ignorance teaching and learning in philosophy and the need for resistance'), (2016, 'engaging in social partnerships democratic practices for campus community partnerships'), (2016, 'scholarship of teaching and learning in the arts and humanities moving the conversation forward special section editors introduction')) +keywords : frozenset({'visual arts', 'psychoanalysis', 'multimedia', 'public relations', 'epistemology', 'pedagogy', 'cognitive science'}) + +firstname : stephen +lastname : block +middlename : a +year : 2012 +year_papertitle : ((2012, 'rhetoric reason and the problem of rule aristotle and j s mill on speech and politics'),) +keywords : frozenset({'philosophy', 'political science'}) + +10/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yeonjeong +lastname : kim +middlename : None +year : 2013 +year_papertitle : ((2013, 'agreement and similarity in self other perceptions of moral character'), (2014, 'moral character in the workplace'), (2015, 'moral character and workplace deviance recent research and current trends'), (2015, 'taking one for the team motivating prosocial volunteering by varying the size and the un certainty of its impact on collective welfare'), (2016, 'guilt proneness is a marker of integrity and employment suitability'), (2017, 'academic achievement of latino immigrant adolescents the effects of negative school social relationships school safety and educational expectation'), (2017, 'behavioral circumscription and the folk psychology of belief a study in ethno mentalizing'), (2017, 'the gettier intuition from south america to asia'), (2018, 'do relational and structural characteristics of negative school environments independently predict immigrant adolescents academic achievement'), (2019, 'nothing at stake in knowledge')) +keywords : frozenset({'social psychology', 'epistemology'}) + +firstname : yeong +lastname : kim +middlename : gyu +year : 2003 +year_papertitle : ((2003, 'the role of worship in church renewal a case study of daejeon choongshin presbyterian church'),) +keywords : frozenset({'theology'}) + +10/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marta +lastname : hanson +middlename : None +year : 2003 +year_papertitle : ((2003, 'h t huang science and civilisation in china volume 6 biology and biological technology part 6 fermentations and food science new york cambridge university press 2000 pp xxviii 741 150 00'), (2003, 'reviews of books science and civilisation in china volume 6 biology and biological technology part 6 fermentations and food science h t huang'), (2003, 'the golden mirror in the imperial court of the qianlong emperor 1739 1742'), (2004, 'joanna grant a chinese physician wang ji and the stone mountain medical case histories needham research institute series london and new york routledgecurzon 2003 pp xi 209 illus 55 00 hardback 0 415 29758 3'), (2006, 'benjamin a elman on their own terms science in china 1550 1900 cambridge harvard university press 2005 pp xxxviii 567 55 00'), (2006, 'enhancing the practitioner s sense of time place and practice the history of chinese medicine for practitioners workshop'), (2006, 'northern purgatives southern restoratives ming medical regionalism'), (2007, 'needham s heavenly volumes and earthly tomes'), (2008, 'maoist public health campaigns chinese medicine and sars')) +keywords : frozenset({'social science', 'ancient history', 'traditional medicine', 'gender studies', 'classics', 'ethnology', 'law', 'theology', 'medical education', 'anthropology', 'literature'}) + +firstname : mark +lastname : hanson +middlename : jeffrey +year : 1993 +year_papertitle : ((1993, 'implications of nonfoundationalist moral epistemologies for theological ethics in public moral discourse'),) +keywords : frozenset({'theology', 'religion', 'philosophy'}) + +10/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : seung +lastname : lee +middlename : jae +year : 2004 +year_papertitle : ((2004, 'a nonneutralizing anti hiv 1 antibody turns into a neutralizing antibody when expressed on the surface of hiv 1 susceptible cells a new way to fight hiv'), (2006, 'a nonneutralizing anti hiv type 1 antibody turns into a broad neutralizing antibody when expressed on the surface of hiv type 1 susceptible cells ii inhibition of hiv type 1 captured and transferred by dc sign')) +keywords : frozenset({'immunology', 'virology'}) + +firstname : seungkee +lastname : lee +middlename : None +year : 2000 +year_papertitle : ((2000, 'determinate and indeterminate judgments and the unity of kant s critical philosophy'),) +keywords : frozenset({'philosophy'}) + +10/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jeff +lastname : engelhardt +middlename : None +year : 2015 +year_papertitle : ((2015, 'emergent substances physical properties action explanations'), (2015, 'property reductive emergent dualism'), (2015, 'what is the exclusion problem'), (2016, 'what we talk about when we talk about content externalism'), (2017, 'interactive inclusive substance dualism'), (2017, 'mental causation is not just downward causation'), (2019, 'false double consciousness hermeneutical resources from the rush limbaugh show'), (2019, 'linguistic labor and its division'), (2019, 'resources rules and oppression')) +keywords : frozenset({'psychoanalysis', 'criminology', 'epistemology', 'linguistics'}) + +firstname : jeffrey +lastname : engelhardt +middlename : michael +year : 2011 +year_papertitle : ((2011, 'the metaphysical role of causal roles'),) +keywords : frozenset({'philosophy', 'metaphysics'}) + +10/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +INFO:rlr.crossvalidation:using cross validation to find optimum alpha... +INFO:rlr.crossvalidation:optimum alpha: 0.010000, score 0.6832997421289402 +INFO:dedupe.training:Final predicate set: +INFO:dedupe.training:TfidfNGramSearchPredicate: (0.8, lastname) +INFO:dedupe.training:TfidfNGramSearchPredicate: (0.8, middlename) +Done in 15.337178341547649 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_physics_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_physics_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..3b78778 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_physics_christoph_degree0_advisors_9015.log @@ -0,0 +1,773 @@ +Namespace(testing=False, verbose=1, field=['physics'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [121332964] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008437474568684896 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 258.5345541477203 minutes + +Starting active labeling... +firstname : kaiming +lastname : ho +middlename : None +year_range : (1993,) +main_us_institutions_year : ((1993, 'iowa state university'),) +all_us_institutions_year : ((1993, 'iowa state university'),) + +firstname : kaiming +lastname : ho +middlename : None +year_range : (2017, 2020) +main_us_institutions_year : ((2017, 'iowa state university'), (2018, 'iowa state university'), (2019, 'iowa state university')) +all_us_institutions_year : ((2017, 'iowa state university'), (2017, 'united states department of energy'), (2018, 'iowa state university'), (2018, 'united states department of energy'), (2019, 'ames laboratory'), (2019, 'iowa state university'), (2019, 'united states department of energy')) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : j +lastname : russ +middlename : s +year_range : (1993,) +main_us_institutions_year : ((1993, 'carnegie mellon university'),) +all_us_institutions_year : ((1993, 'carnegie mellon university'),) + +firstname : j +lastname : russ +middlename : None +year_range : (2016, 2019) +main_us_institutions_year : ((2016, 'carnegie mellon university'), (2018, 'carnegie mellon university')) +all_us_institutions_year : ((2016, 'carnegie mellon university'), (2018, 'carnegie mellon university')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hyatt +lastname : gibbs +middlename : m +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of arizona'),) +all_us_institutions_year : ((1991, 'university of arizona'),) + +firstname : hyatt +lastname : gibbs +middlename : m +year_range : (1982, 2012) +main_us_institutions_year : ((1982, 'university of arizona'), (1984, 'university of arizona'), (1985, 'university of arizona'), (1986, 'university of arizona'), (1987, 'university of arizona'), (1988, 'university of arizona'), (1989, 'university of arizona'), (1990, 'university of arizona'), (1991, 'university of arizona'), (1993, 'university of arizona'), (1994, 'university of arizona'), (1998, 'university of arizona'), (2001, 'university of arizona'), (2002, 'university of arizona'), (2003, 'university of arizona'), (2004, 'university of arizona'), (2005, 'university of arizona'), (2006, 'university of arizona'), (2007, 'university of arizona'), (2009, 'university of arizona'), (2010, 'university of arizona'), (2011, 'university of arizona'), (2012, 'university of arizona')) +all_us_institutions_year : ((1982, 'university of arizona'), (1984, 'university of arizona'), (1985, 'university of arizona'), (1986, 'university of arizona'), (1987, 'university of arizona'), (1988, 'university of arizona'), (1989, 'university of arizona'), (1990, 'university of arizona'), (1991, 'university of arizona'), (1992, 'university of arizona'), (1993, 'university of arizona'), (1994, 'university of arizona'), (1995, 'university of arizona'), (1996, 'university of arizona'), (1997, 'university of arizona'), (1998, 'university of arizona'), (1999, 'university of arizona'), (2000, 'university of arizona'), (2001, 'university of arizona'), (2002, 'university of arizona'), (2003, 'university of arizona'), (2004, 'university of arizona'), (2005, 'university of arizona'), (2006, 'university of arizona'), (2007, 'university of arizona'), (2009, 'university of arizona'), (2010, 'university of arizona'), (2011, 'university of arizona'), (2012, 'university of arizona')) + +0/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : kuzyk +middlename : g +year_range : (2006,) +main_us_institutions_year : ((2006, 'washington state university'),) +all_us_institutions_year : ((2006, 'washington state university'),) + +firstname : mark +lastname : kuzyk +middlename : c +year_range : (2008, 2010) +main_us_institutions_year : ((2008, 'washington state university'), (2010, 'washington state university')) +all_us_institutions_year : ((2008, 'washington state university'), (2010, 'washington state university')) + +1/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : harris +middlename : e +year_range : (2000,) +main_us_institutions_year : ((2000, 'stanford university'),) +all_us_institutions_year : ((2000, 'stanford university'),) + +firstname : stephen +lastname : harris +middlename : j +year_range : (1973, 2021) +main_us_institutions_year : ((1981, 'general motors'), (1982, 'general motors'), (1983, 'general motors'), (1984, 'general motors'), (1986, 'general motors'), (1987, 'general motors'), (1988, 'general motors'), (1989, 'general motors'), (1990, 'general motors'), (1992, 'general motors'), (1993, 'general motors'), (1994, 'general motors'), (1995, 'general motors'), (1996, 'general motors'), (1996, 'stanford university'), (1997, 'general motors'), (1998, 'general motors'), (1999, 'general motors'), (2000, 'ford motor company'), (2001, 'ford motor company'), (2002, 'ford motor company'), (2003, 'ford motor company'), (2004, 'ford motor company'), (2005, 'ford motor company'), (2006, 'ford motor company'), (2007, 'ford motor company'), (2009, 'general motors'), (2010, 'general motors'), (2011, 'general motors'), (2012, 'general motors'), (2013, 'lawrence berkeley national laboratory'), (2014, 'lawrence berkeley national laboratory'), (2015, 'lawrence berkeley national laboratory'), (2017, 'lawrence berkeley national laboratory'), (2018, 'lawrence berkeley national laboratory'), (2019, 'lawrence berkeley national laboratory'), (2020, 'lawrence berkeley national laboratory'), (2021, 'lawrence berkeley national laboratory')) +all_us_institutions_year : ((1981, 'general motors'), (1982, 'general motors'), (1983, 'general motors'), (1984, 'general motors'), (1985, 'general motors'), (1986, 'general motors'), (1987, 'general motors'), (1987, 'massachusetts institute of technology'), (1988, 'general motors'), (1989, 'general motors'), (1990, 'general motors'), (1991, 'general motors'), (1992, 'general motors'), (1993, 'general motors'), (1994, 'general motors'), (1995, 'general motors'), (1996, 'general motors'), (1996, 'stanford university'), (1997, 'general motors'), (1997, 'stanford university'), (1998, 'general motors'), (1999, 'general motors'), (2000, 'ford motor company'), (2001, 'ford motor company'), (2002, 'ford motor company'), (2003, 'ford motor company'), (2004, 'ford motor company'), (2005, 'ford motor company'), (2006, 'ford motor company'), (2007, 'ford motor company'), (2009, 'general motors'), (2010, 'general motors'), (2011, 'general motors'), (2012, 'general motors'), (2013, 'general motors'), (2013, 'lawrence berkeley national laboratory'), (2013, 'stanford university'), (2014, 'lawrence berkeley national laboratory'), (2015, 'general motors'), (2015, 'lawrence berkeley national laboratory'), (2016, 'lawrence berkeley national laboratory'), (2017, 'lawrence berkeley national laboratory'), (2018, 'lawrence berkeley national laboratory'), (2018, 'massachusetts institute of technology'), (2019, 'lawrence berkeley national laboratory'), (2019, 'stanford university'), (2020, 'lawrence berkeley national laboratory'), (2020, 'stanford university'), (2021, 'lawrence berkeley national laboratory')) + +1/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : miller +middlename : coleman +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of maryland college park'),) +all_us_institutions_year : ((2006, 'university of maryland college park'),) + +firstname : m +lastname : miller +middlename : coleman +year_range : (1994, 2021) +main_us_institutions_year : ((1994, 'university of illinois at urbana champaign'), (1995, 'university of chicago'), (1997, 'university of chicago'), (1998, 'university of chicago'), (1999, 'university of chicago'), (2000, 'university of maryland college park'), (2000, 'university of chicago'), (2001, 'university of maryland college park'), (2002, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'university of maryland college park'), (2007, 'university of maryland college park'), (2008, 'university of maryland college park'), (2009, 'university of maryland college park'), (2010, 'university of maryland college park'), (2011, 'university of maryland college park'), (2012, 'university of maryland college park'), (2013, 'university of maryland college park'), (2014, 'university of maryland college park'), (2015, 'university of maryland college park'), (2016, 'university of maryland college park'), (2017, 'university of maryland college park'), (2018, 'university of maryland college park'), (2019, 'university of maryland college park'), (2020, 'university of maryland college park'), (2021, 'university of maryland college park')) +all_us_institutions_year : ((1993, 'university of illinois at urbana champaign'), (1994, 'university of illinois at urbana champaign'), (1995, 'university of chicago'), (1996, 'university of chicago'), (1997, 'university of chicago'), (1998, 'university of chicago'), (1999, 'university of chicago'), (1999, 'university of maryland college park'), (2000, 'university of chicago'), (2000, 'university of maryland college park'), (2001, 'university of maryland college park'), (2002, 'university of maryland college park'), (2003, 'university of maryland college park'), (2004, 'university of maryland college park'), (2005, 'university of maryland college park'), (2006, 'goddard space flight center'), (2006, 'university of maryland college park'), (2007, 'goddard space flight center'), (2007, 'university of maryland college park'), (2008, 'goddard space flight center'), (2008, 'university of maryland college park'), (2009, 'university of maryland college park'), (2010, 'space science institute'), (2010, 'university of maryland college park'), (2011, 'space science institute'), (2011, 'university of maryland college park'), (2012, 'space science institute'), (2012, 'university of maryland college park'), (2013, 'johns hopkins university'), (2013, 'university of maryland college park'), (2014, 'university of maryland college park'), (2015, 'space science institute'), (2015, 'university of maryland college park'), (2016, 'space science institute'), (2016, 'university of maryland college park'), (2017, 'space science institute'), (2017, 'university of maryland college park'), (2018, 'johns hopkins university'), (2018, 'space science institute'), (2018, 'university of maryland college park'), (2019, 'university of maryland college park'), (2020, 'university of maryland college park'), (2021, 'university of maryland college park')) + +1/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : cheng +middlename : z d +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of akron'),) +all_us_institutions_year : ((2015, 'university of akron'),) + +firstname : stephen +lastname : cheng +middlename : z d +year_range : (1986, 2021) +main_us_institutions_year : ((1986, 'rensselaer polytechnic institute'), (1987, 'rensselaer polytechnic institute'), (1988, 'rensselaer polytechnic institute'), (1989, 'university of akron'), (1990, 'university of akron'), (1991, 'university of akron'), (1992, 'university of akron'), (1993, 'university of akron'), (1994, 'university of akron'), (1995, 'university of akron'), (1996, 'university of akron'), (1997, 'university of akron'), (1998, 'university of akron'), (1999, 'university of akron'), (2000, 'university of akron'), (2001, 'university of akron'), (2002, 'university of akron'), (2003, 'university of akron'), (2004, 'university of akron'), (2005, 'university of akron'), (2006, 'university of akron'), (2007, 'university of akron'), (2008, 'university of akron'), (2009, 'university of akron'), (2010, 'university of akron'), (2011, 'university of akron'), (2012, 'university of akron'), (2013, 'university of akron'), (2014, 'university of akron'), (2015, 'university of akron'), (2016, 'university of akron'), (2017, 'university of akron'), (2018, 'university of akron'), (2019, 'university of akron'), (2020, 'university of akron'), (2021, 'university of akron')) +all_us_institutions_year : ((1986, 'rensselaer polytechnic institute'), (1987, 'rensselaer polytechnic institute'), (1988, 'oak ridge national laboratory'), (1988, 'rensselaer polytechnic institute'), (1988, 'university of akron'), (1988, 'university of tennessee'), (1989, 'university of akron'), (1990, 'university of akron'), (1991, 'university of akron'), (1992, 'university of akron'), (1993, 'university of akron'), (1994, 'university of akron'), (1995, 'university of akron'), (1996, 'university of akron'), (1997, 'university of akron'), (1997, 'university of michigan'), (1998, 'case western reserve university'), (1998, 'university of akron'), (1999, 'case western reserve university'), (1999, 'university of akron'), (2000, 'university of akron'), (2001, 'university of akron'), (2002, 'university of akron'), (2003, 'university of akron'), (2004, 'university of akron'), (2005, 'university of akron'), (2006, 'university of akron'), (2007, 'university of akron'), (2008, 'national science foundation'), (2008, 'university of akron'), (2009, 'city university of new york'), (2009, 'oak ridge national laboratory'), (2009, 'university of akron'), (2009, 'university of tennessee'), (2010, 'oak ridge national laboratory'), (2010, 'university of akron'), (2010, 'university of tennessee'), (2011, 'university of akron'), (2012, 'university of akron'), (2013, 'university of akron'), (2014, 'university of akron'), (2015, 'university of akron'), (2016, 'university of akron'), (2017, 'university of akron'), (2018, 'university of akron'), (2019, 'university of akron'), (2020, 'university of akron'), (2021, 'university of akron')) + +2/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : moses +lastname : chan +middlename : h w +year_range : (2009,) +main_us_institutions_year : ((2009, 'pennsylvania state university'),) +all_us_institutions_year : ((2009, 'pennsylvania state university'),) + +firstname : moses +lastname : chan +middlename : h w +year_range : (1975, 2021) +main_us_institutions_year : ((1975, 'duke university'), (1976, 'duke university'), (1977, 'duke university'), (1981, 'pennsylvania state university'), (1982, 'pennsylvania state university'), (1984, 'pennsylvania state university'), (1986, 'pennsylvania state university'), (1987, 'pennsylvania state university'), (1988, 'pennsylvania state university'), (1990, 'pennsylvania state university'), (1991, 'pennsylvania state university'), (1992, 'pennsylvania state university'), (1993, 'pennsylvania state university'), (1994, 'pennsylvania state university'), (1995, 'pennsylvania state university'), (1996, 'pennsylvania state university'), (1997, 'pennsylvania state university'), (1998, 'pennsylvania state university'), (1999, 'pennsylvania state university'), (2000, 'pennsylvania state university'), (2001, 'pennsylvania state university'), (2002, 'pennsylvania state university'), (2003, 'pennsylvania state university'), (2004, 'pennsylvania state university'), (2005, 'pennsylvania state university'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'pennsylvania state university'), (2021, 'pennsylvania state university')) +all_us_institutions_year : ((1975, 'duke university'), (1976, 'duke university'), (1977, 'duke university'), (1981, 'pennsylvania state university'), (1982, 'pennsylvania state university'), (1984, 'pennsylvania state university'), (1986, 'pennsylvania state university'), (1987, 'pennsylvania state university'), (1988, 'pennsylvania state university'), (1990, 'pennsylvania state university'), (1991, 'pennsylvania state university'), (1992, 'pennsylvania state university'), (1993, 'pennsylvania state university'), (1993, 'university of pittsburgh'), (1994, 'pennsylvania state university'), (1995, 'pennsylvania state university'), (1996, 'pennsylvania state university'), (1997, 'pennsylvania state university'), (1998, 'pennsylvania state university'), (1999, 'pennsylvania state university'), (2000, 'pennsylvania state university'), (2001, 'pennsylvania state university'), (2002, 'pennsylvania state university'), (2003, 'pennsylvania state university'), (2004, 'pennsylvania state university'), (2004, 'university of washington'), (2005, 'pennsylvania state university'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'pennsylvania state university'), (2018, 'pennsylvania state university'), (2019, 'pennsylvania state university'), (2020, 'pennsylvania state university'), (2021, 'pennsylvania state university')) + +3/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : charles +lastname : doering +middlename : r +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of michigan'),) +all_us_institutions_year : ((2009, 'university of michigan'),) + +firstname : charles +lastname : doering +middlename : r +year_range : (1986, 2021) +main_us_institutions_year : ((1987, 'los alamos national laboratory'), (1988, 'clarkson university'), (1989, 'clarkson university'), (1990, 'clarkson university'), (1991, 'clarkson university'), (1992, 'clarkson university'), (1993, 'clarkson university'), (1994, 'clarkson university'), (1995, 'los alamos national laboratory'), (1995, 'clarkson university'), (1996, 'los alamos national laboratory'), (1997, 'los alamos national laboratory'), (1998, 'university of michigan'), (1999, 'university of michigan'), (2002, 'university of michigan'), (2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) +all_us_institutions_year : ((1987, 'clarkson university'), (1987, 'los alamos national laboratory'), (1988, 'clarkson university'), (1989, 'clarkson university'), (1989, 'los alamos national laboratory'), (1990, 'clarkson university'), (1991, 'clarkson university'), (1992, 'clarkson university'), (1993, 'clarkson university'), (1994, 'clarkson university'), (1994, 'southern methodist university'), (1995, 'clarkson university'), (1995, 'los alamos national laboratory'), (1996, 'los alamos national laboratory'), (1997, 'los alamos national laboratory'), (1998, 'university of michigan'), (1999, 'university of michigan'), (2000, 'university of michigan'), (2001, 'university of michigan'), (2002, 'university of michigan'), (2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2011, 'university of minnesota'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : branislav +lastname : nikolic +middlename : k +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of delaware'),) +all_us_institutions_year : ((2014, 'university of delaware'),) + +firstname : branislav +lastname : nikolic +middlename : k +year_range : (1999, 2021) +main_us_institutions_year : ((1999, 'stony brook university'), (2000, 'stony brook university'), (2001, 'georgetown university'), (2002, 'georgetown university'), (2003, 'georgetown university'), (2004, 'university of delaware'), (2005, 'university of delaware'), (2006, 'university of delaware'), (2007, 'university of delaware'), (2008, 'university of delaware'), (2009, 'university of delaware'), (2010, 'university of delaware'), (2011, 'university of delaware'), (2012, 'university of delaware'), (2013, 'university of delaware'), (2014, 'university of delaware'), (2015, 'university of delaware'), (2016, 'university of delaware'), (2017, 'university of delaware'), (2018, 'university of delaware'), (2019, 'university of delaware'), (2020, 'university of delaware'), (2021, 'university of delaware')) +all_us_institutions_year : ((1999, 'stony brook university'), (2000, 'stony brook university'), (2001, 'georgetown university'), (2002, 'georgetown university'), (2003, 'georgetown university'), (2004, 'university of delaware'), (2005, 'university of delaware'), (2006, 'university of delaware'), (2007, 'university of delaware'), (2008, 'university of delaware'), (2009, 'university of delaware'), (2010, 'university of delaware'), (2011, 'university of delaware'), (2012, 'university of delaware'), (2013, 'university of delaware'), (2014, 'university of delaware'), (2015, 'university of delaware'), (2016, 'university of delaware'), (2017, 'university of delaware'), (2018, 'university of delaware'), (2019, 'university of delaware'), (2020, 'kavli institute for theoretical physics'), (2020, 'university of delaware'), (2021, 'kavli institute for theoretical physics'), (2021, 'university of delaware')) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : huili +lastname : xing +middlename : grace +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of notre dame'),) +all_us_institutions_year : ((2012, 'university of notre dame'),) + +firstname : huili +lastname : xing +middlename : grace +year_range : (1999, 2021) +main_us_institutions_year : ((1999, 'university of california santa barbara'), (2000, 'university of california santa barbara'), (2001, 'university of california santa barbara'), (2002, 'university of california santa barbara'), (2003, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2006, 'university of notre dame'), (2007, 'university of notre dame'), (2008, 'university of notre dame'), (2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame'), (2012, 'university of notre dame'), (2013, 'university of notre dame'), (2014, 'university of notre dame'), (2015, 'university of notre dame'), (2016, 'cornell university'), (2017, 'cornell university'), (2018, 'cornell university'), (2019, 'cornell university'), (2020, 'cornell university'), (2021, 'cornell university')) +all_us_institutions_year : ((1999, 'university of california santa barbara'), (2000, 'university of california santa barbara'), (2001, 'university of california santa barbara'), (2002, 'university of california santa barbara'), (2003, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2005, 'university of california santa barbara'), (2006, 'university of notre dame'), (2007, 'university of notre dame'), (2008, 'university of notre dame'), (2009, 'university of notre dame'), (2010, 'university of notre dame'), (2011, 'university of notre dame'), (2012, 'university of notre dame'), (2013, 'university of notre dame'), (2014, 'university of notre dame'), (2015, 'cornell university'), (2015, 'university of notre dame'), (2016, 'cornell university'), (2016, 'university of california san diego'), (2016, 'university of notre dame'), (2017, 'cornell university'), (2017, 'university of notre dame'), (2018, 'cornell university'), (2019, 'cornell university'), (2019, 'university of notre dame'), (2020, 'cornell university'), (2021, 'cornell university')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : tzyh +lastname : tarn +middlename : jong +year_range : (2006,) +main_us_institutions_year : ((2006, 'washington university in st louis'),) +all_us_institutions_year : ((2006, 'washington university in st louis'),) + +firstname : tzyh +lastname : tarn +middlename : jong +year_range : (1970, 2013) +main_us_institutions_year : ((1970, 'washington university in st louis'), (1971, 'washington university in st louis'), (1972, 'washington university in st louis'), (1973, 'washington university in st louis'), (1975, 'washington university in st louis'), (1984, 'washington university in st louis'), (1985, 'washington university in st louis'), (1988, 'washington university in st louis'), (1991, 'washington university in st louis'), (1992, 'washington university in st louis'), (1995, 'washington university in st louis'), (1996, 'washington university in st louis'), (1998, 'washington university in st louis'), (1999, 'washington university in st louis'), (2000, 'washington university in st louis'), (2001, 'washington university in st louis'), (2002, 'washington university in st louis'), (2004, 'washington university in st louis'), (2005, 'washington university in st louis'), (2006, 'washington university in st louis'), (2008, 'washington university in st louis'), (2009, 'washington university in st louis'), (2011, 'washington university in st louis'), (2012, 'washington university in st louis'), (2013, 'washington university in st louis')) +all_us_institutions_year : ((1970, 'washington university in st louis'), (1971, 'washington university in st louis'), (1972, 'washington university in st louis'), (1973, 'washington university in st louis'), (1975, 'washington university in st louis'), (1984, 'washington university in st louis'), (1985, 'washington university in st louis'), (1988, 'washington university in st louis'), (1991, 'washington university in st louis'), (1992, 'washington university in st louis'), (1994, 'washington university in st louis'), (1995, 'washington university in st louis'), (1996, 'washington university in st louis'), (1998, 'washington university in st louis'), (1999, 'washington university in st louis'), (2000, 'washington university in st louis'), (2001, 'washington university in st louis'), (2002, 'washington university in st louis'), (2004, 'washington university in st louis'), (2005, 'washington university in st louis'), (2006, 'washington university in st louis'), (2008, 'washington university in st louis'), (2009, 'washington university in st louis'), (2011, 'washington university in st louis'), (2012, 'washington university in st louis'), (2013, 'washington university in st louis')) + +7/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : cory +middlename : g +year_range : (2002,) +main_us_institutions_year : ((2002, 'massachusetts institute of technology'),) +all_us_institutions_year : ((2002, 'massachusetts institute of technology'),) + +firstname : david +lastname : cory +middlename : g +year_range : (1985, 2020) +main_us_institutions_year : ((1985, 'case western reserve university'), (1987, 'case western reserve university'), (1988, 'case western reserve university'), (1989, 'united states naval research laboratory'), (1990, 'united states naval research laboratory'), (1991, 'united states naval research laboratory'), (1992, 'united states naval research laboratory'), (1993, 'massachusetts institute of technology'), (1994, 'united states naval research laboratory'), (1994, 'massachusetts institute of technology'), (1995, 'massachusetts institute of technology'), (1996, 'massachusetts institute of technology'), (1997, 'massachusetts institute of technology'), (1998, 'massachusetts institute of technology'), (1999, 'massachusetts institute of technology'), (2000, 'massachusetts institute of technology'), (2001, 'massachusetts institute of technology'), (2002, 'massachusetts institute of technology'), (2003, 'massachusetts institute of technology'), (2004, 'massachusetts institute of technology'), (2005, 'massachusetts institute of technology'), (2006, 'massachusetts institute of technology'), (2007, 'massachusetts institute of technology'), (2008, 'massachusetts institute of technology'), (2009, 'massachusetts institute of technology'), (2010, 'massachusetts institute of technology')) +all_us_institutions_year : ((1985, 'case western reserve university'), (1987, 'case western reserve university'), (1988, 'case western reserve university'), (1989, 'case western reserve university'), (1989, 'united states naval research laboratory'), (1990, 'united states naval research laboratory'), (1991, 'united states naval research laboratory'), (1992, 'united states naval research laboratory'), (1993, 'massachusetts institute of technology'), (1994, 'massachusetts institute of technology'), (1994, 'united states naval research laboratory'), (1995, 'massachusetts institute of technology'), (1996, 'massachusetts institute of technology'), (1997, 'massachusetts institute of technology'), (1998, 'massachusetts institute of technology'), (1999, 'massachusetts institute of technology'), (2000, 'massachusetts institute of technology'), (2001, 'massachusetts institute of technology'), (2002, 'massachusetts institute of technology'), (2003, 'massachusetts institute of technology'), (2004, 'massachusetts institute of technology'), (2005, 'massachusetts institute of technology'), (2006, 'massachusetts institute of technology'), (2007, 'massachusetts institute of technology'), (2008, 'massachusetts institute of technology'), (2009, 'massachusetts institute of technology'), (2010, 'massachusetts institute of technology'), (2011, 'massachusetts institute of technology'), (2011, 'schlumberger'), (2013, 'massachusetts institute of technology')) + +8/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : yu +lastname : wang +middlename : u +year_range : (2015,) +main_us_institutions_year : ((2015, 'michigan technological university'),) +all_us_institutions_year : ((2015, 'michigan technological university'),) + +firstname : yu +lastname : wang +middlename : u +year_range : (2000, 2021) +main_us_institutions_year : ((2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2007, 'virginia tech'), (2009, 'virginia tech'), (2011, 'michigan technological university'), (2012, 'michigan technological university'), (2013, 'michigan technological university'), (2014, 'michigan technological university'), (2015, 'michigan technological university'), (2016, 'michigan technological university'), (2017, 'michigan technological university'), (2018, 'michigan technological university'), (2019, 'michigan technological university'), (2020, 'michigan technological university'), (2021, 'michigan technological university')) +all_us_institutions_year : ((2000, 'rutgers university'), (2001, 'rutgers university'), (2002, 'rutgers university'), (2003, 'general electric'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'general electric'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2007, 'virginia tech'), (2008, 'general electric'), (2009, 'michigan technological university'), (2009, 'virginia tech'), (2010, 'virginia tech'), (2011, 'michigan technological university'), (2012, 'michigan technological university'), (2013, 'michigan technological university'), (2014, 'michigan technological university'), (2015, 'michigan technological university'), (2016, 'michigan technological university'), (2017, 'michigan technological university'), (2018, 'michigan technological university'), (2019, 'michigan technological university'), (2020, 'michigan technological university'), (2021, 'michigan technological university')) + +9/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : tony +lastname : huang +middlename : jun +year_range : (2010,) +main_us_institutions_year : ((2010, 'pennsylvania state university'),) +all_us_institutions_year : ((2010, 'pennsylvania state university'),) + +firstname : tony +lastname : huang +middlename : jun +year_range : (1993, 2021) +main_us_institutions_year : ((2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'pennsylvania state university'), (2017, 'duke university'), (2017, 'pennsylvania state university'), (2018, 'duke university'), (2019, 'duke university'), (2020, 'duke university'), (2021, 'duke university')) +all_us_institutions_year : ((2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'pennsylvania state university'), (2007, 'pennsylvania state university'), (2008, 'pennsylvania state university'), (2009, 'pennsylvania state university'), (2010, 'pennsylvania state university'), (2011, 'pennsylvania state university'), (2012, 'pennsylvania state university'), (2013, 'pennsylvania state university'), (2014, 'pennsylvania state university'), (2015, 'pennsylvania state university'), (2016, 'duke university'), (2016, 'pennsylvania state university'), (2017, 'duke university'), (2017, 'pennsylvania state university'), (2018, 'duke university'), (2018, 'pennsylvania state university'), (2019, 'duke university'), (2020, 'duke university'), (2021, 'duke university')) + +10/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : claus +middlename : o +year_range : (2002,) +main_us_institutions_year : ((2002, 'virginia tech'),) +all_us_institutions_year : ((2002, 'virginia tech'),) + +firstname : richard +lastname : claus +middlename : o +year_range : (1977, 2018) +main_us_institutions_year : ((1980, 'virginia tech'), (1984, 'virginia tech'), (1986, 'virginia tech'), (1989, 'virginia tech'), (1990, 'virginia tech'), (1991, 'virginia tech'), (1992, 'virginia tech'), (1993, 'virginia tech'), (1994, 'virginia tech'), (1995, 'virginia tech'), (1996, 'virginia tech'), (1997, 'virginia tech'), (1998, 'virginia tech'), (1999, 'virginia tech'), (2000, 'virginia tech'), (2001, 'virginia tech'), (2002, 'virginia tech'), (2003, 'virginia tech'), (2004, 'virginia tech'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2007, 'virginia tech'), (2008, 'virginia tech'), (2015, 'slac national accelerator laboratory'), (2016, 'slac national accelerator laboratory')) +all_us_institutions_year : ((1979, 'virginia tech'), (1980, 'virginia tech'), (1983, 'virginia tech'), (1984, 'virginia tech'), (1986, 'virginia tech'), (1987, 'virginia tech'), (1988, 'virginia tech'), (1989, 'virginia tech'), (1990, 'virginia tech'), (1991, 'virginia tech'), (1992, 'virginia tech'), (1993, 'virginia tech'), (1994, 'virginia tech'), (1995, 'virginia tech'), (1996, 'virginia tech'), (1997, 'virginia tech'), (1998, 'virginia tech'), (1999, 'virginia tech'), (2000, 'virginia tech'), (2001, 'virginia tech'), (2002, 'virginia tech'), (2003, 'virginia tech'), (2004, 'virginia tech'), (2005, 'virginia tech'), (2006, 'virginia tech'), (2007, 'virginia tech'), (2008, 'virginia tech'), (2015, 'slac national accelerator laboratory'), (2016, 'slac national accelerator laboratory')) + +11/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : donald +lastname : drew +middlename : a +year_range : (1992,) +main_us_institutions_year : ((1992, 'rensselaer polytechnic institute'),) +all_us_institutions_year : ((1992, 'rensselaer polytechnic institute'),) + +firstname : donald +lastname : drew +middlename : a +year_range : (1976, 2021) +main_us_institutions_year : ((1976, 'rensselaer polytechnic institute'), (1978, 'rensselaer polytechnic institute'), (1979, 'rensselaer polytechnic institute'), (1980, 'rensselaer polytechnic institute'), (1981, 'rensselaer polytechnic institute'), (1982, 'rensselaer polytechnic institute'), (1983, 'rensselaer polytechnic institute'), (1985, 'rensselaer polytechnic institute'), (1986, 'rensselaer polytechnic institute'), (1987, 'rensselaer polytechnic institute'), (1988, 'rensselaer polytechnic institute'), (1989, 'rensselaer polytechnic institute'), (1990, 'rensselaer polytechnic institute'), (1991, 'rensselaer polytechnic institute'), (1992, 'rensselaer polytechnic institute'), (1993, 'rensselaer polytechnic institute'), (1994, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1997, 'rensselaer polytechnic institute'), (1998, 'rensselaer polytechnic institute'), (1999, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2002, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2004, 'rensselaer polytechnic institute'), (2005, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2010, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute')) +all_us_institutions_year : ((1976, 'rensselaer polytechnic institute'), (1978, 'rensselaer polytechnic institute'), (1979, 'rensselaer polytechnic institute'), (1980, 'rensselaer polytechnic institute'), (1981, 'rensselaer polytechnic institute'), (1982, 'rensselaer polytechnic institute'), (1983, 'rensselaer polytechnic institute'), (1985, 'rensselaer polytechnic institute'), (1986, 'rensselaer polytechnic institute'), (1987, 'rensselaer polytechnic institute'), (1988, 'rensselaer polytechnic institute'), (1989, 'rensselaer polytechnic institute'), (1990, 'rensselaer polytechnic institute'), (1991, 'rensselaer polytechnic institute'), (1992, 'rensselaer polytechnic institute'), (1993, 'rensselaer polytechnic institute'), (1994, 'rensselaer polytechnic institute'), (1996, 'rensselaer polytechnic institute'), (1997, 'rensselaer polytechnic institute'), (1998, 'rensselaer polytechnic institute'), (1999, 'rensselaer polytechnic institute'), (2001, 'rensselaer polytechnic institute'), (2002, 'rensselaer polytechnic institute'), (2003, 'rensselaer polytechnic institute'), (2004, 'rensselaer polytechnic institute'), (2005, 'rensselaer polytechnic institute'), (2006, 'rensselaer polytechnic institute'), (2007, 'rensselaer polytechnic institute'), (2008, 'rensselaer polytechnic institute'), (2009, 'rensselaer polytechnic institute'), (2010, 'rensselaer polytechnic institute'), (2011, 'rensselaer polytechnic institute'), (2012, 'rensselaer polytechnic institute'), (2014, 'rensselaer polytechnic institute'), (2016, 'rensselaer polytechnic institute')) + +12/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : prochaskas +middlename : xavier +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of california santa cruz'),) +all_us_institutions_year : ((2010, 'university of california santa cruz'),) + +firstname : j +lastname : prochaska +middlename : xavier +year_range : (2008, 2021) +main_us_institutions_year : ((2008, 'university of california santa cruz'), (2009, 'university of california santa cruz'), (2010, 'university of california santa cruz'), (2011, 'university of california santa cruz'), (2012, 'university of california santa cruz'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2015, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz'), (2021, 'university of california santa cruz')) +all_us_institutions_year : ((2008, 'university of california santa cruz'), (2009, 'california institute of technology'), (2009, 'university of california santa cruz'), (2010, 'california institute of technology'), (2010, 'university of california santa cruz'), (2011, 'university of california santa cruz'), (2012, 'university of california santa cruz'), (2013, 'california institute of technology'), (2013, 'university of california santa cruz'), (2014, 'university of california santa cruz'), (2015, 'university of california santa cruz'), (2016, 'university of california santa cruz'), (2017, 'university of california santa cruz'), (2018, 'university of california santa cruz'), (2019, 'university of california berkeley'), (2019, 'university of california santa cruz'), (2020, 'university of california santa cruz'), (2021, 'university of california santa cruz')) + +13/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : f +lastname : hersman +middlename : w +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of new hampshire main campus'),) +all_us_institutions_year : ((2007, 'university of new hampshire main campus'),) + +firstname : f +lastname : hersmann +middlename : w +year_range : (2003, 2005) +main_us_institutions_year : ((2005, 'university of new hampshire'),) +all_us_institutions_year : ((2005, 'university of new hampshire'),) + +14/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : v +lastname : kostelecky +middlename : alan +year_range : (2002,) +main_us_institutions_year : ((2002, 'indiana university'),) +all_us_institutions_year : ((2002, 'indiana university'),) + +firstname : v +lastname : kostelecký +middlename : alan +year_range : (1983, 2021) +main_us_institutions_year : ((1983, 'los alamos national laboratory'), (1984, 'los alamos national laboratory'), (1985, 'los alamos national laboratory'), (1986, 'indiana university'), (1988, 'indiana university'), (1989, 'indiana university'), (1990, 'indiana university'), (1991, 'indiana university'), (1992, 'indiana university'), (1993, 'indiana university'), (1994, 'indiana university'), (1995, 'indiana university'), (1996, 'indiana university'), (1997, 'indiana university'), (1998, 'indiana university'), (1999, 'indiana university'), (2000, 'indiana university'), (2001, 'indiana university'), (2002, 'indiana university'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2008, 'indiana university'), (2009, 'indiana university'), (2010, 'indiana university'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university'), (2014, 'indiana university'), (2015, 'indiana university'), (2016, 'indiana university'), (2017, 'indiana university'), (2018, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university'), (2021, 'indiana university')) +all_us_institutions_year : ((1983, 'los alamos national laboratory'), (1984, 'los alamos national laboratory'), (1985, 'los alamos national laboratory'), (1986, 'indiana university'), (1988, 'indiana university'), (1989, 'indiana university'), (1990, 'indiana university'), (1991, 'indiana university'), (1992, 'indiana university'), (1993, 'colby college'), (1993, 'indiana university'), (1994, 'colby college'), (1994, 'indiana university'), (1995, 'colby college'), (1995, 'indiana university'), (1996, 'colby college'), (1996, 'indiana university'), (1997, 'indiana university'), (1998, 'indiana university'), (1999, 'indiana university'), (2000, 'indiana university'), (2001, 'indiana university'), (2002, 'indiana university'), (2003, 'indiana university'), (2004, 'indiana university'), (2005, 'indiana university'), (2006, 'indiana university'), (2007, 'indiana university'), (2008, 'indiana university'), (2009, 'indiana university'), (2010, 'indiana university'), (2011, 'indiana university'), (2012, 'indiana university'), (2013, 'indiana university'), (2014, 'indiana university'), (2015, 'indiana university'), (2016, 'indiana university'), (2017, 'indiana university'), (2018, 'indiana university'), (2019, 'indiana university'), (2020, 'indiana university'), (2021, 'indiana university')) + +15/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : h +lastname : kimble +middlename : j +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of texas at austin'),) +all_us_institutions_year : ((1990, 'university of texas at austin'),) + +firstname : hyung +lastname : kim +middlename : j +year_range : (1988, 2021) +main_us_institutions_year : ((1990, 'university of colorado boulder'), (1992, 'university of colorado boulder'), (1993, 'university of colorado boulder'), (1994, 'carnegie mellon university'), (1995, 'carnegie mellon university'), (1997, 'carnegie mellon university'), (1998, 'carnegie mellon university'), (1999, 'carnegie mellon university'), (2001, 'carnegie mellon university'), (2001, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2008, 'carnegie mellon university'), (2009, 'university of california irvine'), (2010, 'carnegie mellon university'), (2011, 'carnegie mellon university'), (2011, 'university of california irvine'), (2012, 'carnegie mellon university'), (2012, 'salk institute for biological studies'), (2012, 'university of california irvine'), (2013, 'salk institute for biological studies'), (2014, 'carnegie mellon university'), (2015, 'carnegie mellon university'), (2016, 'carnegie mellon university'), (2017, 'carnegie mellon university'), (2018, 'carnegie mellon university'), (2019, 'carnegie mellon university'), (2019, 'university of nebraska medical center'), (2021, 'carnegie mellon university'), (2021, 'university of nebraska medical center')) +all_us_institutions_year : ((1990, 'university of colorado boulder'), (1992, 'university of colorado boulder'), (1993, 'university of colorado boulder'), (1994, 'carnegie mellon university'), (1994, 'university of colorado boulder'), (1995, 'carnegie mellon university'), (1997, 'carnegie mellon university'), (1998, 'carnegie mellon university'), (1999, 'carnegie mellon university'), (2001, 'carnegie mellon university'), (2001, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2008, 'carnegie mellon university'), (2008, 'university of california irvine'), (2009, 'carnegie mellon university'), (2009, 'university of california irvine'), (2010, 'carnegie mellon university'), (2010, 'university of california irvine'), (2010, 'university of colorado boulder'), (2011, 'carnegie mellon university'), (2011, 'university of california irvine'), (2012, 'carnegie mellon university'), (2012, 'salk institute for biological studies'), (2012, 'university of california irvine'), (2013, 'salk institute for biological studies'), (2013, 'university of california irvine'), (2014, 'carnegie mellon university'), (2015, 'carnegie mellon university'), (2015, 'salk institute for biological studies'), (2016, 'carnegie mellon university'), (2016, 'salk institute for biological studies'), (2017, 'carnegie mellon university'), (2018, 'carnegie mellon university'), (2018, 'salk institute for biological studies'), (2018, 'university of nebraska medical center'), (2019, 'carnegie mellon university'), (2019, 'university of nebraska medical center'), (2021, 'carnegie mellon university'), (2021, 'university of nebraska medical center')) + +16/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ward +lastname : machester +middlename : b +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of michigan'),) +all_us_institutions_year : ((2012, 'university of michigan'),) + +firstname : w +lastname : manchester +middlename : b +year_range : (2000, 2021) +main_us_institutions_year : ((2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan')) +all_us_institutions_year : ((2003, 'national center for atmospheric research'), (2003, 'university of illinois at urbana champaign'), (2003, 'university of michigan'), (2004, 'university of michigan'), (2005, 'university of michigan'), (2006, 'university of michigan'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2009, 'university of michigan'), (2010, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2013, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2017, 'university of michigan'), (2018, 'university of michigan'), (2019, 'university of michigan'), (2020, 'university of michigan'), (2021, 'university of michigan')) + +16/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : superfine +middlename : chasen +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2010, 'university of north carolina at chapel hill'),) + +firstname : richard +lastname : spero +middlename : chasen +year_range : (2000, 2015) +main_us_institutions_year : ((2000, 'national institutes of health'), (2008, 'university of north carolina at chapel hill'), (2011, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((2000, 'national institutes of health'), (2008, 'university of north carolina at chapel hill'), (2010, 'university of north carolina at chapel hill'), (2011, 'university of north carolina at chapel hill'), (2013, 'university of north carolina at chapel hill'), (2017, 'university of north carolina at chapel hill')) + +17/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : douglas +lastname : durian +middlename : j +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of pennsylvania'),) +all_us_institutions_year : ((2015, 'university of pennsylvania'),) + +firstname : douglas +lastname : tobias +middlename : j +year_range : (1987, 2021) +main_us_institutions_year : ((1987, 'carnegie mellon university'), (1989, 'carnegie mellon university'), (1990, 'carnegie mellon university'), (1991, 'carnegie mellon university'), (1992, 'carnegie mellon university'), (1993, 'university of pennsylvania'), (1995, 'university of pennsylvania'), (1996, 'university of pennsylvania'), (1997, 'national institute of standards and technology'), (1997, 'university of pennsylvania'), (1998, 'national institute of standards and technology'), (1999, 'national institute of standards and technology'), (1999, 'university of california irvine'), (2000, 'university of california irvine'), (2001, 'university of california irvine'), (2002, 'university of california irvine'), (2003, 'university of california irvine'), (2004, 'university of california irvine'), (2005, 'university of california irvine'), (2006, 'university of california irvine'), (2007, 'university of maryland baltimore'), (2007, 'university of california irvine'), (2008, 'university of california irvine'), (2009, 'university of california irvine'), (2010, 'university of california irvine'), (2011, 'university of california irvine'), (2012, 'university of california irvine'), (2013, 'university of california irvine'), (2014, 'university of california irvine'), (2015, 'university of california irvine'), (2016, 'university of california irvine'), (2017, 'university of california irvine'), (2018, 'university of california irvine'), (2019, 'university of california irvine'), (2020, 'university of california irvine'), (2021, 'university of california irvine')) +all_us_institutions_year : ((1987, 'carnegie mellon university'), (1989, 'carnegie mellon university'), (1990, 'carnegie mellon university'), (1991, 'carnegie mellon university'), (1991, 'university of pennsylvania'), (1992, 'carnegie mellon university'), (1993, 'university of pennsylvania'), (1995, 'university of pennsylvania'), (1996, 'university of pennsylvania'), (1997, 'national institute of standards and technology'), (1997, 'university of pennsylvania'), (1998, 'national institute of standards and technology'), (1999, 'national institute of standards and technology'), (1999, 'university of california irvine'), (2000, 'university of california irvine'), (2001, 'university of california irvine'), (2002, 'university of california irvine'), (2003, 'university of california irvine'), (2004, 'environmental molecular sciences laboratory'), (2004, 'university of california irvine'), (2005, 'university of california irvine'), (2006, 'university of california irvine'), (2007, 'university of california irvine'), (2007, 'university of maryland baltimore'), (2008, 'university of california'), (2008, 'university of california irvine'), (2009, 'environmental molecular sciences laboratory'), (2009, 'university of california irvine'), (2010, 'university of california irvine'), (2010, 'university of maryland college park'), (2011, 'university of california irvine'), (2012, 'university of california irvine'), (2013, 'university of california irvine'), (2014, 'university of california irvine'), (2015, 'university of california'), (2015, 'university of california berkeley'), (2015, 'university of california irvine'), (2016, 'university of california irvine'), (2017, 'university of california irvine'), (2018, 'university of california berkeley'), (2018, 'university of california irvine'), (2019, 'university of california berkeley'), (2019, 'university of california irvine'), (2020, 'university of california irvine'), (2021, 'university of california irvine')) + +18/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : r +lastname : drake +middlename : paul +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of michigan'),) +all_us_institutions_year : ((2010, 'university of michigan'),) + +firstname : r +lastname : horja +middlename : paul +year_range : (2005, 2015) +main_us_institutions_year : ((2005, 'university of michigan'), (2006, 'oklahoma state university stillwater'), (2013, 'oklahoma state university stillwater')) +all_us_institutions_year : ((2002, 'university of michigan'), (2005, 'oklahoma state university stillwater'), (2005, 'university of michigan'), (2006, 'oklahoma state university stillwater'), (2010, 'oklahoma state university stillwater'), (2013, 'oklahoma state university stillwater')) + +18/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : charles +lastname : mayo +middlename : w +year_range : (2001,) +main_us_institutions_year : ((2001, 'north carolina state university'),) +all_us_institutions_year : ((2001, 'north carolina state university'),) + +firstname : c +lastname : ma +middlename : w +year_range : (2005, 2018) +main_us_institutions_year : ((2005, 'chinese academy of sciences'), (2006, 'chinese academy of sciences'), (2007, 'chinese academy of sciences'), (2008, 'chinese academy of sciences')) +all_us_institutions_year : ((2005, 'chinese academy of sciences'), (2006, 'chinese academy of sciences'), (2007, 'chinese academy of sciences'), (2008, 'chinese academy of sciences')) + +18/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : raymond +lastname : chiao +middlename : y +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of california merced'),) +all_us_institutions_year : ((2014, 'university of california merced'),) + +firstname : c +lastname : chi +middlename : y +year_range : (2015, 2021) +main_us_institutions_year : ((2015, 'columbia university'),) +all_us_institutions_year : ((2015, 'columbia university'),) + +18/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : c +lastname : shih +middlename : k +year_range : (2001,) +main_us_institutions_year : ((2001, 'university of texas at austin'),) +all_us_institutions_year : ((2001, 'university of texas at austin'),) + +firstname : c +lastname : shi +middlename : None +year_range : (2013, 2019) +main_us_institutions_year : ((2013, 'cornell university'), (2013, 'hess corporation'), (2019, 'cornell university')) +all_us_institutions_year : ((2013, 'cornell university'), (2013, 'hess corporation'), (2019, 'cornell university')) + +18/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : mauel +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'stevens institute of technology'),) +all_us_institutions_year : ((2009, 'stevens institute of technology'),) + +firstname : m +lastname : mauel +middlename : e +year_range : (1984, 2021) +main_us_institutions_year : ((1985, 'massachusetts institute of technology'), (1987, 'massachusetts institute of technology'), (1990, 'columbia university'), (1992, 'columbia university'), (1993, 'columbia university'), (1994, 'columbia university'), (1994, 'princeton plasma physics laboratory'), (1995, 'columbia university'), (1996, 'columbia university'), (1997, 'columbia university'), (1999, 'columbia university'), (2000, 'columbia university'), (2001, 'columbia university'), (2002, 'new york university'), (2003, 'columbia university'), (2004, 'columbia university'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'massachusetts institute of technology'), (2007, 'columbia university'), (2008, 'massachusetts institute of technology'), (2008, 'columbia university'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) +all_us_institutions_year : ((1985, 'massachusetts institute of technology'), (1987, 'massachusetts institute of technology'), (1990, 'columbia university'), (1992, 'columbia university'), (1992, 'princeton university'), (1993, 'columbia university'), (1994, 'columbia university'), (1994, 'princeton plasma physics laboratory'), (1995, 'columbia university'), (1995, 'general atomics'), (1995, 'princeton plasma physics laboratory'), (1995, 'princeton university'), (1996, 'columbia university'), (1996, 'general atomics'), (1997, 'columbia university'), (1997, 'princeton university'), (1999, 'columbia university'), (2000, 'columbia university'), (2001, 'columbia university'), (2002, 'new york university'), (2003, 'columbia university'), (2004, 'columbia university'), (2005, 'columbia university'), (2006, 'columbia university'), (2007, 'columbia university'), (2007, 'massachusetts institute of technology'), (2008, 'columbia university'), (2008, 'massachusetts institute of technology'), (2009, 'columbia university'), (2010, 'columbia university'), (2011, 'columbia university'), (2012, 'columbia university'), (2013, 'columbia university'), (2014, 'columbia university'), (2015, 'columbia university'), (2016, 'columbia university'), (2017, 'columbia university'), (2018, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) + +18/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : richard +lastname : cavanaugh +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of illinois chicago'),) +all_us_institutions_year : ((2015, 'university of illinois chicago'),) + +firstname : r +lastname : cavanaugh +middlename : None +year_range : (2016, 2017) +main_us_institutions_year : ((2017, 'university of illinois at chicago'),) +all_us_institutions_year : ((2017, 'university of illinois at chicago'),) + +18/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michael +lastname : lewis +middlename : r +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of missouri columbia'),) +all_us_institutions_year : ((2004, 'university of missouri columbia'),) + +firstname : g +lastname : lewis +middlename : r +year_range : (2005, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2010, 'planetary science institute'),) + +18/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : w +lastname : mann +middlename : anthony +year_range : (1996,) +main_us_institutions_year : ((1996, 'tufts university'),) +all_us_institutions_year : ((1996, 'tufts university'),) + +firstname : w +lastname : mann +middlename : b +year_range : (1958, 1988) +main_us_institutions_year : ((1980, 'national institute of standards and technology'),) +all_us_institutions_year : ((1980, 'national institute of standards and technology'),) + +18/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : abushagur +middlename : a g +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of alabama in huntsville'),) +all_us_institutions_year : ((1993, 'university of alabama in huntsville'),) + +firstname : mustafa +lastname : abushagur +middlename : a g +year_range : (1979, 1985) +main_us_institutions_year : ((1980, 'university of rochester'), (1985, 'university of rochester')) +all_us_institutions_year : ((1980, 'university of rochester'), (1985, 'university of rochester'), (2008, 'university of rochester')) + +18/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eric +lastname : stryland +middlename : w van +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of central florida'),) +all_us_institutions_year : ((2006, 'university of central florida'),) + +firstname : e +lastname : stryland +middlename : w van +year_range : (1979, 1991) +main_us_institutions_year : ((1979, 'university of north texas'), (1981, 'university of north texas'), (1982, 'university of north texas'), (1983, 'university of north texas'), (1984, 'university of north texas'), (1985, 'university of north texas'), (1987, 'university of north texas')) +all_us_institutions_year : ((1979, 'university of north texas'), (1980, 'university of north texas'), (1981, 'university of north texas'), (1982, 'university of north texas'), (1983, 'university of north texas'), (1984, 'university of north texas'), (1985, 'university of north texas'), (1986, 'university of north texas'), (1987, 'university of north texas')) + +18/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : humanic +middlename : j +year_range : (1999,) +main_us_institutions_year : ((1999, 'ohio state university'),) +all_us_institutions_year : ((1999, 'ohio state university'),) + +firstname : t +lastname : humanic +middlename : j +year_range : (1988, 1994) +main_us_institutions_year : ((1991, 'university of california berkeley'),) +all_us_institutions_year : ((1991, 'university of california berkeley'),) + +18/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : strovink +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'university of california berkeley'),) +all_us_institutions_year : ((2000, 'university of california berkeley'),) + +firstname : mark +lastname : strovink +middlename : None +year_range : (1980, 2008) +main_us_institutions_year : ((1980, 'university of california'),) +all_us_institutions_year : ((1980, 'university of california'), (1999, 'university of california')) + +18/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : harvey +lastname : shepard +middlename : k +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of new hampshire main campus'),) +all_us_institutions_year : ((1992, 'university of new hampshire main campus'),) + +firstname : harvey +lastname : shepard +middlename : None +year_range : (1992, 1994) +main_us_institutions_year : ((1992, 'santa fe institute'), (1994, 'los alamos national laboratory'), (1994, 'university of new hampshire')) +all_us_institutions_year : ((1992, 'santa fe institute'), (1994, 'los alamos national laboratory'), (1994, 'university of new hampshire')) + +19/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 393.1971865653992 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_political science_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_political science_christoph_degree0_advisors_9015.log index 1cd5a15..591c4eb 100644 --- a/src/dataprep/temp/trainlink_mag_proquest_political science_christoph_degree0_advisors_9015.log +++ b/src/dataprep/temp/trainlink_mag_proquest_political science_christoph_degree0_advisors_9015.log @@ -5,7 +5,7 @@ Testing is False I set the write connection to the main database. id_field is [17744445] and will be passed to sql queries. finished setup ... -Time elapsed: 0.0004648168881734212 minutes +Time elapsed: 0.0008443792661031088 minutes SELECT relationship_id @@ -136,1147 +136,5 @@ Time elapsed: 0.0004648168881734212 minutes WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL -Time elapsed: 33.53752032518387 minutes - -Starting active labeling... -firstname : james -lastname : tracy -middlename : d -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of minnesota'),) -all_us_institutions_year : ((2000, 'university of minnesota'),) - -firstname : james -lastname : prosser -middlename : i -year_range : (1977, 2021) -main_us_institutions_year : None -all_us_institutions_year : ((2000, 'university of minnesota'),) - -0/10 positive, 0/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : nancy -lastname : grant -middlename : k -year_range : (1999,) -main_us_institutions_year : ((1999, 'university of akron'),) -all_us_institutions_year : ((1999, 'university of akron'),) - -firstname : nancy -lastname : ryanwenger -middlename : a -year_range : (1996, 2021) -main_us_institutions_year : ((1996, 'ohio state university'), (1997, 'ohio state university'), (1998, 'ohio state university'), (1999, 'ohio state university'), (2000, 'ohio state university'), (2001, 'ohio state university'), (2002, 'ohio state university'), (2003, 'ohio state university'), (2004, 'university of akron'), (2004, 'ohio state university'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2009, 'nationwide children s hospital'), (2010, 'nationwide children s hospital'), (2012, 'nationwide children s hospital'), (2013, 'nationwide children s hospital'), (2015, 'nationwide children s hospital'), (2016, 'nationwide children s hospital'), (2017, 'nationwide children s hospital'), (2020, 'ohio state university'), (2021, 'ohio state university')) -all_us_institutions_year : ((1996, 'ohio state university'), (1997, 'ohio state university'), (1998, 'ohio state university'), (1999, 'ohio state university'), (2000, 'ohio state university'), (2001, 'ohio state university'), (2002, 'ohio state university'), (2003, 'ohio state university'), (2004, 'ohio state university'), (2004, 'university of akron'), (2005, 'ohio state university'), (2006, 'ohio state university'), (2007, 'ohio state university'), (2009, 'nationwide children s hospital'), (2010, 'nationwide children s hospital'), (2012, 'nationwide children s hospital'), (2013, 'nationwide children s hospital'), (2015, 'nationwide children s hospital'), (2015, 'ohio state university'), (2016, 'nationwide children s hospital'), (2016, 'ohio state university'), (2017, 'nationwide children s hospital'), (2017, 'ohio state university'), (2017, 'university of nevada las vegas'), (2020, 'ohio state university'), (2021, 'ohio state university')) - -0/10 positive, 1/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : valerie -lastname : martinezebers -middlename : None -year_range : (2012,) -main_us_institutions_year : ((2012, 'university of north texas'),) -all_us_institutions_year : ((2012, 'university of north texas'),) - -firstname : valerie -lastname : martinez -middlename : None -year_range : (1994, 2001) -main_us_institutions_year : ((2001, 'texas christian university'),) -all_us_institutions_year : ((2001, 'texas christian university'),) - -0/10 positive, 2/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : h -lastname : turnball -middlename : rutherford -year_range : (1997,) -main_us_institutions_year : ((1997, 'university of kansas'),) -all_us_institutions_year : ((1997, 'university of kansas'),) - -firstname : h -lastname : turnbull -middlename : rutherford -year_range : (1975, 2018) -main_us_institutions_year : ((1979, 'university of north carolina at chapel hill'), (1982, 'university of kansas'), (1993, 'university of kansas'), (1995, 'university of kansas'), (1996, 'university of kansas'), (1999, 'university of kansas'), (2000, 'university of kansas'), (2001, 'university of kansas'), (2002, 'university of kansas'), (2003, 'university of kansas'), (2004, 'university of kansas'), (2005, 'university of kansas'), (2006, 'university of kansas'), (2007, 'university of kansas'), (2009, 'university of kansas'), (2010, 'university of kansas'), (2011, 'university of kansas'), (2012, 'university of kansas'), (2013, 'university of kansas'), (2014, 'university of kansas'), (2015, 'university of kansas'), (2016, 'university of kansas'), (2018, 'university of kansas'), (2018, 'university of north carolina at chapel hill')) -all_us_institutions_year : ((1979, 'university of north carolina at chapel hill'), (1982, 'university of kansas'), (1986, 'university of kansas'), (1993, 'university of kansas'), (1995, 'university of kansas'), (1996, 'university of kansas'), (1999, 'university of kansas'), (2000, 'university of kansas'), (2001, 'university of kansas'), (2002, 'university of kansas'), (2003, 'university of kansas'), (2004, 'university of kansas'), (2005, 'university of kansas'), (2006, 'university of kansas'), (2007, 'arthritis foundation'), (2007, 'university of kansas'), (2009, 'university of kansas'), (2010, 'university of kansas'), (2011, 'university of kansas'), (2012, 'university of kansas'), (2013, 'university of kansas'), (2014, 'university of kansas'), (2015, 'university of kansas'), (2016, 'university of kansas'), (2018, 'university of kansas'), (2018, 'university of north carolina at chapel hill')) - -0/10 positive, 3/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : judith -lastname : mclauglin -middlename : block -year_range : (1996,) -main_us_institutions_year : ((1996, 'harvard university'),) -all_us_institutions_year : ((1996, 'harvard university'),) - -firstname : judith -lastname : mclaughlin -middlename : block -year_range : (1985, 2004) -main_us_institutions_year : ((1985, 'harvard university'), (2004, 'harvard university')) -all_us_institutions_year : ((1985, 'harvard university'), (2004, 'harvard university')) - -1/10 positive, 3/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : steven -lastname : balla -middlename : j -year_range : (2011,) -main_us_institutions_year : ((2011, 'george washington university'),) -all_us_institutions_year : ((2011, 'george washington university'),) - -firstname : steven -lastname : wallach -middlename : j -year_range : (1994, 1997) -main_us_institutions_year : ((1997, 'hewlett packard'),) -all_us_institutions_year : ((1997, 'hewlett packard'),) - -1/10 positive, 3/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : ana -lastname : pi -middlename : maria -year_range : (1992,) -main_us_institutions_year : ((1992, 'miami institute of psychology of the caribbean center for advanced studies'),) -all_us_institutions_year : ((1992, 'miami institute of psychology of the caribbean center for advanced studies'),) - -firstname : ana -lastname : villegas -middlename : maria -year_range : (1998, 2018) -main_us_institutions_year : ((1998, 'montclair state university'), (2002, 'montclair state university'), (2005, 'montclair state university'), (2007, 'montclair state university'), (2008, 'montclair state university'), (2010, 'montclair state university'), (2012, 'montclair state university'), (2013, 'montclair state university'), (2015, 'montclair state university'), (2018, 'montclair state university')) -all_us_institutions_year : ((1997, 'montclair state university'), (1998, 'montclair state university'), (2002, 'montclair state university'), (2005, 'montclair state university'), (2007, 'montclair state university'), (2008, 'montclair state university'), (2010, 'montclair state university'), (2012, 'montclair state university'), (2013, 'montclair state university'), (2014, 'montclair state university'), (2015, 'montclair state university'), (2016, 'montclair state university'), (2017, 'montclair state university'), (2018, 'montclair state university')) - -1/10 positive, 4/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : william -lastname : cooper -middlename : joseph -year_range : (1997,) -main_us_institutions_year : ((1997, 'louisiana state university and agricultural mechanical college'),) -all_us_institutions_year : ((1997, 'louisiana state university and agricultural mechanical college'),) - -firstname : william -lastname : cooper -middlename : d -year_range : (1980, 2005) -main_us_institutions_year : ((1982, 'university of north carolina at greensboro'), (1984, 'university of north carolina at greensboro')) -all_us_institutions_year : ((1982, 'university of north carolina at greensboro'), (1984, 'university of north carolina at greensboro')) - -1/10 positive, 5/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : susan -lastname : collins -middlename : None -year_range : (1992,) -main_us_institutions_year : ((1992, 'harvard university'),) -all_us_institutions_year : ((1992, 'harvard university'),) - -firstname : susan -lastname : collins -middlename : m -year_range : (1989, 2015) -main_us_institutions_year : ((1996, 'georgetown university'), (1999, 'brookings institution'), (2000, 'brookings institution'), (2004, 'brookings institution'), (2008, 'university of michigan'), (2015, 'university of michigan')) -all_us_institutions_year : ((1988, 'brookings institution'), (1988, 'georgetown university'), (1988, 'national bureau of economic research'), (1996, 'georgetown university'), (1997, 'brookings institution'), (1997, 'georgetown university'), (1997, 'national bureau of economic research'), (1999, 'brookings institution'), (2000, 'brookings institution'), (2004, 'brookings institution'), (2007, 'brookings institution'), (2007, 'georgetown university'), (2007, 'national bureau of economic research'), (2007, 'university of michigan'), (2008, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan')) - -1/10 positive, 6/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : shultz -middlename : h -year_range : (1993,) -main_us_institutions_year : ((1993, 'fletcher school of law and diplomacy tufts university'),) -all_us_institutions_year : ((1993, 'fletcher school of law and diplomacy tufts university'),) - -firstname : richard -lastname : shultz -middlename : h -year_range : (1978, 2017) -main_us_institutions_year : ((1985, 'tufts university'), (1987, 'tufts university'), (1989, 'tufts university'), (1993, 'tufts university'), (2017, 'tufts university')) -all_us_institutions_year : ((1985, 'tufts university'), (1987, 'tufts university'), (1989, 'tufts university'), (1993, 'tufts university'), (2017, 'tufts university')) - -1/10 positive, 7/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : mary -lastname : berry -middlename : frances -year_range : (2004,) -main_us_institutions_year : ((2004, 'university of pennsylvania'),) -all_us_institutions_year : ((2004, 'university of pennsylvania'),) - -firstname : mary -lastname : berry -middlename : frances -year_range : (1968, 2007) -main_us_institutions_year : ((1968, 'eastern michigan university'),) -all_us_institutions_year : ((1968, 'eastern michigan university'),) - -2/10 positive, 7/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : darrell -lastname : west -middlename : None -year_range : (1997,) -main_us_institutions_year : ((1997, 'brown university'),) -all_us_institutions_year : ((1997, 'brown university'),) - -firstname : darrell -lastname : west -middlename : m -year_range : (2009, 2020) -main_us_institutions_year : ((2009, 'brookings institution'), (2014, 'brookings institution'), (2020, 'brookings institution')) -all_us_institutions_year : ((2009, 'brookings institution'), (2011, 'brookings institution'), (2014, 'brookings institution'), (2020, 'brookings institution')) - -2/10 positive, 8/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : j -lastname : strachan -middlename : cherie -year_range : (2005,) -main_us_institutions_year : ((2005, 'university at albany suny'),) -all_us_institutions_year : ((2005, 'university at albany suny'),) - -firstname : j -lastname : strachan -middlename : cherie -year_range : (2006, 2021) -main_us_institutions_year : ((2006, 'central michigan university'), (2008, 'central michigan university'), (2011, 'central michigan university'), (2012, 'central michigan university'), (2013, 'central michigan university'), (2016, 'central michigan university'), (2019, 'central michigan university'), (2020, 'central michigan university'), (2021, 'central michigan university')) -all_us_institutions_year : ((2006, 'central michigan university'), (2008, 'central michigan university'), (2011, 'central michigan university'), (2012, 'central michigan university'), (2013, 'central michigan university'), (2016, 'central michigan university'), (2019, 'central michigan university'), (2020, 'central michigan university'), (2021, 'central michigan university')) - -2/10 positive, 9/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : elizabeth -lastname : huaman -middlename : sumida -year_range : (2015,) -main_us_institutions_year : ((2015, 'arizona state university'),) -all_us_institutions_year : ((2015, 'arizona state university'),) - -firstname : elizabeth -lastname : huaman -middlename : sumida -year_range : (2019, 2020) -main_us_institutions_year : ((2019, 'university of minnesota'), (2020, 'university of minnesota')) -all_us_institutions_year : ((2019, 'university of minnesota'), (2020, 'university of minnesota')) - -2/10 positive, 10/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : olson -middlename : stuart -year_range : (2004,) -main_us_institutions_year : ((2004, 'florida international university'),) -all_us_institutions_year : ((2004, 'florida international university'),) - -firstname : richard -lastname : olson -middlename : stuart -year_range : (1985, 1988) -main_us_institutions_year : ((1985, 'arizona state university'), (1988, 'arizona state university')) -all_us_institutions_year : ((1985, 'arizona state university'), (1988, 'arizona state university')) - -2/10 positive, 11/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : margaret -lastname : owen -middlename : tresch -year_range : (1996,) -main_us_institutions_year : ((1996, 'university of texas southwestern medical center at dallas'),) -all_us_institutions_year : ((1996, 'university of texas southwestern medical center at dallas'),) - -firstname : margaret -lastname : owen -middlename : tresch -year_range : (1985, 2021) -main_us_institutions_year : ((2000, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas'), (2004, 'university of texas at dallas'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) -all_us_institutions_year : ((2000, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas'), (2004, 'university of texas at dallas'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) - -2/10 positive, 12/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : olson -middlename : stuart -year_range : (2004,) -main_us_institutions_year : ((2004, 'florida international university'),) -all_us_institutions_year : ((2004, 'florida international university'),) - -firstname : richard -lastname : olson -middlename : stuart -year_range : (1985, 1988) -main_us_institutions_year : ((1985, 'arizona state university'), (1988, 'arizona state university')) -all_us_institutions_year : ((1985, 'arizona state university'), (1988, 'arizona state university')) - -2/10 positive, 11/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : margaret -lastname : owen -middlename : tresch -year_range : (1996,) -main_us_institutions_year : ((1996, 'university of texas southwestern medical center at dallas'),) -all_us_institutions_year : ((1996, 'university of texas southwestern medical center at dallas'),) - -firstname : margaret -lastname : owen -middlename : tresch -year_range : (1985, 2021) -main_us_institutions_year : ((2000, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas'), (2004, 'university of texas at dallas'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) -all_us_institutions_year : ((2000, 'university of texas at dallas'), (2002, 'university of texas at dallas'), (2003, 'university of texas at dallas'), (2004, 'university of texas at dallas'), (2005, 'university of texas at dallas'), (2007, 'university of texas at dallas'), (2009, 'university of texas at dallas'), (2010, 'university of texas at dallas'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) - -2/10 positive, 12/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : r -lastname : wong -middlename : bin -year_range : (2014,) -main_us_institutions_year : ((2014, 'university of california los angeles'),) -all_us_institutions_year : ((2014, 'university of california los angeles'),) - -firstname : r -lastname : wong -middlename : bin -year_range : (2002, 2003) -main_us_institutions_year : ((2002, 'university of california irvine'), (2003, 'university of california irvine')) -all_us_institutions_year : ((2002, 'university of california irvine'), (2003, 'university of california irvine')) - -3/10 positive, 12/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : mary -lastname : joseph -middlename : vincentia -year_range : (1999,) -main_us_institutions_year : ((1999, 'catholic university of america'),) -all_us_institutions_year : ((1999, 'catholic university of america'),) - -firstname : m -lastname : joseph -middlename : vincentia -year_range : (1980, 1989) -main_us_institutions_year : ((1980, 'the catholic university of america'), (1982, 'the catholic university of america'), (1987, 'the catholic university of america'), (1988, 'the catholic university of america'), (1989, 'the catholic university of america')) -all_us_institutions_year : ((1980, 'the catholic university of america'), (1982, 'the catholic university of america'), (1987, 'the catholic university of america'), (1988, 'the catholic university of america'), (1989, 'the catholic university of america'), (1991, 'the catholic university of america')) - -3/10 positive, 13/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : h -lastname : brands -middlename : w -year_range : (1997,) -main_us_institutions_year : ((1997, 'texas a m university college station'),) -all_us_institutions_year : ((1997, 'texas a m university college station'),) - -firstname : h -lastname : brands -middlename : w -year_range : (1990, 2002) -main_us_institutions_year : ((1990, 'texas a m university'), (1992, 'texas a m university'), (1993, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university')) -all_us_institutions_year : ((1990, 'texas a m university'), (1992, 'texas a m university'), (1993, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university')) - -3/10 positive, 13/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : mary -lastname : joseph -middlename : vincentia -year_range : (1999,) -main_us_institutions_year : ((1999, 'catholic university of america'),) -all_us_institutions_year : ((1999, 'catholic university of america'),) - -firstname : m -lastname : joseph -middlename : vincentia -year_range : (1980, 1989) -main_us_institutions_year : ((1980, 'the catholic university of america'), (1982, 'the catholic university of america'), (1987, 'the catholic university of america'), (1988, 'the catholic university of america'), (1989, 'the catholic university of america')) -all_us_institutions_year : ((1980, 'the catholic university of america'), (1982, 'the catholic university of america'), (1987, 'the catholic university of america'), (1988, 'the catholic university of america'), (1989, 'the catholic university of america'), (1991, 'the catholic university of america')) - -3/10 positive, 13/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : h -lastname : brands -middlename : w -year_range : (1997,) -main_us_institutions_year : ((1997, 'texas a m university college station'),) -all_us_institutions_year : ((1997, 'texas a m university college station'),) - -firstname : h -lastname : brands -middlename : w -year_range : (1990, 2002) -main_us_institutions_year : ((1990, 'texas a m university'), (1992, 'texas a m university'), (1993, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university')) -all_us_institutions_year : ((1990, 'texas a m university'), (1992, 'texas a m university'), (1993, 'texas a m university'), (1999, 'texas a m university'), (2002, 'texas a m university')) - -3/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : norma -lastname : moruzzi -middlename : claire -year_range : (2010,) -main_us_institutions_year : ((2010, 'university of illinois chicago'),) -all_us_institutions_year : ((2010, 'university of illinois chicago'),) - -firstname : norma -lastname : moruzzi -middlename : claire -year_range : (1994, 2016) -main_us_institutions_year : ((2006, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2013, 'university of illinois at chicago')) -all_us_institutions_year : ((2006, 'university of illinois at chicago'), (2007, 'university of illinois at chicago'), (2010, 'university of illinois at chicago'), (2013, 'university of illinois at chicago')) - -4/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : wendy -lastname : blome -middlename : whiting -year_range : (2015,) -main_us_institutions_year : ((2015, 'catholic university of america'),) -all_us_institutions_year : ((2015, 'catholic university of america'),) - -firstname : wendy -lastname : blome -middlename : whiting -year_range : (1998, 2016) -main_us_institutions_year : ((2009, 'the catholic university of america'), (2010, 'the catholic university of america'), (2011, 'the catholic university of america'), (2012, 'the catholic university of america'), (2013, 'the catholic university of america'), (2014, 'the catholic university of america'), (2016, 'the catholic university of america')) -all_us_institutions_year : ((2009, 'the catholic university of america'), (2010, 'the catholic university of america'), (2011, 'the catholic university of america'), (2012, 'the catholic university of america'), (2013, 'the catholic university of america'), (2014, 'the catholic university of america'), (2016, 'the catholic university of america')) - -5/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : stephen -lastname : linder -middlename : h -year_range : (1994,) -main_us_institutions_year : ((1994, 'university of texas school of public health'),) -all_us_institutions_year : ((1994, 'university of texas school of public health'),) - -firstname : stephen -lastname : linder -middlename : h -year_range : (1981, 2021) -main_us_institutions_year : ((1981, 'tulane university'), (1982, 'tulane university'), (1984, 'tulane university'), (1984, 'university of texas at austin'), (1987, 'university of texas health science center at houston'), (1988, 'university of texas health science center at houston'), (1989, 'university of texas health science center at houston'), (1989, 'university of texas at austin'), (1990, 'university of texas at austin'), (1994, 'university of texas health science center at houston'), (1995, 'university of texas health science center at houston'), (1999, 'university of texas at austin'), (2001, 'university of texas health science center at houston'), (2002, 'university of texas health science center at houston'), (2003, 'university of texas health science center at houston'), (2003, 'university of texas at austin'), (2006, 'university of texas at austin'), (2007, 'university of texas health science center at houston'), (2008, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas health science center at houston'), (2013, 'university of texas at austin'), (2014, 'university of texas at austin'), (2015, 'university of texas at austin'), (2016, 'university of texas health science center at houston'), (2016, 'university of texas at austin'), (2017, 'university of texas health science center at houston'), (2017, 'texas medical center'), (2018, 'university of texas health science center at houston'), (2019, 'university of texas health science center at houston'), (2019, 'texas medical center'), (2020, 'university of texas at austin'), (2021, 'university of texas at austin')) -all_us_institutions_year : ((1981, 'tulane university'), (1982, 'tulane university'), (1984, 'tulane university'), (1984, 'university of texas at austin'), (1987, 'tulane university'), (1987, 'university of texas health science center at houston'), (1988, 'university of texas health science center at houston'), (1989, 'university of texas at austin'), (1989, 'university of texas health science center at houston'), (1990, 'university of texas at austin'), (1991, 'university of texas at austin'), (1994, 'university of texas health science center at houston'), (1995, 'university of texas health science center at houston'), (1999, 'university of texas at austin'), (2001, 'university of texas health science center at houston'), (2002, 'university of texas health science center at houston'), (2003, 'university of texas at austin'), (2003, 'university of texas health science center at houston'), (2006, 'university of texas at austin'), (2007, 'university of texas health science center at houston'), (2008, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas health science center at houston'), (2013, 'university of texas at austin'), (2014, 'university of texas at austin'), (2014, 'university of texas health science center at houston'), (2015, 'university of texas at austin'), (2015, 'university of texas health science center at houston'), (2016, 'university of texas at austin'), (2016, 'university of texas health science center at houston'), (2017, 'texas medical center'), (2017, 'university of texas health science center at houston'), (2018, 'texas medical center'), (2018, 'university of texas health science center at houston'), (2019, 'texas medical center'), (2019, 'university of texas health science center at houston'), (2020, 'texas medical center'), (2020, 'university of texas at austin'), (2021, 'university of texas at austin')) - -6/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : carol -lastname : thompson -middlename : b -year_range : (2009,) -main_us_institutions_year : ((2009, 'northern arizona university'),) -all_us_institutions_year : ((2009, 'northern arizona university'),) - -firstname : carol -lastname : thompson -middlename : b -year_range : (1982, 1994) -main_us_institutions_year : ((1982, 'university of southern california'), (1988, 'university of southern california'), (1994, 'university of southern california')) -all_us_institutions_year : ((1982, 'university of southern california'), (1988, 'university of southern california'), (1994, 'university of southern california')) - -7/10 positive, 14/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : nicole -lastname : piquro -middlename : leeper -year_range : (2007,) -main_us_institutions_year : ((2007, 'university of florida'),) -all_us_institutions_year : ((2007, 'university of florida'),) - -firstname : nicole -lastname : piquero -middlename : leeper -year_range : (1998, 2021) -main_us_institutions_year : ((1998, 'university of maryland college park'), (2000, 'northeastern university'), (2002, 'university of florida'), (2003, 'university of florida'), (2004, 'university of florida'), (2005, 'university of florida'), (2006, 'university of florida'), (2007, 'john jay college of criminal justice'), (2008, 'virginia commonwealth university'), (2009, 'virginia commonwealth university'), (2010, 'florida state university'), (2011, 'florida state university'), (2012, 'university of texas at dallas'), (2013, 'university of texas at dallas'), (2014, 'university of texas at dallas'), (2015, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2017, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) -all_us_institutions_year : ((1998, 'university of maryland college park'), (2000, 'northeastern university'), (2002, 'university of florida'), (2003, 'university of florida'), (2004, 'university of florida'), (2005, 'university of florida'), (2006, 'university of florida'), (2007, 'john jay college of criminal justice'), (2007, 'university of florida'), (2008, 'john jay college of criminal justice'), (2008, 'virginia commonwealth university'), (2009, 'virginia commonwealth university'), (2010, 'florida state university'), (2010, 'virginia commonwealth university'), (2011, 'florida state university'), (2011, 'university of texas at austin'), (2011, 'university of texas at dallas'), (2012, 'university of texas at dallas'), (2013, 'university of texas at dallas'), (2014, 'university of texas at austin'), (2014, 'university of texas at dallas'), (2015, 'university of texas at dallas'), (2016, 'university of texas at dallas'), (2017, 'university of texas at dallas'), (2018, 'university of texas at dallas'), (2019, 'university of texas at dallas'), (2020, 'university of texas at dallas'), (2021, 'university of texas at dallas')) - -7/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : marc -lastname : holzer -middlename : None -year_range : (2001,) -main_us_institutions_year : ((2001, 'rutgers university'),) -all_us_institutions_year : ((2001, 'rutgers university'),) - -firstname : marc -lastname : holzer -middlename : None -year_range : (1974, 2019) -main_us_institutions_year : ((1987, 'new york college of health professions'), (1993, 'rutgers university'), (1997, 'rutgers university'), (1999, 'rutgers university'), (2001, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2011, 'rutgers university'), (2012, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university')) -all_us_institutions_year : ((1987, 'new york college of health professions'), (1993, 'rutgers university'), (1997, 'rutgers university'), (1998, 'rutgers university'), (1999, 'rutgers university'), (2001, 'rutgers university'), (2003, 'rutgers university'), (2004, 'rutgers university'), (2005, 'rutgers university'), (2006, 'rutgers university'), (2007, 'rutgers university'), (2008, 'rutgers university'), (2009, 'rutgers university'), (2010, 'rutgers university'), (2011, 'rutgers university'), (2012, 'rutgers university'), (2014, 'rutgers university'), (2015, 'rutgers university'), (2016, 'rutgers university'), (2017, 'rutgers university')) - -8/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : sharon -lastname : tennyson -middlename : None -year_range : (1998,) -main_us_institutions_year : ((1998, 'university of pennsylvania'),) -all_us_institutions_year : ((1998, 'university of pennsylvania'),) - -firstname : sharon -lastname : tennyson -middlename : None -year_range : (1992, 2016) -main_us_institutions_year : ((1992, 'university of pennsylvania'), (1996, 'university of pennsylvania'), (1997, 'university of pennsylvania'), (1998, 'university of pennsylvania'), (1999, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university')) -all_us_institutions_year : ((1992, 'university of pennsylvania'), (1995, 'cornell university'), (1996, 'university of pennsylvania'), (1997, 'university of pennsylvania'), (1998, 'cornell university'), (1998, 'university of pennsylvania'), (1999, 'cornell university'), (2000, 'cornell university'), (2001, 'cornell university'), (2002, 'cornell university'), (2003, 'cornell university'), (2007, 'cornell university'), (2008, 'cornell university'), (2009, 'cornell university'), (2010, 'cornell university'), (2011, 'cornell university'), (2012, 'cornell university'), (2013, 'cornell university'), (2014, 'cornell university'), (2015, 'cornell university'), (2016, 'cornell university'), (2017, 'cornell university')) - -9/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : carol -lastname : anderson -middlename : None -year_range : (2005,) -main_us_institutions_year : ((2005, 'university of missouri columbia'),) -all_us_institutions_year : ((2005, 'university of missouri columbia'),) - -firstname : carolyn -lastname : anderson -middlename : j -year_range : (1979, 2021) -main_us_institutions_year : ((1979, 'university of massachusetts amherst'), (1983, 'university of massachusetts amherst'), (1987, 'university of illinois at urbana champaign'), (1989, 'university of illinois at urbana champaign'), (1992, 'university of illinois at urbana champaign'), (1993, 'washington university in st louis'), (1994, 'washington university in st louis'), (1995, 'washington university in st louis'), (1996, 'washington university in st louis'), (1997, 'washington university in st louis'), (1998, 'washington university in st louis'), (1999, 'washington university in st louis'), (2000, 'washington university in st louis'), (2001, 'washington university in st louis'), (2002, 'washington university in st louis'), (2003, 'washington university in st louis'), (2004, 'washington university in st louis'), (2005, 'washington university in st louis'), (2006, 'washington university in st louis'), (2007, 'washington university in st louis'), (2008, 'washington university in st louis'), (2009, 'washington university in st louis'), (2010, 'washington university in st louis'), (2011, 'washington university in st louis'), (2012, 'university of illinois at urbana champaign'), (2012, 'university of pittsburgh'), (2013, 'university of pittsburgh'), (2014, 'university of pittsburgh'), (2015, 'university of pittsburgh'), (2016, 'university of pittsburgh'), (2017, 'university of pittsburgh'), (2018, 'university of pittsburgh'), (2019, 'university of pittsburgh'), (2020, 'university of pittsburgh'), (2021, 'university of missouri'), (2021, 'university of pittsburgh')) -all_us_institutions_year : ((1979, 'university of massachusetts amherst'), (1983, 'university of massachusetts amherst'), (1987, 'university of illinois at urbana champaign'), (1989, 'university of illinois at urbana champaign'), (1992, 'university of illinois at urbana champaign'), (1992, 'washington university in st louis'), (1993, 'washington university in st louis'), (1994, 'washington university in st louis'), (1995, 'washington university in st louis'), (1996, 'university of illinois at urbana champaign'), (1996, 'washington university in st louis'), (1997, 'washington university in st louis'), (1998, 'university of illinois at urbana champaign'), (1998, 'washington university in st louis'), (1999, 'university of illinois at urbana champaign'), (1999, 'washington university in st louis'), (2000, 'university of illinois at urbana champaign'), (2000, 'washington university in st louis'), (2001, 'washington university in st louis'), (2002, 'washington university in st louis'), (2003, 'washington university in st louis'), (2004, 'washington university in st louis'), (2005, 'university of illinois at urbana champaign'), (2005, 'washington university in st louis'), (2006, 'washington university in st louis'), (2007, 'university of illinois at urbana champaign'), (2007, 'washington university in st louis'), (2008, 'university of illinois at urbana champaign'), (2008, 'washington university in st louis'), (2009, 'university of illinois at urbana champaign'), (2009, 'washington university in st louis'), (2010, 'university of illinois at urbana champaign'), (2010, 'washington university in st louis'), (2011, 'university of illinois at urbana champaign'), (2011, 'university of massachusetts amherst'), (2011, 'washington university in st louis'), (2012, 'university of illinois at urbana champaign'), (2012, 'university of pittsburgh'), (2012, 'washington university in st louis'), (2013, 'university of illinois at urbana champaign'), (2013, 'university of pittsburgh'), (2013, 'washington university in st louis'), (2014, 'swarthmore college'), (2014, 'university of illinois at urbana champaign'), (2014, 'university of pittsburgh'), (2015, 'university of illinois at urbana champaign'), (2015, 'university of pittsburgh'), (2016, 'university of illinois at urbana champaign'), (2016, 'university of massachusetts amherst'), (2016, 'university of pittsburgh'), (2017, 'university of illinois at urbana champaign'), (2017, 'university of pittsburgh'), (2018, 'university of illinois at urbana champaign'), (2018, 'university of pittsburgh'), (2019, 'university of illinois at urbana champaign'), (2019, 'university of massachusetts amherst'), (2019, 'university of pittsburgh'), (2020, 'pacific northwest national laboratory'), (2020, 'university of illinois at urbana champaign'), (2020, 'university of massachusetts amherst'), (2020, 'university of missouri'), (2020, 'university of pittsburgh'), (2021, 'university of missouri'), (2021, 'university of pittsburgh')) - -10/10 positive, 15/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : robert -lastname : pursley -middlename : None -year_range : (1997,) -main_us_institutions_year : ((1997, 'university of tennessee'),) -all_us_institutions_year : ((1997, 'university of tennessee'),) - -firstname : robert -lastname : partee -middlename : p -year_range : (2016, 2019) -main_us_institutions_year : ((2016, 'university of tennessee'), (2017, 'national oceanic and atmospheric administration'), (2018, 'national oceanic and atmospheric administration')) -all_us_institutions_year : ((2016, 'university of tennessee'), (2017, 'national oceanic and atmospheric administration'), (2018, 'national oceanic and atmospheric administration')) - -10/10 positive, 16/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : james -lastname : ceaser -middlename : None -year_range : (2003,) -main_us_institutions_year : ((2003, 'university of virginia main campus'),) -all_us_institutions_year : ((2003, 'university of virginia main campus'),) - -firstname : james -lastname : ceaser -middlename : w -year_range : (1985, 2012) -main_us_institutions_year : ((1985, 'university of virginia'), (1996, 'university of virginia'), (1999, 'university of virginia'), (2003, 'university of virginia'), (2005, 'university of virginia'), (2012, 'university of virginia')) -all_us_institutions_year : ((1985, 'university of virginia'), (1996, 'university of virginia'), (1999, 'university of virginia'), (2003, 'university of virginia'), (2005, 'university of virginia'), (2012, 'university of virginia')) - -10/10 positive, 17/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : donald -lastname : rothchild -middlename : None -year_range : (2000,) -main_us_institutions_year : ((2000, 'university of california davis'),) -all_us_institutions_year : ((2000, 'university of california davis'),) - -firstname : donald -lastname : rothchild -middlename : None -year_range : (1996, 1998) -main_us_institutions_year : ((1996, 'university of california san diego'),) -all_us_institutions_year : ((1996, 'university of california san diego'),) - -11/10 positive, 17/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : craig -lastname : emmert -middlename : None -year_range : (1997,) -main_us_institutions_year : ((1997, 'texas tech university'),) -all_us_institutions_year : ((1997, 'texas tech university'),) - -firstname : craig -lastname : emmert -middlename : f -year_range : (2013, 2014) -main_us_institutions_year : ((2013, 'university of texas of the permian basin'), (2014, 'university of texas of the permian basin')) -all_us_institutions_year : ((2013, 'university of texas of the permian basin'), (2014, 'university of texas of the permian basin')) - -11/10 positive, 17/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : andrew -lastname : hahn -middlename : None -year_range : (2011,) -main_us_institutions_year : ((2011, 'brandeis university the heller school for social policy and management'),) -all_us_institutions_year : ((2011, 'brandeis university the heller school for social policy and management'),) - -firstname : andrew -lastname : hahn -middlename : b -year_range : (1979, 2005) -main_us_institutions_year : ((1979, 'brandeis university'), (1980, 'brandeis university'), (1994, 'brandeis university'), (2005, 'brandeis university')) -all_us_institutions_year : ((1979, 'brandeis university'), (1980, 'brandeis university'), (1994, 'brandeis university'), (2005, 'brandeis university')) - -11/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : chalmers -lastname : johnson -middlename : None -year_range : (1990,) -main_us_institutions_year : ((1990, 'university of california berkeley'),) -all_us_institutions_year : ((1990, 'university of california berkeley'),) - -firstname : chalmers -lastname : johnson -middlename : None -year_range : (1961, 2010) -main_us_institutions_year : ((1962, 'university of california'), (1965, 'university of california'), (1969, 'university of california'), (1977, 'university of california')) -all_us_institutions_year : ((1962, 'university of california'), (1965, 'university of california'), (1969, 'university of california'), (1977, 'university of california')) - -11/10 positive, 18/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : john -lastname : conbere -middlename : None -year_range : (2010,) -main_us_institutions_year : ((2010, 'university of st thomas minnesota'),) -all_us_institutions_year : ((2010, 'university of st thomas minnesota'),) - -firstname : john -lastname : conbere -middlename : None -year_range : (2006, 2018) -main_us_institutions_year : ((2006, 'university of st thomas'), (2007, 'university of st thomas'), (2014, 'university of st thomas')) -all_us_institutions_year : ((2006, 'university of st thomas'), (2007, 'university of st thomas'), (2014, 'university of st thomas')) - -11/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : krisna -lastname : suryanata -middlename : None -year_range : (2009,) -main_us_institutions_year : ((2009, 'university of hawaii at manoa'),) -all_us_institutions_year : ((2009, 'university of hawaii at manoa'),) - -firstname : krisnawati -lastname : suryanata -middlename : None -year_range : (1979, 2021) -main_us_institutions_year : ((1994, 'university of california berkeley'), (2006, 'university of hawaii at manoa'), (2009, 'university of hawaii at manoa'), (2018, 'university of hawaii')) -all_us_institutions_year : ((1994, 'university of california berkeley'), (2000, 'university of hawaii at manoa'), (2006, 'university of hawaii at manoa'), (2009, 'university of hawaii at manoa'), (2018, 'university of hawaii')) - -12/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : eileen -lastname : boris -middlename : None -year_range : (2010,) -main_us_institutions_year : ((2010, 'university of california santa barbara'),) -all_us_institutions_year : ((2010, 'university of california santa barbara'),) - -firstname : eileen -lastname : boris -middlename : None -year_range : (2003, 2015) -main_us_institutions_year : ((2003, 'university of california'), (2009, 'university of california'), (2015, 'university of california')) -all_us_institutions_year : ((2003, 'university of california'), (2009, 'university of california'), (2015, 'university of california')) - -13/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jeannie -lastname : oakes -middlename : None -year_range : (1997,) -main_us_institutions_year : ((1997, 'university of california los angeles'),) -all_us_institutions_year : ((1997, 'university of california los angeles'),) - -firstname : jeannie -lastname : oakes -middlename : None -year_range : (1990, 2004) -main_us_institutions_year : ((2004, 'university of california berkeley'),) -all_us_institutions_year : ((2004, 'university of california berkeley'),) - -14/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : hillestad -middlename : j -year_range : (2000,) -main_us_institutions_year : ((2000, 'rand graduate school'),) -all_us_institutions_year : ((2000, 'rand graduate school'),) - -firstname : richard -lastname : hillestad -middlename : None -year_range : (1980, 2020) -main_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) -all_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) - -14/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jeannie -lastname : oakes -middlename : None -year_range : (1997,) -main_us_institutions_year : ((1997, 'university of california los angeles'),) -all_us_institutions_year : ((1997, 'university of california los angeles'),) - -firstname : jeannie -lastname : oakes -middlename : None -year_range : (1990, 2004) -main_us_institutions_year : ((2004, 'university of california berkeley'),) -all_us_institutions_year : ((2004, 'university of california berkeley'),) - -14/10 positive, 19/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : richard -lastname : hillestad -middlename : j -year_range : (2000,) -main_us_institutions_year : ((2000, 'rand graduate school'),) -all_us_institutions_year : ((2000, 'rand graduate school'),) - -firstname : richard -lastname : hillestad -middlename : None -year_range : (1980, 2020) -main_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) -all_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) - -14/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'catholic university of america'),) -all_us_institutions_year : ((2015, 'catholic university of america'),) - -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2005, 2021) -main_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2021, 'the catholic university of america')) -all_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2013, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2019, 'the catholic university of america'), (2021, 'the catholic university of america')) - -15/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : hillestad -middlename : j -year_range : (2000,) -main_us_institutions_year : ((2000, 'rand graduate school'),) -all_us_institutions_year : ((2000, 'rand graduate school'),) - -firstname : richard -lastname : hillestad -middlename : None -year_range : (1980, 2020) -main_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) -all_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) - -14/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'catholic university of america'),) -all_us_institutions_year : ((2015, 'catholic university of america'),) - -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2005, 2021) -main_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2021, 'the catholic university of america')) -all_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2013, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2019, 'the catholic university of america'), (2021, 'the catholic university of america')) - -14/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : richard -lastname : hillestad -middlename : j -year_range : (2000,) -main_us_institutions_year : ((2000, 'rand graduate school'),) -all_us_institutions_year : ((2000, 'rand graduate school'),) - -firstname : richard -lastname : hillestad -middlename : None -year_range : (1980, 2020) -main_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) -all_us_institutions_year : ((1980, 'rand corporation'), (1995, 'rand corporation'), (2005, 'rand corporation'), (2009, 'rand corporation'), (2018, 'rand corporation'), (2019, 'rand corporation'), (2020, 'rand corporation')) - -14/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'catholic university of america'),) -all_us_institutions_year : ((2015, 'catholic university of america'),) - -firstname : sandra -lastname : barrueco -middlename : None -year_range : (2005, 2021) -main_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2021, 'the catholic university of america')) -all_us_institutions_year : ((2005, 'the catholic university of america'), (2007, 'the catholic university of america'), (2008, 'the catholic university of america'), (2011, 'the catholic university of america'), (2013, 'the catholic university of america'), (2015, 'the catholic university of america'), (2017, 'the catholic university of america'), (2019, 'the catholic university of america'), (2021, 'the catholic university of america')) - -15/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : philip -lastname : mcmichael -middlename : None -year_range : (2009,) -main_us_institutions_year : ((2009, 'cornell university'),) -all_us_institutions_year : ((2009, 'cornell university'),) - -firstname : phillip -lastname : mcmichael -middlename : None -year_range : (1997, 2000) -main_us_institutions_year : ((1997, 'cornell university'), (2000, 'cornell university')) -all_us_institutions_year : ((1997, 'cornell university'), (2000, 'cornell university')) - -16/10 positive, 20/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : ray -lastname : catalano -middlename : a -year_range : (2001,) -main_us_institutions_year : ((2001, 'university of california berkeley'),) -all_us_institutions_year : ((2001, 'university of california berkeley'),) - -firstname : ralph -lastname : catalano -middlename : None -year_range : (1975, 2021) -main_us_institutions_year : ((1975, 'university of california irvine'), (1976, 'university of california irvine'), (1977, 'national center for public policy research'), (1977, 'university of california irvine'), (1979, 'national center for public policy research'), (1979, 'university of california irvine'), (1981, 'university of california irvine'), (1983, 'national center for public policy research'), (1983, 'university of california irvine'), (1984, 'university of california irvine'), (1986, 'university of california irvine'), (1987, 'university of california irvine'), (1988, 'university of california irvine'), (1989, 'university of california irvine'), (1991, 'university of california berkeley'), (1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1994, 'university of california berkeley'), (1996, 'university of california berkeley'), (1997, 'university of california berkeley'), (1998, 'university of california berkeley'), (1999, 'university of california berkeley'), (2000, 'university of california berkeley'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2007, 'university of california berkeley'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of california berkeley'), (2011, 'university of california berkeley'), (2012, 'university of california berkeley'), (2013, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of california berkeley'), (2018, 'university of california berkeley'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley'), (2021, 'university of california berkeley')) -all_us_institutions_year : ((1975, 'university of california irvine'), (1976, 'university of california irvine'), (1977, 'national center for public policy research'), (1977, 'university of california irvine'), (1979, 'national center for public policy research'), (1979, 'university of california irvine'), (1981, 'university of california irvine'), (1982, 'university of california irvine'), (1983, 'national center for public policy research'), (1983, 'university of california irvine'), (1984, 'university of california irvine'), (1986, 'university of california irvine'), (1987, 'university of california irvine'), (1988, 'university of california irvine'), (1989, 'university of california irvine'), (1991, 'university of california berkeley'), (1992, 'university of california berkeley'), (1993, 'university of california berkeley'), (1994, 'university of california berkeley'), (1996, 'university of california berkeley'), (1997, 'university of california berkeley'), (1998, 'university of california berkeley'), (1999, 'university of california berkeley'), (2000, 'university of california berkeley'), (2001, 'university of california berkeley'), (2002, 'university of california berkeley'), (2003, 'university of california berkeley'), (2004, 'university of california berkeley'), (2005, 'university of california berkeley'), (2006, 'university of california berkeley'), (2007, 'university of california berkeley'), (2008, 'university of california berkeley'), (2009, 'university of california berkeley'), (2010, 'university of california berkeley'), (2011, 'university of california berkeley'), (2012, 'university of california berkeley'), (2013, 'university of california berkeley'), (2014, 'university of california berkeley'), (2015, 'university of california berkeley'), (2016, 'university of california berkeley'), (2017, 'university of california berkeley'), (2018, 'university of california berkeley'), (2019, 'university of california berkeley'), (2020, 'university of california berkeley'), (2021, 'university of california berkeley')) - -16/10 positive, 21/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : cindy -lastname : akers -middlename : None -year_range : (2004,) -main_us_institutions_year : ((2004, 'texas a m university college station'),) -all_us_institutions_year : ((2004, 'texas a m university college station'),) - -firstname : cindy -lastname : akers -middlename : None -year_range : (2003, 2020) -main_us_institutions_year : ((2008, 'texas tech university'), (2017, 'texas tech university')) -all_us_institutions_year : ((2008, 'texas tech university'), (2017, 'texas tech university')) - -16/10 positive, 21/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : nicolas -lastname : walle -middlename : van de -year_range : (2003,) -main_us_institutions_year : ((2003, 'michigan state university'),) -all_us_institutions_year : ((2003, 'michigan state university'),) - -firstname : nicholas -lastname : walle -middlename : van de -year_range : (1997, 1999) -main_us_institutions_year : ((1997, 'michigan state university'),) -all_us_institutions_year : ((1997, 'michigan state university'),) - -16/10 positive, 21/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : bruce -lastname : jones -middlename : anthony -year_range : (2002,) -main_us_institutions_year : ((2002, 'university of missouri columbia'),) -all_us_institutions_year : ((2002, 'university of missouri columbia'),) - -firstname : bruce -lastname : jones -middlename : anthony -year_range : (2005, 2008) -main_us_institutions_year : ((2007, 'university of south florida'), (2008, 'university of south florida')) -all_us_institutions_year : ((2007, 'university of south florida'), (2008, 'university of south florida')) - -17/10 positive, 21/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : nicolas -lastname : walle -middlename : van de -year_range : (2003,) -main_us_institutions_year : ((2003, 'michigan state university'),) -all_us_institutions_year : ((2003, 'michigan state university'),) - -firstname : nicholas -lastname : walle -middlename : van de -year_range : (1997, 1999) -main_us_institutions_year : ((1997, 'michigan state university'),) -all_us_institutions_year : ((1997, 'michigan state university'),) - -16/10 positive, 21/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished -firstname : bruce -lastname : jones -middlename : anthony -year_range : (2002,) -main_us_institutions_year : ((2002, 'university of missouri columbia'),) -all_us_institutions_year : ((2002, 'university of missouri columbia'),) - -firstname : bruce -lastname : jones -middlename : anthony -year_range : (2005, 2008) -main_us_institutions_year : ((2007, 'university of south florida'), (2008, 'university of south florida')) -all_us_institutions_year : ((2007, 'university of south florida'), (2008, 'university of south florida')) - -16/10 positive, 22/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : tim -lastname : mccartney -middlename : None -year_range : (2002,) -main_us_institutions_year : ((2002, 'nova southeastern university'),) -all_us_institutions_year : ((2002, 'nova southeastern university'),) - -firstname : t -lastname : mccartney -middlename : o -year_range : (2006, 2009) -main_us_institutions_year : ((2009, 'nova southeastern university'),) -all_us_institutions_year : ((2009, 'nova southeastern university'),) - -16/10 positive, 23/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : victoria -lastname : hattam -middlename : None -year_range : (2003,) -main_us_institutions_year : ((2003, 'new school university'),) -all_us_institutions_year : ((2003, 'new school university'),) - -firstname : victoria -lastname : hattam -middlename : None -year_range : (1992, 2018) -main_us_institutions_year : ((1994, 'the new school'), (2001, 'the new school'), (2004, 'the new school'), (2005, 'the new school'), (2016, 'the new school'), (2018, 'the new school')) -all_us_institutions_year : ((1994, 'the new school'), (2001, 'the new school'), (2004, 'the new school'), (2005, 'the new school'), (2011, 'the new school'), (2014, 'the new school'), (2016, 'the new school'), (2018, 'the new school')) - -16/10 positive, 23/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : jose -lastname : casanova -middlename : None -year_range : (2003,) -main_us_institutions_year : ((2003, 'new school university'),) -all_us_institutions_year : ((2003, 'new school university'),) - -firstname : jose -lastname : casanova -middlename : None -year_range : (1982, 2019) -main_us_institutions_year : ((1995, 'the new school'), (1996, 'the new school'), (2001, 'the new school'), (2006, 'the new school'), (2008, 'the new school'), (2008, 'georgetown university'), (2011, 'georgetown university'), (2016, 'georgetown university'), (2017, 'georgetown university'), (2018, 'georgetown university'), (2019, 'georgetown university')) -all_us_institutions_year : ((1995, 'the new school'), (1996, 'the new school'), (2001, 'the new school'), (2006, 'the new school'), (2008, 'georgetown university'), (2008, 'the new school'), (2011, 'georgetown university'), (2011, 'the new school'), (2016, 'georgetown university'), (2017, 'georgetown university'), (2018, 'georgetown university'), (2019, 'georgetown university')) - -17/10 positive, 23/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : john -lastname : brooke -middlename : None -year_range : (2015,) -main_us_institutions_year : ((2015, 'ohio state university'),) -all_us_institutions_year : ((2015, 'ohio state university'),) - -firstname : john -lastname : brooke -middlename : m -year_range : (1994, 2016) -main_us_institutions_year : None -all_us_institutions_year : ((2005, 'sun microsystems laboratories'),) - -18/10 positive, 23/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : alejandro -lastname : rodriguez -middlename : None -year_range : (2012,) -main_us_institutions_year : ((2012, 'university of texas at arlington'),) -all_us_institutions_year : ((2012, 'university of texas at arlington'),) - -firstname : a -lastname : rodriguez -middlename : b -year_range : (1987, 2021) -main_us_institutions_year : ((2006, 'university of texas at san antonio'),) -all_us_institutions_year : ((2006, 'university of texas at san antonio'),) - -18/10 positive, 24/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : vivienne -lastname : shue -middlename : None -year_range : (1996,) -main_us_institutions_year : ((1996, 'cornell university'),) -all_us_institutions_year : ((1996, 'cornell university'),) - -firstname : vivienne -lastname : shue -middlename : None -year_range : (1981, 2017) -main_us_institutions_year : ((1988, 'columbia university'),) -all_us_institutions_year : ((1988, 'columbia university'),) - -18/10 positive, 25/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : david -lastname : karp -middlename : None -year_range : (2001,) -main_us_institutions_year : ((2001, 'boston college'),) -all_us_institutions_year : ((2001, 'boston college'),) - -firstname : david -lastname : karp -middlename : r -year_range : (1993, 2019) -main_us_institutions_year : ((1993, 'university of washington'), (1995, 'university of washington'), (1999, 'skidmore college'), (2001, 'skidmore college'), (2002, 'skidmore college'), (2004, 'skidmore college'), (2005, 'skidmore college'), (2007, 'skidmore college'), (2008, 'skidmore college'), (2014, 'skidmore college'), (2016, 'skidmore college'), (2019, 'skidmore college')) -all_us_institutions_year : ((1993, 'university of washington'), (1995, 'university of washington'), (1999, 'skidmore college'), (2001, 'skidmore college'), (2002, 'skidmore college'), (2004, 'skidmore college'), (2005, 'skidmore college'), (2007, 'skidmore college'), (2008, 'skidmore college'), (2014, 'skidmore college'), (2016, 'skidmore college'), (2019, 'skidmore college')) - -18/10 positive, 26/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : r -lastname : nwanko -middlename : nwafo -year_range : (1993,) -main_us_institutions_year : ((1993, 'howard university'),) -all_us_institutions_year : ((1993, 'howard university'),) - -firstname : r -lastname : nwanko -middlename : l nwafo -year_range : (1990, 2000) -main_us_institutions_year : ((1990, 'howard university'), (1991, 'howard university')) -all_us_institutions_year : ((1990, 'howard university'), (1991, 'howard university')) - -18/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : patrick -lastname : mcnamara -middlename : None -year_range : (2014,) -main_us_institutions_year : ((2014, 'northcentral university'),) -all_us_institutions_year : ((2014, 'northcentral university'),) - -firstname : patrick -lastname : mcnamara -middlename : j -year_range : (1995, 2017) -main_us_institutions_year : ((2006, 'university of minnesota'), (2014, 'university of north carolina at chapel hill'), (2014, 'university of minnesota')) -all_us_institutions_year : ((2006, 'university of minnesota'), (2014, 'university of minnesota'), (2014, 'university of north carolina at chapel hill')) - -19/10 positive, 27/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : sumner -lastname : croix -middlename : la -year_range : (1999,) -main_us_institutions_year : ((1999, 'university of hawaii at manoa'),) -all_us_institutions_year : ((1999, 'university of hawaii at manoa'),) - -firstname : sumner -lastname : croix -middlename : j la -year_range : (1983, 2021) -main_us_institutions_year : ((1984, 'university of hawaii'), (1986, 'university of hawaii'), (1989, 'university of hawaii'), (1992, 'university of hawaii'), (1993, 'university of hawaii'), (1994, 'university of hawaii'), (1995, 'university of hawaii'), (1996, 'university of hawaii'), (1997, 'university of hawaii'), (1999, 'university of hawaii'), (2002, 'university of hawaii'), (2006, 'university of hawaii'), (2007, 'university of hawaii'), (2007, 'university of hawaii at manoa'), (2008, 'university of hawaii at manoa'), (2013, 'university of hawaii'), (2014, 'university of hawaii'), (2016, 'university of hawaii'), (2018, 'university of hawaii at manoa'), (2021, 'university of hawaii')) -all_us_institutions_year : ((1984, 'university of hawaii'), (1986, 'university of hawaii'), (1988, 'university of hawaii'), (1989, 'university of hawaii'), (1991, 'university of hawaii'), (1992, 'university of hawaii'), (1993, 'university of hawaii'), (1994, 'university of hawaii'), (1995, 'university of hawaii'), (1996, 'university of hawaii'), (1997, 'university of hawaii'), (1999, 'university of hawaii'), (2002, 'university of hawaii'), (2004, 'university of hawaii'), (2005, 'university of hawaii'), (2005, 'university of hawaii at manoa'), (2006, 'university of hawaii'), (2007, 'university of hawaii'), (2007, 'university of hawaii at manoa'), (2008, 'university of hawaii at manoa'), (2009, 'university of hawaii'), (2010, 'university of hawaii'), (2013, 'university of hawaii'), (2014, 'university of hawaii'), (2015, 'university of hawaii at manoa'), (2016, 'university of hawaii'), (2018, 'university of hawaii at manoa'), (2021, 'university of hawaii')) - -19/10 positive, 28/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -firstname : michael -lastname : boretsky -middlename : None -year_range : (1991,) -main_us_institutions_year : ((1991, 'catholic university of america'),) -all_us_institutions_year : ((1991, 'catholic university of america'),) - -firstname : michael -lastname : boretsky -middlename : None -year_range : (1987, 1990) -main_us_institutions_year : ((1987, 'the catholic university of america'), (1990, 'the catholic university of america')) -all_us_institutions_year : ((1987, 'the catholic university of america'), (1990, 'the catholic university of america')) - -20/10 positive, 28/10 negative -Do these records refer to the same thing? -(y)es / (n)o / (u)nsure / (f)inished / (p)revious -Finished labeling -Done in 112.37411377827327 minutes. +reading from /mnt/ssd/DedupeFiles/advisors/settings_political_science_1990_2015_institutionTrue_fieldofstudy_catFalse_fieldofstudy_strFalse_keywordsFalsechristoph_degree0 +Done in 81.84747852881749 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_psychology_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_psychology_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..50d6b07 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_psychology_christoph_degree0_advisors_9015.log @@ -0,0 +1,806 @@ +Namespace(testing=False, verbose=1, field=['psychology'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [15744967] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008437593777974446 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 318.6123296221097 minutes + +Starting active labeling... +firstname : timothy +lastname : elliott +middlename : r +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of alabama at birmingham'),) +all_us_institutions_year : ((2006, 'university of alabama at birmingham'),) + +firstname : timothy +lastname : elliot +middlename : r +year_range : (1997, 2002) +main_us_institutions_year : ((2000, 'university of alabama'),) +all_us_institutions_year : ((2000, 'university of alabama'),) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : john +lastname : steffen +middlename : j +year_range : (1994,) +main_us_institutions_year : ((1994, 'university of cincinnati'),) +all_us_institutions_year : ((1994, 'university of cincinnati'),) + +firstname : john +lastname : steffensen +middlename : f +year_range : (1981, 2021) +main_us_institutions_year : ((1982, 'university of miami'), (1991, 'marine biological laboratory'), (1992, 'marine biological laboratory'), (1994, 'marine biological laboratory'), (1995, 'marine biological laboratory'), (1997, 'marine biological laboratory'), (1998, 'marine biological laboratory'), (1998, 'scripps health'), (1999, 'marine biological laboratory'), (2001, 'marine biological laboratory'), (2002, 'marine biological laboratory'), (2003, 'marine biological laboratory'), (2004, 'marine biological laboratory'), (2005, 'marine biological laboratory'), (2006, 'marine biological laboratory'), (2010, 'marine biological laboratory'), (2011, 'marine biological laboratory'), (2012, 'marine biological laboratory')) +all_us_institutions_year : ((1982, 'university of miami'), (1991, 'marine biological laboratory'), (1992, 'marine biological laboratory'), (1994, 'marine biological laboratory'), (1995, 'marine biological laboratory'), (1997, 'marine biological laboratory'), (1998, 'marine biological laboratory'), (1998, 'scripps health'), (1999, 'marine biological laboratory'), (2000, 'marine biological laboratory'), (2001, 'marine biological laboratory'), (2002, 'marine biological laboratory'), (2003, 'marine biological laboratory'), (2004, 'marine biological laboratory'), (2005, 'marine biological laboratory'), (2006, 'marine biological laboratory'), (2007, 'marine biological laboratory'), (2008, 'marine biological laboratory'), (2009, 'marine biological laboratory'), (2010, 'marine biological laboratory'), (2011, 'marine biological laboratory'), (2012, 'marine biological laboratory'), (2013, 'marine biological laboratory'), (2014, 'marine biological laboratory'), (2014, 'university of washington'), (2015, 'marine biological laboratory'), (2016, 'marine biological laboratory')) + +1/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : williams +middlename : l +year_range : (1996,) +main_us_institutions_year : ((1996, 'university of tennessee'),) +all_us_institutions_year : ((1996, 'university of tennessee'),) + +firstname : robert +lastname : williamson +middlename : c +year_range : (1954, 2006) +main_us_institutions_year : ((1962, 'haverford college'),) +all_us_institutions_year : ((1962, 'haverford college'),) + +1/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : ying +lastname : cheng +middlename : None +year_range : (2015,) +main_us_institutions_year : ((2015, 'university of notre dame'),) +all_us_institutions_year : ((2015, 'university of notre dame'),) + +firstname : ying +lastname : chen +middlename : None +year_range : (2002, 2021) +main_us_institutions_year : ((2008, 'columbia university'), (2010, 'columbia university'), (2015, 'columbia university'), (2017, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) +all_us_institutions_year : ((2008, 'columbia university'), (2010, 'columbia university'), (2015, 'columbia university'), (2017, 'columbia university'), (2019, 'columbia university'), (2020, 'columbia university'), (2021, 'columbia university')) + +1/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : anthony +lastname : stigliano +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'california school of professional psychology berkeley alameda'),) +all_us_institutions_year : ((1999, 'california school of professional psychology berkeley alameda'),) + +firstname : anthony +lastname : stigliani +middlename : None +year_range : (2011, 2014) +main_us_institutions_year : ((2011, 'university of pennsylvania'), (2014, 'university of pennsylvania')) +all_us_institutions_year : ((2011, 'university of pennsylvania'), (2014, 'university of pennsylvania')) + +1/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : diane +lastname : shallert +middlename : l +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of texas at austin'),) +all_us_institutions_year : ((2006, 'university of texas at austin'),) + +firstname : diane +lastname : schallert +middlename : l +year_range : (1976, 2021) +main_us_institutions_year : ((1978, 'university of illinois at urbana champaign'), (1981, 'university of illinois at urbana champaign'), (1987, 'university of texas at austin'), (1994, 'university of texas at austin'), (1996, 'university of texas at austin'), (1997, 'university of texas at austin'), (1999, 'university of texas at austin'), (2002, 'university of texas at austin'), (2003, 'university of texas at austin'), (2004, 'university of texas at austin'), (2005, 'university of texas at austin'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas at austin'), (2013, 'university of texas at austin'), (2014, 'university of texas at austin'), (2015, 'university of texas at austin'), (2016, 'university of texas at austin'), (2017, 'university of texas at austin'), (2018, 'university of texas at austin'), (2019, 'university of texas at austin'), (2020, 'university of texas at austin'), (2021, 'university of texas at austin')) +all_us_institutions_year : ((1978, 'university of illinois at urbana champaign'), (1981, 'university of illinois at urbana champaign'), (1987, 'university of texas at austin'), (1994, 'university of texas at austin'), (1996, 'university of texas at austin'), (1997, 'university of texas at austin'), (1999, 'university of texas at austin'), (2002, 'university of texas at austin'), (2003, 'university of texas at austin'), (2004, 'university of texas at austin'), (2005, 'university of texas at austin'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas at austin'), (2013, 'university of texas at austin'), (2014, 'university of texas at austin'), (2015, 'university of texas at austin'), (2016, 'university of texas at austin'), (2017, 'university of texas at austin'), (2018, 'university of texas at austin'), (2019, 'university of texas at austin'), (2020, 'university of texas at austin'), (2021, 'university of texas at austin')) + +1/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elizabeth +lastname : stinemorrow +middlename : a l +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2009, 'university of illinois at urbana champaign'),) + +firstname : elizabeth +lastname : stine +middlename : a l +year_range : (1986, 1991) +main_us_institutions_year : ((1986, 'brandeis university'), (1987, 'brandeis university'), (1988, 'brandeis university'), (1989, 'brandeis university'), (1990, 'brandeis university'), (1991, 'brandeis university')) +all_us_institutions_year : ((1986, 'brandeis university'), (1987, 'brandeis university'), (1988, 'brandeis university'), (1989, 'brandeis university'), (1990, 'brandeis university'), (1991, 'brandeis university')) + +2/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mary +lastname : oliver +middlename : beth +year_range : (2007,) +main_us_institutions_year : ((2007, 'pennsylvania state university'),) +all_us_institutions_year : ((2007, 'pennsylvania state university'),) + +firstname : mary +lastname : altier +middlename : beth +year_range : (2012, 2021) +main_us_institutions_year : ((2013, 'pennsylvania state university'), (2014, 'new york university'), (2017, 'new york university'), (2020, 'new york university'), (2021, 'new york university')) +all_us_institutions_year : ((2013, 'pennsylvania state university'), (2014, 'new york university'), (2017, 'new york university'), (2020, 'new york university'), (2021, 'new york university')) + +2/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mary +lastname : pangle +middlename : ann +year_range : (2011,) +main_us_institutions_year : ((2011, 'tennessee state university'),) +all_us_institutions_year : ((2011, 'tennessee state university'),) + +firstname : mary +lastname : littleton +middlename : ann +year_range : (2001, 2020) +main_us_institutions_year : ((2002, 'university of alabama'), (2002, 'university of alabama at birmingham'), (2009, 'east tennessee state university'), (2012, 'east tennessee state university'), (2014, 'tennessee state university'), (2015, 'east tennessee state university'), (2016, 'east tennessee state university'), (2017, 'east tennessee state university'), (2019, 'east tennessee state university'), (2020, 'east tennessee state university')) +all_us_institutions_year : ((1999, 'university of alabama at birmingham'), (2002, 'university of alabama'), (2002, 'university of alabama at birmingham'), (2009, 'east tennessee state university'), (2012, 'east tennessee state university'), (2014, 'tennessee state university'), (2015, 'east tennessee state university'), (2016, 'east tennessee state university'), (2017, 'east tennessee state university'), (2019, 'east tennessee state university'), (2020, 'east tennessee state university')) + +2/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : naomi +lastname : eisenberger +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of california los angeles'),) +all_us_institutions_year : ((2010, 'university of california los angeles'),) + +firstname : david +lastname : eisenberg +middlename : None +year_range : (1960, 2021) +main_us_institutions_year : ((1963, 'harvard university'), (1967, 'california institute of technology'), (1969, 'california institute of technology'), (1971, 'california institute of technology'), (1971, 'university of california los angeles'), (1975, 'university of california los angeles'), (1976, 'university of california los angeles'), (1977, 'university of california los angeles'), (1978, 'university of california los angeles'), (1980, 'university of california los angeles'), (1981, 'university of california los angeles'), (1982, 'university of california los angeles'), (1983, 'university of california los angeles'), (1984, 'university of california los angeles'), (1986, 'university of california los angeles'), (1987, 'university of california los angeles'), (1988, 'university of california los angeles'), (1989, 'university of california los angeles'), (1990, 'university of california los angeles'), (1991, 'university of california los angeles'), (1992, 'university of california los angeles'), (1993, 'university of california los angeles'), (1994, 'university of california los angeles'), (1995, 'university of california los angeles'), (1996, 'university of california los angeles'), (1997, 'university of california los angeles'), (1998, 'university of california los angeles'), (1999, 'university of california los angeles'), (2000, 'university of california los angeles'), (2001, 'university of california los angeles'), (2002, 'university of california los angeles'), (2003, 'university of california los angeles'), (2004, 'university of california los angeles'), (2005, 'university of california los angeles'), (2006, 'university of california los angeles'), (2007, 'university of california los angeles'), (2008, 'university of california los angeles'), (2009, 'university of california los angeles'), (2010, 'howard hughes medical institute'), (2011, 'university of california los angeles'), (2012, 'harvard university'), (2012, 'university of california los angeles'), (2013, 'university of california los angeles'), (2014, 'university of california los angeles'), (2015, 'university of california los angeles'), (2016, 'university of california los angeles'), (2017, 'university of california los angeles'), (2018, 'howard hughes medical institute'), (2019, 'university of california los angeles'), (2020, 'university of california los angeles')) +all_us_institutions_year : ((1963, 'harvard university'), (1967, 'california institute of technology'), (1969, 'california institute of technology'), (1971, 'california institute of technology'), (1971, 'university of california los angeles'), (1975, 'university of california los angeles'), (1976, 'university of california los angeles'), (1977, 'university of california los angeles'), (1978, 'university of california los angeles'), (1980, 'university of california los angeles'), (1981, 'university of california los angeles'), (1982, 'university of california los angeles'), (1983, 'university of california los angeles'), (1984, 'university of california los angeles'), (1986, 'university of california los angeles'), (1987, 'university of california los angeles'), (1988, 'university of california los angeles'), (1989, 'university of california los angeles'), (1990, 'university of california los angeles'), (1991, 'beth israel deaconess medical center'), (1991, 'university of california los angeles'), (1992, 'university of california los angeles'), (1992, 'wilmington university'), (1993, 'beth israel deaconess medical center'), (1993, 'university of california'), (1993, 'university of california los angeles'), (1994, 'harvard university'), (1994, 'university of california'), (1994, 'university of california los angeles'), (1995, 'university of california'), (1995, 'university of california los angeles'), (1996, 'harvard university'), (1996, 'university of california los angeles'), (1997, 'beth israel deaconess hospital'), (1997, 'university of california'), (1997, 'university of california los angeles'), (1998, 'beth israel deaconess medical center'), (1998, 'university of california'), (1998, 'university of california los angeles'), (1999, 'beth israel deaconess medical center'), (1999, 'boston medical center'), (1999, 'harvard university'), (1999, 'university of california los angeles'), (2000, 'beth israel deaconess hospital'), (2000, 'beth israel deaconess medical center'), (2000, 'university of california'), (2000, 'university of california los angeles'), (2001, 'beth israel deaconess medical center'), (2001, 'harvard university'), (2001, 'howard hughes medical institute'), (2001, 'university of california los angeles'), (2002, 'harvard university'), (2002, 'howard hughes medical institute'), (2002, 'los alamos national laboratory'), (2002, 'oregon health science university'), (2002, 'university of california'), (2002, 'university of california los angeles'), (2003, 'harvard university'), (2003, 'howard hughes medical institute'), (2003, 'university of california los angeles'), (2004, 'harvard university'), (2004, 'howard hughes medical institute'), (2004, 'university of california'), (2004, 'university of california los angeles'), (2005, 'beth israel deaconess medical center'), (2005, 'harvard university'), (2005, 'howard hughes medical institute'), (2005, 'university of california los angeles'), (2006, 'harvard university'), (2006, 'howard hughes medical institute'), (2006, 'university of california'), (2006, 'university of california los angeles'), (2007, 'harvard university'), (2007, 'howard hughes medical institute'), (2007, 'university of california'), (2007, 'university of california los angeles'), (2008, 'beth israel deaconess medical center'), (2008, 'harvard university'), (2008, 'howard hughes medical institute'), (2008, 'johnson johnson'), (2008, 'university of california los angeles'), (2009, 'howard hughes medical institute'), (2009, 'university of california los angeles'), (2010, 'howard hughes medical institute'), (2010, 'university of california los angeles'), (2011, 'beth israel deaconess medical center'), (2011, 'harvard university'), (2011, 'howard hughes medical institute'), (2011, 'university of california los angeles'), (2012, 'beth israel deaconess medical center'), (2012, 'harvard university'), (2012, 'howard hughes medical institute'), (2012, 'university of california'), (2012, 'university of california los angeles'), (2013, 'harvard university'), (2013, 'howard hughes medical institute'), (2013, 'university of california los angeles'), (2014, 'harvard university'), (2014, 'howard hughes medical institute'), (2014, 'university of california'), (2014, 'university of california los angeles'), (2014, 'university of texas at austin'), (2014, 'wilmington university'), (2015, 'harvard university'), (2015, 'howard hughes medical institute'), (2015, 'university of california los angeles'), (2015, 'university of texas at austin'), (2015, 'wilmington university'), (2016, 'harvard university'), (2016, 'howard hughes medical institute'), (2016, 'university of california los angeles'), (2016, 'university of california santa barbara'), (2017, 'harvard university'), (2017, 'howard hughes medical institute'), (2017, 'university of california'), (2017, 'university of california los angeles'), (2017, 'yale university'), (2018, 'harvard university'), (2018, 'howard hughes medical institute'), (2018, 'united states department of energy'), (2018, 'university of california los angeles'), (2019, 'harvard university'), (2019, 'howard hughes medical institute'), (2019, 'university of california'), (2019, 'university of california los angeles'), (2020, 'harvard university'), (2020, 'howard hughes medical institute'), (2020, 'university of california berkeley'), (2020, 'university of california los angeles'), (2021, 'harvard university'), (2021, 'howard hughes medical institute'), (2021, 'university of california'), (2021, 'university of california los angeles')) + +2/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : renee +lastname : alexandercraft +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2014, 'university of north carolina at chapel hill'),) + +firstname : lorraine +lastname : alexander +middlename : k +year_range : (1992, 2017) +main_us_institutions_year : ((1992, 'university of north carolina at chapel hill'), (1994, 'university of north carolina at chapel hill'), (1995, 'north carolina state university'), (1995, 'university of north carolina at chapel hill'), (1999, 'university of north carolina at chapel hill'), (2005, 'university of north carolina at chapel hill'), (2008, 'university of north carolina at chapel hill'), (2010, 'university of north carolina at chapel hill'), (2017, 'university of north carolina at chapel hill')) +all_us_institutions_year : ((1992, 'university of north carolina at chapel hill'), (1994, 'university of north carolina at chapel hill'), (1995, 'north carolina state university'), (1995, 'university of north carolina at chapel hill'), (1999, 'university of north carolina at chapel hill'), (2005, 'university of north carolina at chapel hill'), (2008, 'university of north carolina at chapel hill'), (2010, 'university of north carolina at chapel hill'), (2017, 'university of north carolina at chapel hill')) + +2/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : murray +lastname : stein +middlename : None +year_range : (1999,) +main_us_institutions_year : ((1999, 'pacifica graduate institute'),) +all_us_institutions_year : ((1999, 'pacifica graduate institute'),) + +firstname : murray +lastname : stein +middlename : b +year_range : (1982, 2021) +main_us_institutions_year : ((1997, 'university of california san diego'), (1998, 'university of california san diego'), (1999, 'university of california san diego'), (2000, 'university of california san diego'), (2001, 'university of california san diego'), (2002, 'university of california san diego'), (2003, 'university of california san diego'), (2004, 'university of california san diego'), (2005, 'university of california san diego'), (2006, 'university of california san diego'), (2007, 'university of california san diego'), (2008, 'university of california san diego'), (2009, 'university of california san diego'), (2010, 'university of california san diego'), (2011, 'university of california san diego'), (2012, 'university of california san diego'), (2013, 'university of california san diego'), (2014, 'university of california san diego'), (2015, 'university of california san diego'), (2016, 'university of california san diego'), (2017, 'university of california san diego'), (2018, 'university of california san diego'), (2019, 'university of california san diego'), (2020, 'university of california san diego'), (2021, 'university of california san diego')) +all_us_institutions_year : ((1994, 'university of california san diego'), (1996, 'university of california san diego'), (1997, 'university of california san diego'), (1997, 'veterans health administration'), (1998, 'university of california san diego'), (1999, 'university of california san diego'), (1999, 'veterans health administration'), (2000, 'university of california san diego'), (2001, 'san diego state university'), (2001, 'university of california san diego'), (2001, 'veterans health administration'), (2002, 'university of california san diego'), (2002, 'veterans health administration'), (2003, 'university of california san diego'), (2003, 'veterans health administration'), (2004, 'san diego state university'), (2004, 'university of california san diego'), (2004, 'veterans health administration'), (2005, 'university of california san diego'), (2005, 'veterans health administration'), (2006, 'san diego state university'), (2006, 'university of california san diego'), (2006, 'veterans health administration'), (2007, 'university of california los angeles'), (2007, 'university of california san diego'), (2007, 'veterans health administration'), (2008, 'san diego state university'), (2008, 'university of california san diego'), (2008, 'veterans health administration'), (2009, 'university of california san diego'), (2009, 'veterans health administration'), (2010, 'university of california san diego'), (2010, 'veterans health administration'), (2011, 'university of california san diego'), (2011, 'veterans health administration'), (2012, 'san diego state university'), (2012, 'university of california los angeles'), (2012, 'university of california san diego'), (2012, 'veterans health administration'), (2013, 'university of california los angeles'), (2013, 'university of california san diego'), (2013, 'veterans health administration'), (2014, 'university of california san diego'), (2014, 'veterans health administration'), (2015, 'university of california san diego'), (2015, 'veterans health administration'), (2016, 'university of california san diego'), (2016, 'veterans health administration'), (2017, 'university of california los angeles'), (2017, 'university of california san diego'), (2017, 'veterans health administration'), (2018, 'university of california los angeles'), (2018, 'university of california san diego'), (2018, 'veterans health administration'), (2019, 'university of california san diego'), (2019, 'university of pittsburgh'), (2019, 'veterans health administration'), (2020, 'university of california los angeles'), (2020, 'university of california san diego'), (2020, 'veterans health administration'), (2021, 'university of california san diego'), (2021, 'veterans health administration')) + +2/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : margie +lastname : linn +middlename : inman +year_range : (2003,) +main_us_institutions_year : ((2003, 'widener university'),) +all_us_institutions_year : ((2003, 'widener university'),) + +firstname : margaret +lastname : linn +middlename : inman +year_range : (2000, 2003) +main_us_institutions_year : ((2000, 'widener university'), (2002, 'widener university'), (2003, 'widener university')) +all_us_institutions_year : ((2000, 'widener university'), (2002, 'widener university'), (2003, 'widener university')) + +2/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : m +lastname : farrar +middlename : jeffrey +year_range : (1993,) +main_us_institutions_year : ((1993, 'university of florida'),) +all_us_institutions_year : ((1993, 'university of florida'),) + +firstname : michael +lastname : farrar +middlename : jeffrey +year_range : (1984, 1986) +main_us_institutions_year : ((1984, 'emory university'), (1986, 'emory university')) +all_us_institutions_year : ((1984, 'emory university'), (1984, 'university of denver'), (1986, 'emory university')) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : elizabeth +lastname : stinemorrow +middlename : a l +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of illinois at urbana champaign'),) +all_us_institutions_year : ((2013, 'university of illinois at urbana champaign'),) + +firstname : elizabeth +lastname : stine +middlename : a l +year_range : (1986, 1991) +main_us_institutions_year : ((1986, 'brandeis university'), (1987, 'brandeis university'), (1988, 'brandeis university'), (1989, 'brandeis university'), (1990, 'brandeis university'), (1991, 'brandeis university')) +all_us_institutions_year : ((1986, 'brandeis university'), (1987, 'brandeis university'), (1988, 'brandeis university'), (1989, 'brandeis university'), (1990, 'brandeis university'), (1991, 'brandeis university')) + +3/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : brent +lastname : robbins +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'walden university'),) +all_us_institutions_year : ((2014, 'walden university'),) + +firstname : brent +lastname : robbins +middlename : dean +year_range : (2000, 2021) +main_us_institutions_year : ((2000, 'duquesne university'), (2002, 'duquesne university'), (2006, 'daemen college'), (2008, 'point park university'), (2009, 'point park university'), (2011, 'point park university'), (2012, 'point park university'), (2014, 'point park university'), (2016, 'point park university'), (2017, 'point park university'), (2018, 'point park university'), (2019, 'point park university'), (2021, 'point park university')) +all_us_institutions_year : ((2000, 'duquesne university'), (2002, 'duquesne university'), (2006, 'daemen college'), (2008, 'point park university'), (2009, 'point park university'), (2011, 'point park university'), (2012, 'point park university'), (2013, 'point park university'), (2014, 'point park university'), (2015, 'point park university'), (2016, 'point park university'), (2017, 'point park university'), (2018, 'point park university'), (2019, 'point park university'), (2021, 'point park university')) + +3/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : stephen +lastname : zaccaro +middlename : j +year_range : (2014,) +main_us_institutions_year : ((2014, 'george mason university'),) +all_us_institutions_year : ((2014, 'george mason university'),) + +firstname : stephen +lastname : zaccaro +middlename : j +year_range : (1984, 1989) +main_us_institutions_year : ((1984, 'virginia tech'), (1985, 'virginia tech'), (1986, 'virginia tech'), (1988, 'virginia tech'), (1989, 'virginia tech')) +all_us_institutions_year : ((1984, 'virginia tech'), (1985, 'virginia tech'), (1986, 'virginia tech'), (1988, 'virginia tech'), (1989, 'virginia tech')) + +3/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : leland +lastname : daele +middlename : van den +year_range : (1992,) +main_us_institutions_year : ((1992, 'california school of professional psychology san diego'),) +all_us_institutions_year : ((1992, 'california school of professional psychology san diego'),) + +firstname : leland +lastname : daele +middlename : van den +year_range : (1978, 1992) +main_us_institutions_year : ((1978, 'rutgers university'), (1979, 'rutgers university')) +all_us_institutions_year : ((1978, 'rutgers university'), (1979, 'rutgers university')) + +3/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : george +lastname : stricker +middlename : None +year_range : (2004,) +main_us_institutions_year : ((2004, 'adelphi university the institute of advanced psychological studies'),) +all_us_institutions_year : ((2004, 'adelphi university the institute of advanced psychological studies'),) + +firstname : george +lastname : stricker +middlename : None +year_range : (1960, 2015) +main_us_institutions_year : ((1964, 'adelphi university'), (1966, 'adelphi university'), (1967, 'adelphi university'), (1968, 'adelphi university'), (1969, 'adelphi university'), (1972, 'adelphi university'), (1983, 'adelphi university'), (1985, 'adelphi university'), (1987, 'adelphi university'), (1990, 'adelphi university'), (1991, 'adelphi university'), (1995, 'adelphi university'), (1996, 'adelphi university'), (1997, 'adelphi university'), (1998, 'adelphi university'), (1999, 'adelphi university'), (2000, 'adelphi university'), (2001, 'adelphi university'), (2002, 'adelphi university'), (2003, 'adelphi university'), (2004, 'adelphi university'), (2005, 'alliant international university'), (2005, 'adelphi university'), (2005, 'university of washington'), (2006, 'adelphi university'), (2006, 'university of washington'), (2011, 'university of washington'), (2012, 'university of washington'), (2015, 'university of washington')) +all_us_institutions_year : ((1964, 'adelphi university'), (1966, 'adelphi university'), (1967, 'adelphi university'), (1968, 'adelphi university'), (1969, 'adelphi university'), (1972, 'adelphi university'), (1983, 'adelphi university'), (1985, 'adelphi university'), (1987, 'adelphi university'), (1990, 'adelphi university'), (1991, 'adelphi university'), (1993, 'adelphi university'), (1995, 'adelphi university'), (1996, 'adelphi university'), (1997, 'adelphi university'), (1998, 'adelphi university'), (1999, 'adelphi university'), (2000, 'adelphi university'), (2001, 'adelphi university'), (2002, 'adelphi university'), (2002, 'university of washington'), (2003, 'adelphi university'), (2004, 'adelphi university'), (2005, 'adelphi university'), (2005, 'alliant international university'), (2005, 'university of washington'), (2006, 'adelphi university'), (2006, 'university of washington'), (2009, 'alliant international university'), (2011, 'university of washington'), (2012, 'university of washington'), (2015, 'university of washington')) + +4/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : casey +lastname : dorman +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'united states international university'),) +all_us_institutions_year : ((1995, 'united states international university'),) + +firstname : casey +lastname : dorman +middlename : None +year_range : (1991, 1999) +main_us_institutions_year : ((1991, 'alliant international university'), (1999, 'alliant international university')) +all_us_institutions_year : ((1991, 'alliant international university'), (1999, 'alliant international university')) + +5/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : andrew +lastname : meyer +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'spalding university'),) +all_us_institutions_year : ((2002, 'spalding university'),) + +firstname : andrew +lastname : meyer +middlename : None +year_range : (2011, 2015) +main_us_institutions_year : ((2015, 'yale university'),) +all_us_institutions_year : ((2015, 'yale university'),) + +6/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : symiin +lastname : chow +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of north carolina at chapel hill'),) +all_us_institutions_year : ((2012, 'university of north carolina at chapel hill'),) + +firstname : symiin +lastname : chow +middlename : None +year_range : (2003, 2004) +main_us_institutions_year : ((2003, 'university of virginia'),) +all_us_institutions_year : ((2003, 'university of virginia'),) + +6/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : diperna +middlename : clyde +year_range : (2012,) +main_us_institutions_year : ((2012, 'pennsylvania state university'),) +all_us_institutions_year : ((2012, 'pennsylvania state university'),) + +firstname : james +lastname : diperna +middlename : clyde +year_range : (2000, 2020) +main_us_institutions_year : ((2000, 'lehigh university'), (2002, 'lehigh university')) +all_us_institutions_year : ((2000, 'lehigh university'), (2002, 'lehigh university')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : collie +lastname : conoley +middlename : w +year_range : (1994,) +main_us_institutions_year : ((1994, 'university of nebraska lincoln'),) +all_us_institutions_year : ((1994, 'university of nebraska lincoln'),) + +firstname : collie +lastname : conoley +middlename : wyatt +year_range : (1982, 1985) +main_us_institutions_year : ((1982, 'university of north texas'), (1983, 'university of north texas'), (1985, 'university of north texas')) +all_us_institutions_year : ((1982, 'university of north texas'), (1983, 'university of north texas'), (1985, 'university of north texas')) + +6/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : john +lastname : cavanaugh +middlename : c +year_range : (1992,) +main_us_institutions_year : ((1992, 'bowling green state university'),) +all_us_institutions_year : ((1992, 'bowling green state university'),) + +firstname : john +lastname : cavanaugh +middlename : c +year_range : (1994, 2003) +main_us_institutions_year : ((1994, 'university of delaware'), (1995, 'university of delaware'), (1998, 'university of delaware'), (1999, 'university of delaware')) +all_us_institutions_year : ((1994, 'university of delaware'), (1995, 'university of delaware'), (1998, 'university of delaware'), (1999, 'university of delaware')) + +6/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jon +lastname : crawford +middlename : None +year_range : (2014,) +main_us_institutions_year : ((2014, 'northern illinois university'),) +all_us_institutions_year : ((2014, 'northern illinois university'),) + +firstname : j +lastname : crawford +middlename : douglas +year_range : (1995, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2007, 'taylor university'), (2010, 'university of michigan'), (2014, 'university of michigan'), (2019, 'vision sciences inc')) + +6/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : francis +lastname : pirozzolo +middlename : j +year_range : (1992,) +main_us_institutions_year : ((1992, 'california school of professional psychology fresno'),) +all_us_institutions_year : ((1992, 'california school of professional psychology fresno'),) + +firstname : francis +lastname : pirozzolo +middlename : j +year_range : (1979, 2011) +main_us_institutions_year : ((1979, 'university of massachusetts amherst'), (1979, 'university of minnesota'), (1981, 'university of minnesota'), (1982, 'university of minnesota'), (1985, 'baylor college of medicine'), (1986, 'baylor college of medicine'), (1987, 'baylor college of medicine'), (1988, 'baylor college of medicine'), (1989, 'baylor college of medicine'), (1990, 'baylor college of medicine'), (1991, 'baylor college of medicine'), (1993, 'baylor college of medicine'), (1995, 'baylor college of medicine'), (1999, 'baylor college of medicine')) +all_us_institutions_year : ((1979, 'university of massachusetts amherst'), (1979, 'university of minnesota'), (1981, 'university of minnesota'), (1982, 'baylor college of medicine'), (1982, 'university of minnesota'), (1984, 'baylor college of medicine'), (1985, 'baylor college of medicine'), (1986, 'baylor college of medicine'), (1987, 'baylor college of medicine'), (1988, 'baylor college of medicine'), (1989, 'baylor college of medicine'), (1990, 'baylor college of medicine'), (1991, 'baylor college of medicine'), (1993, 'baylor college of medicine'), (1995, 'baylor college of medicine'), (1999, 'baylor college of medicine')) + +7/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : marla +lastname : peterson +middlename : None +year_range : (1995,) +main_us_institutions_year : ((1995, 'university of tennessee'),) +all_us_institutions_year : ((1995, 'university of tennessee'),) + +firstname : w +lastname : peter +middlename : f +year_range : (2003, 2021) +main_us_institutions_year : None +all_us_institutions_year : ((2019, 'public health research institute'),) + +8/10 positive, 14/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : frank +lastname : richardson +middlename : c +year_range : (1990,) +main_us_institutions_year : ((1990, 'university of texas at austin'),) +all_us_institutions_year : ((1990, 'university of texas at austin'),) + +firstname : alan +lastname : richardsonklavehn +middlename : None +year_range : (1988, 2002) +main_us_institutions_year : None +all_us_institutions_year : ((1994, 'university of california los angeles'),) + +8/10 positive, 15/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : oliva +lastname : espin +middlename : m +year_range : (1998,) +main_us_institutions_year : ((1998, 'california school of professional psychology san diego'),) +all_us_institutions_year : ((1998, 'california school of professional psychology san diego'),) + +firstname : oliva +lastname : espin +middlename : m +year_range : (1982, 2019) +main_us_institutions_year : ((1987, 'tufts university'), (1990, 'tufts university'), (1995, 'san diego state university'), (1997, 'san diego state university'), (1998, 'san diego state university'), (2004, 'san diego state university'), (2010, 'san diego state university'), (2012, 'san diego state university'), (2013, 'san diego state university')) +all_us_institutions_year : ((1987, 'tufts university'), (1990, 'tufts university'), (1992, 'san diego state university'), (1995, 'san diego state university'), (1997, 'san diego state university'), (1998, 'san diego state university'), (2004, 'san diego state university'), (2006, 'san diego state university'), (2010, 'san diego state university'), (2012, 'san diego state university'), (2013, 'san diego state university')) + +8/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robin +lastname : coleman +middlename : r means +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of pittsburgh'),) +all_us_institutions_year : ((2003, 'university of pittsburgh'),) + +firstname : robin +lastname : coleman +middlename : r means +year_range : (2003, 2007) +main_us_institutions_year : ((2007, 'university of michigan'),) +all_us_institutions_year : ((2007, 'university of michigan'),) + +9/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : anne +lastname : seraphine +middlename : e +year_range : (2002,) +main_us_institutions_year : ((2002, 'university of florida'),) +all_us_institutions_year : ((2002, 'university of florida'),) + +firstname : anne +lastname : seraphine +middlename : e +year_range : (1998, 2004) +main_us_institutions_year : ((1998, 'university of texas at austin'), (2000, 'university of texas at austin')) +all_us_institutions_year : ((1998, 'university of texas at austin'), (2000, 'university of texas at austin')) + +10/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : kathryn +lastname : white +middlename : p +year_range : (1990,) +main_us_institutions_year : ((1990, 'california school of professional psychology los angeles'),) +all_us_institutions_year : ((1990, 'california school of professional psychology los angeles'),) + +firstname : k +lastname : white +middlename : geoffrey +year_range : (1973, 1985) +main_us_institutions_year : None +all_us_institutions_year : ((1985, 'wellington management company'),) + +11/10 positive, 16/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : melody +lastname : wollan +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'touro university international'),) +all_us_institutions_year : ((2007, 'touro university international'),) + +firstname : melody +lastname : wollan +middlename : l +year_range : (2008, 2018) +main_us_institutions_year : ((2008, 'eastern illinois university'), (2009, 'eastern illinois university'), (2014, 'eastern illinois university'), (2016, 'eastern illinois university')) +all_us_institutions_year : ((2008, 'eastern illinois university'), (2009, 'eastern illinois university'), (2014, 'eastern illinois university'), (2016, 'eastern illinois university'), (2019, 'eastern illinois university')) + +11/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : gretchen +lastname : butera +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'indiana university'),) +all_us_institutions_year : ((2010, 'indiana university'),) + +firstname : gretchen +lastname : butera +middlename : None +year_range : (1991, 1992) +main_us_institutions_year : ((1991, 'university of california santa barbara'), (1992, 'university of california santa barbara')) +all_us_institutions_year : ((1991, 'university of california santa barbara'), (1992, 'university of california santa barbara')) + +11/10 positive, 17/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : wilson +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of california berkeley'),) +all_us_institutions_year : ((2008, 'university of california berkeley'),) + +firstname : mark +lastname : wilson +middlename : a +year_range : (1989, 2019) +main_us_institutions_year : ((1993, 'university of louisville'), (1994, 'university of louisville'), (1995, 'university of louisville'), (1996, 'university of louisville'), (1997, 'university of louisville'), (1998, 'university of louisville'), (1999, 'university of louisville'), (2000, 'university of louisville'), (2001, 'university of louisville'), (2002, 'united states department of veterans affairs'), (2006, 'veterans health administration'), (2009, 'veterans health administration'), (2010, 'united states department of veterans affairs'), (2011, 'veterans health administration'), (2014, 'united states department of veterans affairs'), (2015, 'veterans health administration'), (2016, 'veterans health administration')) +all_us_institutions_year : ((1993, 'university of louisville'), (1994, 'university of louisville'), (1995, 'university of louisville'), (1996, 'united states department of veterans affairs'), (1996, 'university of louisville'), (1996, 'veterans health administration'), (1997, 'united states department of veterans affairs'), (1997, 'university of louisville'), (1997, 'veterans health administration'), (1998, 'united states department of veterans affairs'), (1998, 'university of louisville'), (1999, 'university of louisville'), (2000, 'university of louisville'), (2000, 'veterans health administration'), (2001, 'university of louisville'), (2001, 'veterans health administration'), (2002, 'united states department of veterans affairs'), (2006, 'veterans health administration'), (2009, 'veterans health administration'), (2010, 'united states department of veterans affairs'), (2011, 'veterans health administration'), (2014, 'united states department of veterans affairs'), (2015, 'veterans health administration'), (2016, 'veterans health administration')) + +11/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : lemay +middlename : a leo +year_range : (1991,) +main_us_institutions_year : ((1991, 'university of delaware'),) +all_us_institutions_year : ((1991, 'university of delaware'),) + +firstname : j +lastname : lemay +middlename : a leo +year_range : (1964, 2009) +main_us_institutions_year : ((1992, 'old dominion university'),) +all_us_institutions_year : ((1992, 'old dominion university'),) + +11/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : j +lastname : lemay +middlename : a leo +year_range : (1999,) +main_us_institutions_year : ((1999, 'university of delaware'),) +all_us_institutions_year : ((1999, 'university of delaware'),) + +firstname : j +lastname : lemay +middlename : a leo +year_range : (1964, 2009) +main_us_institutions_year : ((1992, 'old dominion university'),) +all_us_institutions_year : ((1992, 'old dominion university'),) + +12/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : roslyn +lastname : fitch +middlename : holly +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of connecticut'),) +all_us_institutions_year : ((2012, 'university of connecticut'),) + +firstname : r +lastname : fitch +middlename : holly +year_range : (1997, 2010) +main_us_institutions_year : ((1997, 'rutgers university'), (2001, 'beth israel deaconess medical center')) +all_us_institutions_year : ((1997, 'rutgers university'), (2001, 'beth israel deaconess medical center')) + +13/10 positive, 18/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 388.81255267858506 minutes. diff --git a/src/dataprep/temp/trainlink_mag_proquest_sociology_christoph_degree0_advisors_9015.log b/src/dataprep/temp/trainlink_mag_proquest_sociology_christoph_degree0_advisors_9015.log new file mode 100644 index 0000000..3102811 --- /dev/null +++ b/src/dataprep/temp/trainlink_mag_proquest_sociology_christoph_degree0_advisors_9015.log @@ -0,0 +1,670 @@ +Namespace(testing=False, verbose=1, field=['sociology'], train_name='christoph_degree0', startyear=1990, endyear=2015, loadstartyear=1990, loadendyear=2015, mergemode='m:1', recall=0.9, institution='True', fieldofstudy_cat='False', fieldofstudy_str='False', keywords='False', retrain='True', linking_type='advisors', samplesize=100000, write_to='database') +Have max 12 cores available +Testing is False + +I set the write connection to the main database. +id_field is [144024400] and will be passed to sql queries. +finished setup ... +Time elapsed: 0.0008432308832804362 minutes + + + SELECT relationship_id + , year + , year AS year_range + , firstname + , lastname + , CASE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(middle_lastname, 1, l_fullname-l_firstname-l_lastname - 1)) + END AS middlename + , fieldofstudy + , keywords + , institution + , year || "//" || institution as main_us_institutions_year + , year || "//" || institution as all_us_institutions_year + FROM ( + SELECT goid + , relationship_id + , degree_year AS year + , a.fullname + , SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) AS firstname + , REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "") AS lastname + , TRIM(SUBSTR(a.fullname, length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1)) + 1)) AS middle_lastname + , length(a.fullname) AS l_fullname + , length(SUBSTR(TRIM(a.fullname),1,instr(trim(a.fullname)||' ',' ')-1) ) AS l_firstname + , length(REPLACE(a.fullname, RTRIM(a.fullname, REPLACE(a.fullname, " ", "")), "")) AS l_lastname + , fieldname AS fieldofstudy + , university_id + FROM pq_authors + INNER JOIN ( + SELECT goid, fieldname + FROM pq_fields_mag + WHERE mag_field0 IN (?) + ) USING (goid) + INNER JOIN ( --# NOTE: this only keeps the theses where at least one advisor is present + SELECT *, firstname || ' ' || lastname AS fullname + FROM pq_advisors + ) AS a USING(goid) + ) + -- ## NOTE: use left join here as not all graduates have advisor (particularly pre-1980) and possibly also keywords + LEFT JOIN ( + SELECT goid + , fields as keywords + FROM pq_info_linking + ) USING(goid) + INNER JOIN ( + SELECT university_id, normalizedname as institution + FROM pq_unis --## mark: previously we linked advisors anywhere in the world (as career outcomes). for now, focus on US + WHERE location like "%United States%" + ) USING(university_id) + WHERE year >= 1990 and year <= 2015 + + + + SELECT f.AuthorId + , f.year + , f.YearLastPub + , f.firstname + , f.lastname + , CASE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + WHEN + "" THEN NULL + ELSE TRIM(SUBSTR(f.middle_lastname, 1, f.l_fullname - f.l_firstname - f.l_lastname - 1)) + END as middlename + -- ## NOTE this gives "" for middlename when it is missing + , f.fieldofstudy + , g.keywords + , g.coauthors + , g.institution + , g.main_us_institutions_year + + , f.year || ";" || f.YearLastPub AS year_range + , g.all_us_institutions_year + + FROM ( + SELECT a.AuthorId + , a.YearFirstPub AS year + , a.YearLastPub + , a.FirstName AS firstname + , REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "") AS lastname + -- https://stackoverflow.com/questions/21388820/how-to-get-the-last-index-of-a-substring-in-sqlite + , TRIM(SUBSTR(b.NormalizedName, length(a.FirstName) + 1)) AS middle_lastname + -- this gives all except the first name + , length(b.NormalizedName) as l_fullname + , length(a.FirstName) as l_firstname + , length(REPLACE(b.NormalizedName, RTRIM(b.NormalizedName, REPLACE(b.NormalizedName, " ", "")), "")) as l_lastname + , e.NormalizedName AS fieldofstudy + FROM author_sample AS a + INNER JOIN ( + SELECT AuthorId, NormalizedName + FROM Authors + ) AS b USING(AuthorId) + INNER JOIN ( + SELECT AuthorId + FROM author_field0 + WHERE FieldOfStudyId_lvl0 IN (?) + AND Degree <= 0 + ) USING(AuthorId) + LEFT JOIN ( + SELECT AuthorId, NormalizedName + FROM author_fields c + INNER JOIN ( + SELECT FieldOfStudyId, NormalizedName + FROM FieldsOfStudy + ) AS d USING(FieldOfStudyId) + -- ## Condition on fieldofstudy being in the level 0 id_field + INNER JOIN ( + SELECT ParentFieldOfStudyId, ChildFieldOfStudyId + FROM crosswalk_fields + WHERE ParentLevel = 0 + AND ParentFieldOfStudyId IN (?) + ) AS e ON (e.ChildFieldOfStudyId = c.FieldOfStudyId) + WHERE FieldClass = 'first' + ) AS e USING(AuthorId) + ) f + LEFT JOIN ( + SELECT AuthorId + , main_us_institutions_career as institution + , coauthors + , keywords + , main_us_institutions_year + , all_us_institutions_year + FROM author_info_linking + ) AS g USING(AuthorId) + + WHERE f.YearLastPub >= 1990 - 5 AND year <= 2015 + 5 AND institution is not NULL + + +Time elapsed: 122.0439579963684 minutes + +Starting active labeling... +firstname : colin +lastname : johnson +middlename : r +year_range : (2015,) +main_us_institutions_year : ((2015, 'indiana university'),) +all_us_institutions_year : ((2015, 'indiana university'),) + +firstname : colin +lastname : campbell +middlename : None +year_range : (1914, 2020) +main_us_institutions_year : None +all_us_institutions_year : ((2007, 'indiana university'),) + +0/10 positive, 0/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished +firstname : kimberly +lastname : bender +middlename : None +year_range : (2013,) +main_us_institutions_year : ((2013, 'university of denver'),) +all_us_institutions_year : ((2013, 'university of denver'),) + +firstname : kimberly +lastname : bender +middlename : None +year_range : (2002, 2021) +main_us_institutions_year : ((2003, 'colorado state university'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of denver'), (2010, 'university of denver'), (2011, 'university of denver'), (2012, 'university of denver'), (2013, 'university of denver'), (2014, 'university of denver'), (2015, 'university of denver'), (2016, 'university of denver'), (2017, 'university of denver'), (2018, 'university of denver'), (2019, 'university of denver'), (2020, 'university of denver'), (2021, 'university of denver')) +all_us_institutions_year : ((2003, 'colorado state university'), (2006, 'university of texas at austin'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of denver'), (2010, 'university of denver'), (2011, 'university of denver'), (2012, 'michigan state university'), (2012, 'university of denver'), (2013, 'university of denver'), (2014, 'university of denver'), (2015, 'university of denver'), (2016, 'michigan state university'), (2016, 'university of denver'), (2017, 'university of denver'), (2018, 'university of denver'), (2019, 'university of denver'), (2020, 'university of denver'), (2021, 'university of denver')) + +0/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : barkley +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'walden university'),) +all_us_institutions_year : ((2011, 'walden university'),) + +firstname : william +lastname : barkley +middlename : m +year_range : (2011, 2014) +main_us_institutions_year : ((2011, 'walden university'), (2012, 'walden university'), (2014, 'walden university')) +all_us_institutions_year : ((2011, 'walden university'), (2012, 'walden university'), (2014, 'walden university'), (2017, 'walden university')) + +1/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : lisa +lastname : disch +middlename : j +year_range : (2003,) +main_us_institutions_year : ((2003, 'university of minnesota'),) +all_us_institutions_year : ((2003, 'university of minnesota'),) + +firstname : lisa +lastname : disch +middlename : None +year_range : (1991, 2021) +main_us_institutions_year : ((1991, 'university of minnesota'), (1992, 'university of minnesota'), (1995, 'university of minnesota'), (1996, 'university of minnesota'), (1998, 'university of minnesota'), (1999, 'university of minnesota'), (2003, 'university of minnesota'), (2008, 'university of minnesota'), (2009, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2018, 'university of michigan'), (2021, 'university of michigan')) +all_us_institutions_year : ((1991, 'university of minnesota'), (1992, 'university of minnesota'), (1995, 'university of minnesota'), (1996, 'university of minnesota'), (1998, 'university of minnesota'), (1999, 'university of minnesota'), (2003, 'university of minnesota'), (2007, 'university of minnesota'), (2008, 'university of michigan'), (2008, 'university of minnesota'), (2009, 'university of michigan'), (2011, 'university of michigan'), (2012, 'university of michigan'), (2014, 'university of michigan'), (2015, 'university of michigan'), (2016, 'university of michigan'), (2018, 'university of michigan'), (2021, 'university of michigan')) + +2/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : miyako +lastname : inoue +middlename : None +year_range : (2011,) +main_us_institutions_year : ((2011, 'stanford university'),) +all_us_institutions_year : ((2011, 'stanford university'),) + +firstname : miyako +lastname : inoue +middlename : None +year_range : (1989, 2018) +main_us_institutions_year : ((1989, 'university of washington'), (2002, 'stanford university'), (2003, 'stanford university'), (2009, 'stanford university'), (2011, 'stanford university'), (2018, 'stanford university')) +all_us_institutions_year : ((1989, 'university of washington'), (2002, 'stanford university'), (2003, 'stanford university'), (2009, 'stanford university'), (2011, 'stanford university'), (2018, 'stanford university')) + +3/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : cynthia +lastname : osborn +middlename : j +year_range : (2008,) +main_us_institutions_year : ((2008, 'kent state university'),) +all_us_institutions_year : ((2008, 'kent state university'),) + +firstname : cynthia +lastname : osborne +middlename : None +year_range : (2005, 2020) +main_us_institutions_year : ((2005, 'princeton university'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas at austin'), (2014, 'university of texas at austin'), (2015, 'university of texas at austin'), (2016, 'university of texas at austin'), (2017, 'university of texas at austin'), (2019, 'university of texas at austin'), (2020, 'university of texas at austin')) +all_us_institutions_year : ((2004, 'university of texas at austin'), (2005, 'princeton university'), (2007, 'university of texas at austin'), (2008, 'university of texas at austin'), (2009, 'university of texas at austin'), (2010, 'university of texas at austin'), (2011, 'university of texas at austin'), (2012, 'university of texas at austin'), (2014, 'university of texas at austin'), (2015, 'university of texas at austin'), (2016, 'university of texas at austin'), (2017, 'university of texas at austin'), (2018, 'university of texas at austin'), (2019, 'university of texas at austin'), (2020, 'university of texas at austin')) + +4/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : thomas +lastname : martinek +middlename : None +year_range : (2009,) +main_us_institutions_year : ((2009, 'university of north carolina at greensboro'),) +all_us_institutions_year : ((2009, 'university of north carolina at greensboro'),) + +firstname : thomas +lastname : martin +middlename : r +year_range : (1830, 2021) +main_us_institutions_year : ((1980, 'university of washington'), (1981, 'university of washington'), (1982, 'university of washington'), (1983, 'university of washington'), (1984, 'united states department of veterans affairs'), (1986, 'veterans health administration'), (1986, 'university of washington'), (1987, 'harvard university'), (1987, 'united states department of veterans affairs'), (1988, 'veterans health administration'), (1988, 'boston children s hospital'), (1988, 'university of washington'), (1988, 'harvard university'), (1989, 'veterans health administration'), (1989, 'university of washington'), (1990, 'veterans health administration'), (1990, 'harvard university'), (1991, 'united states department of veterans affairs'), (1992, 'veterans health administration'), (1992, 'university of washington'), (1993, 'boston children s hospital'), (1994, 'university of washington'), (1995, 'university of washington'), (1996, 'veterans health administration'), (1997, 'veterans health administration'), (1997, 'university of washington'), (1998, 'university of washington'), (1999, 'veterans health administration'), (1999, 'university of washington'), (2000, 'university of washington'), (2001, 'university of washington'), (2002, 'veterans health administration'), (2003, 'university of washington'), (2004, 'university of washington'), (2005, 'university of massachusetts medical school'), (2005, 'veterans health administration'), (2006, 'university of washington'), (2007, 'university of washington'), (2008, 'university of washington'), (2009, 'university of washington'), (2010, 'harvard university'), (2011, 'university of washington'), (2012, 'boston children s hospital'), (2014, 'boston children s hospital'), (2017, 'university of washington'), (2018, 'boston children s hospital'), (2018, 'university of washington'), (2019, 'veterans health administration'), (2020, 'university of washington')) +all_us_institutions_year : ((1980, 'university of washington'), (1981, 'university of washington'), (1982, 'university of washington'), (1983, 'university of washington'), (1984, 'united states department of veterans affairs'), (1986, 'university of washington'), (1986, 'veterans health administration'), (1987, 'harvard university'), (1987, 'united states department of veterans affairs'), (1988, 'boston children s hospital'), (1988, 'harvard university'), (1988, 'university of washington'), (1988, 'veterans health administration'), (1989, 'boston children s hospital'), (1989, 'harvard university'), (1989, 'united states department of veterans affairs'), (1989, 'university of washington'), (1989, 'veterans health administration'), (1990, 'harvard university'), (1990, 'veterans health administration'), (1991, 'united states department of veterans affairs'), (1991, 'university of washington'), (1992, 'university of washington'), (1992, 'veterans health administration'), (1993, 'boston children s hospital'), (1994, 'boston children s hospital'), (1994, 'brigham and women s hospital'), (1994, 'harvard university'), (1994, 'united states department of veterans affairs'), (1994, 'university of washington'), (1994, 'veterans health administration'), (1995, 'boston children s hospital'), (1995, 'university of washington'), (1996, 'harvard university'), (1996, 'university of washington'), (1996, 'veterans health administration'), (1997, 'boston children s hospital'), (1997, 'harvard university'), (1997, 'scripps research institute'), (1997, 'university of washington'), (1997, 'veterans health administration'), (1998, 'united states department of veterans affairs'), (1998, 'university of washington'), (1999, 'harvard university'), (1999, 'university of washington'), (1999, 'veterans health administration'), (2000, 'boston children s hospital'), (2000, 'harvard university'), (2000, 'united states department of veterans affairs'), (2000, 'university of washington'), (2000, 'veterans health administration'), (2001, 'united states department of veterans affairs'), (2001, 'university of washington'), (2001, 'veterans health administration'), (2002, 'tufts university'), (2002, 'united states department of veterans affairs'), (2002, 'university of washington'), (2002, 'veterans health administration'), (2003, 'university of washington'), (2004, 'united states department of veterans affairs'), (2004, 'university of washington'), (2004, 'veterans health administration'), (2005, 'university of massachusetts medical school'), (2005, 'university of washington'), (2005, 'veterans health administration'), (2006, 'harborview medical center'), (2006, 'university of washington'), (2006, 'veterans health administration'), (2007, 'university of washington'), (2007, 'veterans health administration'), (2008, 'university of washington'), (2009, 'university of washington'), (2010, 'harvard university'), (2010, 'university of washington'), (2010, 'veterans health administration'), (2011, 'boston children s hospital'), (2011, 'harvard university'), (2011, 'university of washington'), (2011, 'veterans health administration'), (2012, 'boston children s hospital'), (2012, 'harvard university'), (2012, 'university of washington'), (2013, 'boston children s hospital'), (2013, 'harvard university'), (2013, 'university of washington'), (2013, 'veterans health administration'), (2014, 'boston children s hospital'), (2016, 'boston children s hospital'), (2016, 'university of washington'), (2017, 'university of washington'), (2018, 'boston children s hospital'), (2018, 'university of washington'), (2019, 'veterans health administration'), (2020, 'university of washington')) + +4/10 positive, 1/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : robert +lastname : mahony +middlename : None +year_range : (1991,) +main_us_institutions_year : ((1991, 'catholic university of america'),) +all_us_institutions_year : ((1991, 'catholic university of america'),) + +firstname : robert +lastname : mahon +middlename : None +year_range : (2014, 2020) +main_us_institutions_year : ((2014, 'idaho state university'), (2015, 'university of wyoming'), (2017, 'university of wyoming'), (2018, 'university of wyoming'), (2018, 'university of arkansas'), (2019, 'university of new orleans'), (2019, 'university of arkansas'), (2020, 'university of new orleans')) +all_us_institutions_year : ((2014, 'idaho state university'), (2014, 'university of wyoming'), (2015, 'university of wyoming'), (2017, 'university of wyoming'), (2018, 'university of arkansas'), (2018, 'university of new orleans'), (2018, 'university of wyoming'), (2019, 'university of arkansas'), (2019, 'university of new orleans'), (2020, 'university of new orleans')) + +4/10 positive, 2/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : isaacsavage +middlename : paulette +year_range : (2008,) +main_us_institutions_year : ((2008, 'university of missouri saint louis'),) +all_us_institutions_year : ((2008, 'university of missouri saint louis'),) + +firstname : e +lastname : isaac +middlename : paulette +year_range : (2005, 2011) +main_us_institutions_year : ((2005, 'university of missouri'), (2011, 'university of missouri')) +all_us_institutions_year : ((2005, 'university of missouri'), (2011, 'university of missouri')) + +4/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : e +lastname : issacsavage +middlename : paulette +year_range : (2012,) +main_us_institutions_year : ((2012, 'university of missouri saint louis'),) +all_us_institutions_year : ((2012, 'university of missouri saint louis'),) + +firstname : e +lastname : isaacsavage +middlename : paulette +year_range : (2013, 2018) +main_us_institutions_year : ((2013, 'university of missouri st louis'), (2014, 'university of missouri st louis'), (2016, 'university of missouri st louis'), (2018, 'university of missouri st louis')) +all_us_institutions_year : ((2006, 'university of missouri st louis'), (2013, 'university of missouri st louis'), (2014, 'university of missouri st louis'), (2016, 'university of missouri st louis'), (2018, 'university of missouri st louis')) + +5/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : toni +lastname : mcneron +middlename : a h +year_range : (1994,) +main_us_institutions_year : ((1994, 'university of minnesota'),) +all_us_institutions_year : ((1994, 'university of minnesota'),) + +firstname : toni +lastname : mcnaron +middlename : a h +year_range : (1983, 2007) +main_us_institutions_year : ((1983, 'university of minnesota'), (1992, 'university of minnesota'), (2007, 'university of minnesota')) +all_us_institutions_year : ((1983, 'university of minnesota'), (1992, 'university of minnesota'), (2007, 'university of minnesota')) + +6/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : jurg +lastname : sigenthaler +middlename : k +year_range : (1996,) +main_us_institutions_year : ((1996, 'american university'),) +all_us_institutions_year : ((1996, 'american university'),) + +firstname : jurg +lastname : siegenthaler +middlename : k +year_range : (1982, 2003) +main_us_institutions_year : ((1976, 'american university'), (1979, 'american university'), (1982, 'american university'), (1988, 'american university'), (1989, 'american university'), (1990, 'american university'), (2001, 'american university')) +all_us_institutions_year : ((1976, 'american university'), (1979, 'american university'), (1982, 'american university'), (1988, 'american university'), (1989, 'american university'), (1990, 'american university'), (2001, 'american university'), (2002, 'american university')) + +7/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : minhha +lastname : trinh +middlename : t +year_range : (2011,) +main_us_institutions_year : ((2011, 'university of california berkeley'),) +all_us_institutions_year : ((2011, 'university of california berkeley'),) + +firstname : minh +lastname : tran +middlename : c +year_range : (2010, 2011) +main_us_institutions_year : ((2010, 'university of california los angeles'), (2011, 'university of california')) +all_us_institutions_year : ((2010, 'university of california los angeles'), (2011, 'university of california')) + +8/10 positive, 3/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : davidson +middlename : s +year_range : (2015,) +main_us_institutions_year : ((2015, 'michigan state university'),) +all_us_institutions_year : ((2015, 'michigan state university'),) + +firstname : william +lastname : davis +middlename : c +year_range : (1968, 2020) +main_us_institutions_year : ((1972, 'washington state university'), (1973, 'washington state university'), (1975, 'washington state university'), (1976, 'washington state university'), (1977, 'washington state university'), (1978, 'washington state university'), (1979, 'washington state university'), (1982, 'washington state university'), (1985, 'washington state university'), (1986, 'washington state university'), (1987, 'washington state university'), (1988, 'washington state university'), (1989, 'washington state university'), (1990, 'washington state university'), (1991, 'washington state university'), (1992, 'washington state university'), (1993, 'washington state university'), (1994, 'washington state university'), (1995, 'washington state university'), (1996, 'washington state university'), (1997, 'washington state university'), (1998, 'washington state university'), (1999, 'washington state university'), (2000, 'washington state university'), (2001, 'washington state university'), (2002, 'washington state university'), (2003, 'washington state university'), (2004, 'washington state university'), (2005, 'washington state university'), (2006, 'washington state university'), (2007, 'washington state university'), (2008, 'washington state university'), (2009, 'washington state university'), (2010, 'washington state university'), (2011, 'washington state university'), (2012, 'washington state university'), (2013, 'washington state university'), (2014, 'washington state university'), (2015, 'washington state university'), (2016, 'washington state university'), (2017, 'washington state university'), (2018, 'washington state university'), (2019, 'washington state university'), (2020, 'washington state university')) +all_us_institutions_year : ((1921, 'washington state university'), (1972, 'washington state university'), (1973, 'washington state university'), (1975, 'washington state university'), (1976, 'washington state university'), (1977, 'washington state university'), (1978, 'washington state university'), (1979, 'washington state university'), (1982, 'washington state university'), (1985, 'washington state university'), (1985, 'western university college of veterinary medicine'), (1986, 'washington state university'), (1987, 'washington state university'), (1988, 'washington state university'), (1989, 'washington state university'), (1990, 'washington state university'), (1991, 'washington state university'), (1992, 'washington state university'), (1993, 'washington state university'), (1994, 'washington state university'), (1995, 'washington state university'), (1996, 'agricultural research service'), (1996, 'washington state university'), (1997, 'washington state university'), (1998, 'washington state university'), (1999, 'washington state university'), (2000, 'washington state university'), (2001, 'washington state university'), (2002, 'washington state university'), (2003, 'washington state university'), (2004, 'washington state university'), (2005, 'washington state university'), (2006, 'washington state university'), (2007, 'washington state university'), (2008, 'washington state university'), (2009, 'washington state university'), (2010, 'washington state university'), (2011, 'washington state university'), (2012, 'washington state university'), (2013, 'washington state university'), (2014, 'washington state university'), (2015, 'washington state university'), (2016, 'washington state university'), (2017, 'washington state university'), (2018, 'washington state university'), (2019, 'washington state university'), (2020, 'washington state university')) + +8/10 positive, 4/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : william +lastname : rowe +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'university of south florida'),) +all_us_institutions_year : ((2010, 'university of south florida'),) + +firstname : william +lastname : roweton +middlename : e +year_range : (1974, 2000) +main_us_institutions_year : ((1976, 'madison area technical college'), (1977, 'madison area technical college'), (1978, 'madison area technical college')) +all_us_institutions_year : ((1976, 'madison area technical college'), (1977, 'madison area technical college'), (1978, 'madison area technical college')) + +8/10 positive, 5/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : roger +lastname : peterson +middlename : None +year_range : (2002,) +main_us_institutions_year : ((2002, 'antioch new england graduate school'),) +all_us_institutions_year : ((2002, 'antioch new england graduate school'),) + +firstname : roger +lastname : petersen +middlename : None +year_range : (1989, 2020) +main_us_institutions_year : ((1993, 'washington university in st louis'), (1997, 'university of washington'), (2001, 'university of chicago'), (2002, 'massachusetts institute of technology'), (2004, 'massachusetts institute of technology'), (2006, 'massachusetts institute of technology'), (2011, 'massachusetts institute of technology'), (2013, 'massachusetts institute of technology'), (2020, 'massachusetts institute of technology')) +all_us_institutions_year : ((1993, 'washington university in st louis'), (1997, 'university of washington'), (2001, 'university of chicago'), (2002, 'massachusetts institute of technology'), (2004, 'massachusetts institute of technology'), (2006, 'massachusetts institute of technology'), (2010, 'massachusetts institute of technology'), (2011, 'massachusetts institute of technology'), (2013, 'massachusetts institute of technology'), (2020, 'massachusetts institute of technology')) + +8/10 positive, 6/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : mark +lastname : doty +middlename : None +year_range : (2006,) +main_us_institutions_year : ((2006, 'university of houston'),) +all_us_institutions_year : ((2006, 'university of houston'),) + +firstname : mark +lastname : rothstein +middlename : a +year_range : (1984, 2021) +main_us_institutions_year : ((1988, 'university of houston'), (1989, 'university of houston'), (1990, 'university of houston'), (1992, 'university of houston law center'), (1993, 'university of houston'), (1995, 'university of houston'), (1995, 'university of maryland baltimore'), (1996, 'university of houston'), (1997, 'university of houston'), (1998, 'university of houston'), (1999, 'university of houston'), (2000, 'university of houston'), (2001, 'university of louisville'), (2002, 'university of louisville'), (2003, 'university of louisville'), (2004, 'university of louisville'), (2005, 'university of louisville'), (2006, 'university of louisville'), (2007, 'university of louisville'), (2008, 'university of louisville'), (2009, 'university of louisville'), (2010, 'university of louisville'), (2011, 'university of louisville'), (2012, 'university of louisville'), (2013, 'university of louisville'), (2014, 'university of louisville'), (2015, 'university of louisville'), (2016, 'university of louisville'), (2017, 'university of louisville'), (2018, 'university of louisville'), (2019, 'university of louisville'), (2020, 'university of louisville')) +all_us_institutions_year : ((1988, 'university of houston'), (1989, 'university of houston'), (1990, 'university of houston'), (1992, 'university of houston law center'), (1993, 'university of houston'), (1995, 'university of houston'), (1995, 'university of maryland baltimore'), (1996, 'university of houston'), (1996, 'university of louisville'), (1997, 'university of houston'), (1997, 'university of louisville'), (1998, 'university of houston'), (1998, 'university of louisville'), (1999, 'university of houston'), (1999, 'university of louisville'), (2000, 'university of houston'), (2000, 'university of louisville'), (2001, 'university of houston'), (2001, 'university of louisville'), (2002, 'university of louisville'), (2003, 'university of louisville'), (2004, 'university of louisville'), (2005, 'louisiana state university'), (2005, 'university of louisville'), (2005, 'university of pittsburgh'), (2006, 'university of louisville'), (2007, 'university of louisville'), (2008, 'university of louisville'), (2009, 'university of louisville'), (2010, 'university of louisville'), (2011, 'university of louisville'), (2012, 'university of louisville'), (2013, 'university of louisville'), (2014, 'university of louisville'), (2015, 'university of louisville'), (2016, 'university of louisville'), (2017, 'university of louisville'), (2018, 'university of louisville'), (2019, 'university of louisville'), (2020, 'university of louisville'), (2021, 'university of louisville')) + +8/10 positive, 7/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : dominic +lastname : thomas +middlename : None +year_range : (2007,) +main_us_institutions_year : ((2007, 'university of california los angeles'),) +all_us_institutions_year : ((2007, 'university of california los angeles'),) + +firstname : r +lastname : thomas +middlename : murray +year_range : (1962, 2016) +main_us_institutions_year : ((1962, 'university of california santa barbara'), (1966, 'university of california santa barbara'), (1970, 'university of california santa barbara'), (1973, 'university of california santa barbara'), (1977, 'university of california santa barbara'), (1984, 'university of california santa barbara'), (1985, 'university of california santa barbara'), (1986, 'university of california santa barbara'), (1987, 'university of california santa barbara'), (1988, 'university of california santa barbara'), (1989, 'university of california santa barbara'), (1992, 'university of california santa barbara'), (1993, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1998, 'university of california santa barbara'), (1999, 'university of california santa barbara'), (2002, 'university of california santa barbara'), (2003, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2005, 'university of california santa barbara'), (2013, 'university of california santa barbara'), (2014, 'university of california santa barbara'), (2015, 'university of california santa barbara'), (2016, 'university of california santa barbara')) +all_us_institutions_year : ((1962, 'university of california santa barbara'), (1966, 'university of california santa barbara'), (1970, 'university of california santa barbara'), (1973, 'university of california santa barbara'), (1977, 'university of california santa barbara'), (1980, 'university of california santa barbara'), (1984, 'university of california santa barbara'), (1985, 'university of california santa barbara'), (1986, 'university of california santa barbara'), (1987, 'university of california santa barbara'), (1988, 'university of california santa barbara'), (1989, 'university of california santa barbara'), (1992, 'university of california santa barbara'), (1993, 'university of california santa barbara'), (1995, 'university of california santa barbara'), (1998, 'university of california santa barbara'), (1999, 'university of california santa barbara'), (2002, 'university of california santa barbara'), (2003, 'university of california santa barbara'), (2004, 'university of california santa barbara'), (2005, 'university of california santa barbara'), (2013, 'university of california santa barbara'), (2014, 'university of california santa barbara'), (2015, 'university of california santa barbara'), (2016, 'university of california santa barbara')) + +8/10 positive, 8/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eric +lastname : wagner +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'florida international university'),) +all_us_institutions_year : ((2012, 'florida international university'),) + +firstname : eric +lastname : wagner +middlename : a +year_range : (1984, 1985) +main_us_institutions_year : ((1984, 'ohio university'), (1985, 'ohio university')) +all_us_institutions_year : ((1984, 'ohio university'), (1985, 'ohio university')) + +8/10 positive, 9/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : holly +lastname : miller +middlename : None +year_range : (2008,) +main_us_institutions_year : ((2008, 'sam houston state university'),) +all_us_institutions_year : ((2008, 'sam houston state university'),) + +firstname : holly +lastname : miller +middlename : ventura +year_range : (2007, 2021) +main_us_institutions_year : ((2007, 'university of texas at san antonio'), (2008, 'university of texas at san antonio'), (2009, 'university of texas at san antonio'), (2010, 'university of texas at san antonio'), (2011, 'university of texas at san antonio'), (2012, 'university of texas at san antonio'), (2013, 'university of texas at san antonio'), (2015, 'university of north florida'), (2016, 'university of north florida'), (2017, 'university of north florida'), (2019, 'university of north florida'), (2020, 'university of north florida'), (2021, 'university of north florida')) +all_us_institutions_year : ((2007, 'university of texas at san antonio'), (2008, 'university of texas at san antonio'), (2009, 'university of texas at san antonio'), (2010, 'university of texas at san antonio'), (2011, 'university of texas at san antonio'), (2012, 'university of texas at san antonio'), (2013, 'university of texas at san antonio'), (2015, 'university of north florida'), (2016, 'university of north florida'), (2017, 'university of north florida'), (2018, 'university of north florida'), (2019, 'university of north florida'), (2020, 'university of north florida'), (2021, 'university of north florida')) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : michele +lastname : boyer +middlename : c +year_range : (2014,) +main_us_institutions_year : ((2014, 'indiana state university'),) +all_us_institutions_year : ((2014, 'indiana state university'),) + +firstname : m +lastname : boyer +middlename : christine +year_range : (1997, 2017) +main_us_institutions_year : ((1997, 'princeton university'), (2017, 'princeton university')) +all_us_institutions_year : ((1997, 'princeton university'), (2017, 'princeton university')) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : sara +lastname : berg +middlename : van den +year_range : (2014,) +main_us_institutions_year : ((2014, 'saint louis university'),) +all_us_institutions_year : ((2014, 'saint louis university'),) + +firstname : sarah +lastname : berg +middlename : van den +year_range : (2017, 2021) +main_us_institutions_year : ((2017, 'columbia university'), (2021, 'columbia university')) +all_us_institutions_year : ((2017, 'columbia university'), (2021, 'columbia university')) + +8/10 positive, 10/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : james +lastname : king +middlename : None +year_range : (2004,) +main_us_institutions_year : ((2004, 'university of south florida'),) +all_us_institutions_year : ((2004, 'university of south florida'),) + +firstname : james +lastname : king +middlename : e +year_range : (2000, 2017) +main_us_institutions_year : ((2000, 'samford university'), (2001, 'samford university'), (2003, 'samford university'), (2004, 'samford university'), (2005, 'samford university'), (2008, 'university of alabama'), (2009, 'university of alabama'), (2010, 'university of alabama'), (2012, 'university of alabama'), (2014, 'university of alabama'), (2017, 'university of alabama')) +all_us_institutions_year : ((2000, 'samford university'), (2001, 'samford university'), (2003, 'samford university'), (2004, 'samford university'), (2005, 'samford university'), (2008, 'university of alabama'), (2009, 'university of alabama'), (2010, 'university of alabama'), (2012, 'university of alabama'), (2014, 'university of alabama'), (2017, 'university of alabama'), (2020, 'university of alabama')) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : wang +middlename : derwei +year_range : (2000,) +main_us_institutions_year : ((2000, 'columbia university'),) +all_us_institutions_year : ((2000, 'columbia university'),) + +firstname : david +lastname : wang +middlename : derwei +year_range : (1993, 2014) +main_us_institutions_year : ((1993, 'northwestern university'), (2014, 'northwestern university')) +all_us_institutions_year : ((1993, 'northwestern university'), (2014, 'northwestern university')) + +8/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : hugh +lastname : graham +middlename : davis +year_range : (1994,) +main_us_institutions_year : ((1994, 'vanderbilt university'),) +all_us_institutions_year : ((1994, 'vanderbilt university'),) + +firstname : hugh +lastname : graham +middlename : davis +year_range : (1968, 2004) +main_us_institutions_year : ((1997, 'university of kentucky'), (2001, 'university of south alabama')) +all_us_institutions_year : ((1997, 'university of kentucky'), (2001, 'university of south alabama')) + +9/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : melbourne +lastname : hovell +middlename : None +year_range : (1992,) +main_us_institutions_year : ((1992, 'university of california san diego and san diego state university'),) +all_us_institutions_year : ((1992, 'university of california san diego and san diego state university'),) + +firstname : melbourne +lastname : hovell +middlename : f +year_range : (2000, 2007) +main_us_institutions_year : ((2007, 'centers for disease control and prevention'),) +all_us_institutions_year : ((2007, 'centers for disease control and prevention'),) + +10/10 positive, 11/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : rosa +lastname : eberly +middlename : None +year_range : (2012,) +main_us_institutions_year : ((2012, 'pennsylvania state university'),) +all_us_institutions_year : ((2012, 'pennsylvania state university'),) + +firstname : rosa +lastname : eberly +middlename : a +year_range : (1993, 1999) +main_us_institutions_year : ((1997, 'university of texas at austin'), (1999, 'university of texas at austin')) +all_us_institutions_year : ((1997, 'university of texas at austin'), (1999, 'university of texas at austin')) + +10/10 positive, 12/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : anthony +lastname : weston +middlename : None +year_range : (1994,) +main_us_institutions_year : ((1994, 'stony brook university'),) +all_us_institutions_year : ((1994, 'stony brook university'),) + +firstname : anthony +lastname : weston +middlename : None +year_range : (1985, 2015) +main_us_institutions_year : ((1992, 'state university of new york system'), (1996, 'elon university'), (1998, 'elon university'), (2002, 'elon university'), (2004, 'elon university'), (2009, 'elon university'), (2011, 'elon university'), (2013, 'elon university'), (2015, 'elon university')) +all_us_institutions_year : ((1992, 'state university of new york system'), (1996, 'elon university'), (1998, 'elon university'), (2002, 'elon university'), (2004, 'elon university'), (2009, 'elon university'), (2011, 'elon university'), (2013, 'elon university'), (2015, 'elon university')) + +10/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : eiko +lastname : ikegami +middlename : None +year_range : (2010,) +main_us_institutions_year : ((2010, 'new school university'),) +all_us_institutions_year : ((2010, 'new school university'),) + +firstname : eiko +lastname : ikegami +middlename : None +year_range : (1970, 2005) +main_us_institutions_year : ((1970, 'the new school'), (2005, 'the new school')) +all_us_institutions_year : ((1970, 'the new school'), (2005, 'the new school')) + +11/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : arietta +lastname : slade +middlename : None +year_range : (2000,) +main_us_institutions_year : ((2000, 'city university of new york'),) +all_us_institutions_year : ((2000, 'city university of new york'),) + +firstname : arietta +lastname : slade +middlename : None +year_range : (2004, 2021) +main_us_institutions_year : ((2004, 'yale university'), (2013, 'yale university'), (2014, 'yale university'), (2018, 'yale university'), (2019, 'yale university'), (2020, 'yale university'), (2021, 'yale university')) +all_us_institutions_year : ((2004, 'yale university'), (2013, 'yale university'), (2014, 'city university of new york'), (2014, 'yale university'), (2018, 'yale university'), (2019, 'yale university'), (2020, 'yale university'), (2021, 'yale university')) + +12/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +firstname : david +lastname : britt +middlename : None +year_range : (2001,) +main_us_institutions_year : ((2001, 'wayne state university'),) +all_us_institutions_year : ((2001, 'wayne state university'),) + +firstname : david +lastname : britt +middlename : w +year_range : (1983, 1988) +main_us_institutions_year : ((1983, 'new york institute of technology'), (1987, 'university of maryland college park'), (1988, 'university of maryland eastern shore')) +all_us_institutions_year : ((1983, 'new york institute of technology'), (1987, 'university of maryland college park'), (1988, 'university of maryland eastern shore')) + +12/10 positive, 13/10 negative +Do these records refer to the same thing? +(y)es / (n)o / (u)nsure / (f)inished / (p)revious +Finished labeling +Done in 223.41967072089514 minutes. diff --git a/src/dataprep/temp/write_csv_links_advisors.log b/src/dataprep/temp/write_csv_links_advisors.log index ebb1201..6985401 100644 --- a/src/dataprep/temp/write_csv_links_advisors.log +++ b/src/dataprep/temp/write_csv_links_advisors.log @@ -1,18 +1,23 @@ -Fields where files have the years in the name: dict_keys(['political science', 'geology', 'biology']) -Fields where files do not have the years in the name: dict_keys(['mathematics', 'computer science', 'engineering', 'economics', 'geography', 'physics', 'chemistry', 'sociology', 'environmental science', 'psychology']) +Fields where files have the years in the name: dict_keys(['environmental science', 'chemistry', 'philosophy', 'sociology', 'engineering', 'mathematics', 'history', 'political science', 'physics', 'materials science', 'geology', 'geography', 'business', 'biology', 'computer science', 'economics', 'art', 'psychology']) +Fields where files do not have the years in the name: dict_keys([]) +Writing field environmental science +Writing field chemistry +Writing field philosophy +Writing field sociology +Writing field engineering +Writing field mathematics +Writing field history Writing field political science -The links of the iteration of field geology are already in the database. -The links of the iteration of field biology are already in the database. -The links of the iteration of field mathematics are already in the database. -The links of the iteration of field computer science are already in the database. -The links of the iteration of field engineering are already in the database. -The links of the iteration of field economics are already in the database. -The links of the iteration of field geography are already in the database. -The links of the iteration of field physics are already in the database. -The links of the iteration of field chemistry are already in the database. -The links of the iteration of field sociology are already in the database. -The links of the iteration of field environmental science are already in the database. -The links of the iteration of field psychology are already in the database. +Writing field physics +Writing field materials science +Writing field geology +Writing field geography +Writing field business +Writing field biology +Writing field computer science +Writing field economics +Writing field art +Writing field psychology Running ANALYZE... -Done in 0.008159295717875163 minutes. +Done in 0.01930009126663208 minutes. diff --git a/src/dataprep/temp/write_csv_links_graduates.log b/src/dataprep/temp/write_csv_links_graduates.log index e1f6f43..5f417a9 100644 --- a/src/dataprep/temp/write_csv_links_graduates.log +++ b/src/dataprep/temp/write_csv_links_graduates.log @@ -1,3 +1,5 @@ +Fields where files have the years in the name: dict_keys([]) +Fields where files do not have the years in the name: dict_keys(['political science', 'biology', 'geology', 'psychology', 'physics', 'engineering', 'economics', 'environmental science', 'computer science']) The links of the iteration of field political science are already in the database. The links of the iteration of field biology are already in the database. The links of the iteration of field geology are already in the database. @@ -9,4 +11,4 @@ The links of the iteration of field environmental science are already in the dat The links of the iteration of field computer science are already in the database. Running ANALYZE... -Done in 0.004560442765553793 minutes. +Done in 0.0038335204124450685 minutes. diff --git a/src/dataprep/temp/write_csv_links_graduates_addedfields.log b/src/dataprep/temp/write_csv_links_graduates_addedfields.log new file mode 100644 index 0000000..c0b4b32 --- /dev/null +++ b/src/dataprep/temp/write_csv_links_graduates_addedfields.log @@ -0,0 +1,11 @@ +Fields where files have the years in the name: dict_keys(['business', 'philosophy', 'history', 'medicine', 'art', 'materials science']) +Fields where files do not have the years in the name: dict_keys([]) +Writing field business +Writing field philosophy +Writing field history +Writing field medicine +Writing field art +Writing field materials science +Running ANALYZE... + +Done in 0.003022166093190511 minutes.