diff --git a/output/quality_linking.pdf b/output/quality_linking.pdf
index 5c0105e..9401ad1 100644
Binary files a/output/quality_linking.pdf and b/output/quality_linking.pdf differ
diff --git a/src/dataprep/main/link/prep_linked_data.py b/src/dataprep/main/link/prep_linked_data.py
index 2e58946..97c5229 100644
--- a/src/dataprep/main/link/prep_linked_data.py
+++ b/src/dataprep/main/link/prep_linked_data.py
@@ -31,7 +31,6 @@
 import pdb 
 import argparse
 
-# ## Arguments
 # ## Arguments
 parser = argparse.ArgumentParser(description = 'Inputs for author_collab')
 parser.add_argument("--filter_trainname", 
@@ -145,7 +144,7 @@
 
 # for now, do not condition on certain time distance between 
 # graduation year and whenever the supervisor has a publication. 
-# TODO: do this after gaining some insights in the analysis
+# Do this on the fly after gaining some insights in the analysis 
 
 con.execute("CREATE UNIQUE INDEX idx_cla_AuthorIdrelid ON current_links_advisors (AuthorId ASC, relationship_id ASC)")
 con.execute("CREATE UNIQUE INDEX idx_cla_relid ON current_links_advisors (relationship_id ASC)") # this is also a way to make sure there are not multiple links per goid
diff --git a/src/dataprep/main/reports/quality_linking.Rmd b/src/dataprep/main/reports/quality_linking.Rmd
index ac93083..17fc99e 100644
--- a/src/dataprep/main/reports/quality_linking.Rmd
+++ b/src/dataprep/main/reports/quality_linking.Rmd
@@ -22,10 +22,26 @@ lapply(packages, library, character.only = TRUE)
 
 datapath <- "/mnt/ssd/"
 db_file  <- paste0(datapath, "AcademicGraph/AcademicGraph.sqlite")
-select_fields <- c("physics", "biology", "chemistry", "sociology",
-                    "economics", "political science", "psychology", 
-                    "mathematics", "geography", "geology", "engineering",
-                    "computer science", "environmental science") # fields currently matched 
+select_fields <- c("art",
+                   "biology",
+                   "business",
+                   "chemistry",
+                   "computer science" ,
+                   "economics",
+                   "engineering",
+                   "environmental science",
+                   "geography",
+                   "geology" ,
+                   "history",
+                   "materials science",
+                   "mathematics",
+                   "medicine",
+                   "philosophy",
+                   "physics",
+                   "political science",
+                   "psychology" ,
+                   "sociology") # all fields are currently matched
+
 
 
 date_method_change <- ymd("2022-07-01") # after summer we extended the sampling period and added more features 
diff --git a/src/dataprep/pipeline.sh b/src/dataprep/pipeline.sh
index 7a19d5e..f8c7ac5 100644
--- a/src/dataprep/pipeline.sh
+++ b/src/dataprep/pipeline.sh
@@ -83,6 +83,10 @@ Rscript -e "rmarkdown::render('$script_path/reports/sample_size_linking.Rmd', ou
 # ## 1. Link graduates to MAG
 bash $script_path/link/graduates.sh $logfile_path
 
+# Christoph retrained with the following options:
+# --train_name "christoph_degree0" --keepyears "19852015"
+# need to run the write_csv_links script with these options as well
+# to get all links into db
 python -m $script_path.link.write_csv_links --linking_type "graduates" --train_name "christoph_fielddegree0" \
     &> $logfile_path/write_csv_links_graduates.log
 
diff --git a/src/dataprep/temp/prep_linked_data.log b/src/dataprep/temp/prep_linked_data.log
index 92744be..7c15b15 100644
--- a/src/dataprep/temp/prep_linked_data.log
+++ b/src/dataprep/temp/prep_linked_data.log
@@ -1,4 +1,4 @@
-Start time: 1664470032.5248213 
+Start time: 1670232570.3368495 
 
 Using the following DocTypes for citations: ('Journal', 'Book', 'BookChapter', 'Conference')... 
 
@@ -8,16 +8,16 @@ where_stmt_iterations is
     
 current_links for graduates
 current_links for advisors
-Time elapsed: 0.33170623779296876 minutes 
+Time elapsed: 0.2818102161089579 minutes 
 
 Making author_citations... 
 
-Time elapsed: 7.353962099552154 minutes 
+Time elapsed: 11.818827704588573 minutes 
 
 Making author_output... 
 
-Time elapsed: 22.382010038693746 minutes 
+Time elapsed: 26.798788146177927 minutes 
 
 Running ANALYZE... 
 
-Done in 22.38204313913981 minutes.
+Done in 26.79902730782827 minutes.
diff --git a/src/dataprep/temp/write_csv_links_graduates.log b/src/dataprep/temp/write_csv_links_graduates.log
index e1f6f43..5f417a9 100644
--- a/src/dataprep/temp/write_csv_links_graduates.log
+++ b/src/dataprep/temp/write_csv_links_graduates.log
@@ -1,3 +1,5 @@
+Fields where files have the years in the name: dict_keys([])
+Fields where files do not have the years in the name: dict_keys(['political science', 'biology', 'geology', 'psychology', 'physics', 'engineering', 'economics', 'environmental science', 'computer science'])
 The links of the iteration of field political science are already in the database.
 The links of the iteration of field biology are already in the database.
 The links of the iteration of field geology are already in the database.
@@ -9,4 +11,4 @@ The links of the iteration of field environmental science are already in the dat
 The links of the iteration of field computer science are already in the database.
 Running ANALYZE... 
 
-Done in 0.004560442765553793 minutes.
+Done in 0.0038335204124450685 minutes.
diff --git a/src/dataprep/temp/write_csv_links_graduates_addedfields.log b/src/dataprep/temp/write_csv_links_graduates_addedfields.log
new file mode 100644
index 0000000..c0b4b32
--- /dev/null
+++ b/src/dataprep/temp/write_csv_links_graduates_addedfields.log
@@ -0,0 +1,11 @@
+Fields where files have the years in the name: dict_keys(['business', 'philosophy', 'history', 'medicine', 'art', 'materials science'])
+Fields where files do not have the years in the name: dict_keys([])
+Writing field business
+Writing field philosophy
+Writing field history
+Writing field medicine
+Writing field art
+Writing field materials science
+Running ANALYZE... 
+
+Done in 0.003022166093190511 minutes.