FIX Corrected bug in script that generates the test data in SDP 2015.

Cocophotos · Jan 11, 2015 · 1a9a589 · 1a9a589
1 parent 661239f
commit 1a9a589
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 16 deletions.
diff --git a/scripts_srl/train_test_semantic_parser.sh b/scripts_srl/train_test_semantic_parser.sh
@@ -90,12 +90,14 @@ then
         #file_train_orig=${path_data}/${language}_${formalism}_augmented_train.sdp
         file_train_orig=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
         files_test_orig[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
-        #files_test_orig[1]=${path_data}/${language}_${formalism}_augmented_test.sdp
+        files_test_orig[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
+        files_test_orig[2]=${path_data}/${language}_ood_${formalism}_augmented_test.sdp
 
         #file_train=${path_data}/${language}_ctags_${formalism}_augmented_train.sdp
         file_train=${path_data}/${language}_ctags_${formalism}_augmented_train+dev.sdp
         files_test[0]=${path_data}/${language}_ctags_${formalism}_augmented_dev.sdp
-        #files_test[1]=${path_data}/${language}_ctags_${formalism}_augmented_test.sdp
+        files_test[1]=${path_data}/${language}_id_ctags_${formalism}_augmented_test.sdp
+        files_test[2]=${path_data}/${language}_ood_ctags_${formalism}_augmented_test.sdp
 
         rm -f ${file_train}
         awk 'NF>0{OFS="\t";$4=substr($4,0,2);print}NF==0{print}' ${file_train_orig} \
@@ -115,11 +117,18 @@ then
             awk 'NF>0{OFS="\t";$4=substr($4,0,2);print}NF==0{print}' ${file_test_orig}.unaugmented \
                 > ${file_test}.unaugmented
         done
+    elif [ "$language" == "english" ]
+    then
+        #file_train=${path_data}/${language}_${formalism}_augmented_train.sdp
+        file_train=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
+        files_test[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
+        files_test[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
+        files_test[2]=${path_data}/${language}_ood_${formalism}_augmented_test.sdp
     else
         #file_train=${path_data}/${language}_${formalism}_augmented_train.sdp
         file_train=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
         files_test[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
-        #files_test[1]=${path_data}/${language}_${formalism}_augmented_test.sdp
+        files_test[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
     fi
 else
     if [ "$language" == "english" ]
@@ -203,6 +212,7 @@ then
         if [ "$file_format" == "sdp" ]
         then
             python remove_augmented.py ${file_pruner_prediction} > ${file_pruner_prediction}.unaugmented
+            python remove_augmented.py ${file_test} > ${file_test}.unaugmented
             sh evaluator/toolkit/run.sh Scorer ${file_test}.unaugmented ${file_pruner_prediction}.unaugmented representation=${formalism} \
                 >> ${file_pruner_results}
             cat ${file_pruner_results}
@@ -323,6 +333,7 @@ then
         if [ "$file_format" == "sdp" ]
         then
             python remove_augmented.py ${file_prediction} > ${file_prediction}.unaugmented
+            python remove_augmented.py ${file_test} > ${file_test}.unaugmented
             sh evaluator/toolkit/run.sh Scorer ${file_test}.unaugmented ${file_prediction}.unaugmented representation=${formalism} \
                 >> ${file_results}
             cat ${file_results}

diff --git a/semeval2015_data/scripts/generate_all_splits.sh b/semeval2015_data/scripts/generate_all_splits.sh
@@ -2,33 +2,40 @@
 
 # Folder where the data will be placed.
 data_folder="`cd $(dirname $0);cd ..;pwd`"
-generate_test_splits=false
+generate_test_splits=true
+blind_test=true
 
 for language in english czech chinese
 do
     if [ "${language}" == "english" ]
     then
         prefix=en
         formalisms=( "dm" "pas" "psd" )
-        train_companion=../train/${prefix}.sb.bn.cpn
-        test_companion=../test/${prefix}.sb.bn.cpn
+        use_companion=true
+	domains=( "id" "ood" )
     elif [ "${language}" == "czech" ]
     then
         prefix=cs
         formalisms=( "psd" )
-        train_companion=""
-        test_companion=""
+        use_companion=false
+	domains=( "id" "ood" )
     elif [ "${language}" == "chinese" ]
     then
         prefix=cz
         formalisms=( "pas" )
-        train_companion=""
-        test_companion=""
+        use_companion=false
+	domains=( "id" )
     fi
 
     for formalism in "${formalisms[@]}"
     do
         echo "Generating splits for ${language} ${formalism}..."
+	if ${use_companion}
+	then
+	    train_companion=../train/${prefix}.sb.bn.cpn
+	else
+	    train_companion=""
+	fi
         python augment_with_companion_data.py \
             ../train/${prefix}.${formalism}.sdp \
             ${train_companion} > \
@@ -53,13 +60,28 @@ do
 
         if ${generate_test_splits}
         then
-            cp ../test/${prefix}.${formalism}.sdp \
-                ${path_data}/${language}_${formalism}_augmented_test.sdp.unaugmented
+	    if ${blind_test}
+	    then
+		extension=tt
+	    else
+		extension=sdp
+	    fi
+	    for domain in "${domains[@]}"
+	    do
+		cp ../test/${prefix}.${domain}.${formalism}.${extension} \
+                    ${path_data}/${language}_${domain}_${formalism}_augmented_test.sdp.unaugmented
 
-            python augment_with_companion_data.py \
-                ../test/${prefix}.${formalism}.sdp \
-                ${test_companion} > \
-                ${path_data}/${language}_${formalism}_augmented_test.sdp
+		if ${use_companion}
+		then
+		    test_companion=../test/${prefix}.${domain}.sb.bn.cpn
+		else
+		    test_companion=""
+		fi
+		python augment_with_companion_data.py \
+                    ../test/${prefix}.${domain}.${formalism}.${extension} \
+                    ${test_companion} > \
+                    ${path_data}/${language}_${domain}_${formalism}_augmented_test.sdp
+	    done
         fi
     done
 done