Skip to content

Commit

Permalink
FIX Corrected bug in script that generates the test data in SDP 2015.
Browse files Browse the repository at this point in the history
  • Loading branch information
André T. Martins committed Jan 11, 2015
1 parent 661239f commit 1a9a589
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 16 deletions.
17 changes: 14 additions & 3 deletions scripts_srl/train_test_semantic_parser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,14 @@ then
#file_train_orig=${path_data}/${language}_${formalism}_augmented_train.sdp
file_train_orig=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
files_test_orig[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
#files_test_orig[1]=${path_data}/${language}_${formalism}_augmented_test.sdp
files_test_orig[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
files_test_orig[2]=${path_data}/${language}_ood_${formalism}_augmented_test.sdp

#file_train=${path_data}/${language}_ctags_${formalism}_augmented_train.sdp
file_train=${path_data}/${language}_ctags_${formalism}_augmented_train+dev.sdp
files_test[0]=${path_data}/${language}_ctags_${formalism}_augmented_dev.sdp
#files_test[1]=${path_data}/${language}_ctags_${formalism}_augmented_test.sdp
files_test[1]=${path_data}/${language}_id_ctags_${formalism}_augmented_test.sdp
files_test[2]=${path_data}/${language}_ood_ctags_${formalism}_augmented_test.sdp

rm -f ${file_train}
awk 'NF>0{OFS="\t";$4=substr($4,0,2);print}NF==0{print}' ${file_train_orig} \
Expand All @@ -115,11 +117,18 @@ then
# For every non-empty line, overwrite field 4 with substr($4,0,2) and print the
# fields tab-separated; empty lines are printed unchanged.
# NOTE(review): the substr() start index is 0 although awk strings are indexed
# from 1. As far as I can tell, POSIX-style awks (e.g. gawk) keep only ONE
# character here, while an awk that clamps the start to 1 keeps TWO -- confirm
# the intended prefix length and switch to an explicit 1-based start.
awk 'NF>0{OFS="\t";$4=substr($4,0,2);print}NF==0{print}' ${file_test_orig}.unaugmented \
> ${file_test}.unaugmented
done
elif [ "$language" == "english" ]
then
#file_train=${path_data}/${language}_${formalism}_augmented_train.sdp
file_train=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
files_test[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
files_test[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
files_test[2]=${path_data}/${language}_ood_${formalism}_augmented_test.sdp
else
#file_train=${path_data}/${language}_${formalism}_augmented_train.sdp
file_train=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
files_test[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
#files_test[1]=${path_data}/${language}_${formalism}_augmented_test.sdp
files_test[1]=${path_data}/${language}_id_${formalism}_augmented_test.sdp
fi
else
if [ "$language" == "english" ]
Expand Down Expand Up @@ -203,6 +212,7 @@ then
if [ "$file_format" == "sdp" ]
then
python remove_augmented.py ${file_pruner_prediction} > ${file_pruner_prediction}.unaugmented
python remove_augmented.py ${file_test} > ${file_test}.unaugmented
sh evaluator/toolkit/run.sh Scorer ${file_test}.unaugmented ${file_pruner_prediction}.unaugmented representation=${formalism} \
>> ${file_pruner_results}
cat ${file_pruner_results}
Expand Down Expand Up @@ -323,6 +333,7 @@ then
if [ "$file_format" == "sdp" ]
then
python remove_augmented.py ${file_prediction} > ${file_prediction}.unaugmented
python remove_augmented.py ${file_test} > ${file_test}.unaugmented
sh evaluator/toolkit/run.sh Scorer ${file_test}.unaugmented ${file_prediction}.unaugmented representation=${formalism} \
>> ${file_results}
cat ${file_results}
Expand Down
48 changes: 35 additions & 13 deletions semeval2015_data/scripts/generate_all_splits.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,40 @@

# Folder where the data will be placed.
data_folder="`cd $(dirname $0);cd ..;pwd`"
generate_test_splits=false
generate_test_splits=true
blind_test=true

for language in english czech chinese
do
if [ "${language}" == "english" ]
then
prefix=en
formalisms=( "dm" "pas" "psd" )
train_companion=../train/${prefix}.sb.bn.cpn
test_companion=../test/${prefix}.sb.bn.cpn
use_companion=true
domains=( "id" "ood" )
elif [ "${language}" == "czech" ]
then
prefix=cs
formalisms=( "psd" )
train_companion=""
test_companion=""
use_companion=false
domains=( "id" "ood" )
elif [ "${language}" == "chinese" ]
then
prefix=cz
formalisms=( "pas" )
train_companion=""
test_companion=""
use_companion=false
domains=( "id" )
fi

for formalism in "${formalisms[@]}"
do
echo "Generating splits for ${language} ${formalism}..."
if ${use_companion}
then
train_companion=../train/${prefix}.sb.bn.cpn
else
train_companion=""
fi
python augment_with_companion_data.py \
../train/${prefix}.${formalism}.sdp \
${train_companion} > \
Expand All @@ -53,13 +60,28 @@ do

if ${generate_test_splits}
then
cp ../test/${prefix}.${formalism}.sdp \
${path_data}/${language}_${formalism}_augmented_test.sdp.unaugmented
if ${blind_test}
then
extension=tt
else
extension=sdp
fi
for domain in "${domains[@]}"
do
cp ../test/${prefix}.${domain}.${formalism}.${extension} \
${path_data}/${language}_${domain}_${formalism}_augmented_test.sdp.unaugmented

python augment_with_companion_data.py \
../test/${prefix}.${formalism}.sdp \
${test_companion} > \
${path_data}/${language}_${formalism}_augmented_test.sdp
if ${use_companion}
then
test_companion=../test/${prefix}.${domain}.sb.bn.cpn
else
test_companion=""
fi
python augment_with_companion_data.py \
../test/${prefix}.${domain}.${formalism}.${extension} \
${test_companion} > \
${path_data}/${language}_${domain}_${formalism}_augmented_test.sdp
done
fi
done
done

0 comments on commit 1a9a589

Please sign in to comment.