Skip to content

Commit

Permalink
ENH: Added scripts to create the data for reproducing the SemEval 201…
Browse files Browse the repository at this point in the history
…4 paper.
  • Loading branch information
andre-martins committed Jun 24, 2014
1 parent 2499854 commit f6a7340
Show file tree
Hide file tree
Showing 9 changed files with 68,197 additions and 10 deletions.
15 changes: 5 additions & 10 deletions scripts_srl/train_test_semantic_parser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ then
allow_unseen_predicates=false # This should be irrelevant.
use_predicate_senses=false #true
formalism=$7 #pcedt #pas #dm
subfolder=sdp/${formalism}
subfolder=semeval2014_data/${formalism}
else
allow_self_loops=true
allow_root_predicate=false
Expand Down Expand Up @@ -84,15 +84,10 @@ if [ "$language" == "english" ]
then
if [ "$file_format" == "sdp" ]
then
file_train=${path_data}/${formalism}_augmented_train+dev0.sdp
#file_train=${path_data}/${formalism}_augmented_train+dev0+dev.sdp
files_test[0]=${path_data}/${formalism}_augmented_dev.sdp
files_test[1]=${path_data}/${formalism}_augmented_test.sdp
#files_test[1]=${path_data}/${formalism}_augmented_test_blind.sdp
#files_test[1]=${path_data}/${formalism}_augmented_train+dev0.sdp
#files_test[0]=${path_data}/${formalism}_augmented_dev0.sdp
#files_test[1]=${path_data}/${formalism}_augmented_dev.sdp
#files_test[2]=${path_data}/${formalism}_augmented_train.sdp
file_train=${path_data}/${language}_${formalism}_augmented_train.sdp
#file_train=${path_data}/${language}_${formalism}_augmented_train+dev.sdp
files_test[0]=${path_data}/${language}_${formalism}_augmented_dev.sdp
files_test[1]=${path_data}/${language}_${formalism}_augmented_test.sdp
else
file_train=${path_data}/${language}_train.conll2008
files_test[0]=${path_data}/${language}_test.conll2008
Expand Down
24 changes: 24 additions & 0 deletions scripts_srl/train_test_submission_closed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Driver for the open=false configuration: invokes
# ./train_test_semantic_parser.sh once per formalism (dm, pas, pcedt) with
# formalism-specific hyper-parameters, capturing each run's stdout and stderr
# in its own log file in the current directory.
#
# Must be started from the directory that contains
# train_test_semantic_parser.sh (it is invoked through a relative path).
# A failing run does not abort the remaining runs.

model_type=af+as+cs+gp+cp+ccp
open=false

# Runs one experiment and logs it.
# Globals:   model_type, open (read); formalism, C, cost_fp, cost_fn (written,
#            so they hold the last run's values afterwards, as before)
# Arguments: $1 - formalism, $2 - C, $3 - cost_fp, $4 - cost_fn
#            (all four are passed through verbatim to the parser script)
# Outputs:   the parameter summary on stdout; the parser script's combined
#            stdout/stderr in out_open-<open>_deterministic_<params...>
# Returns:   exit status of train_test_semantic_parser.sh
run_experiment() {
  formalism=$1
  C=$2
  cost_fp=$3
  cost_fn=$4
  echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}"
  # "> file 2>&1" instead of ">& file": same effect in bash, but also valid
  # in a plain POSIX sh, which matters because this script has no shebang.
  ./train_test_semantic_parser.sh english "${C}" "${cost_fp}" "${cost_fn}" \
    "${model_type}" "${open}" "${formalism}" \
    > "out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}" 2>&1
}

#              formalism  C     cost_fp  cost_fn
run_experiment dm         0.01  0.3      0.7
run_experiment pas        0.01  0.4      0.6
run_experiment pcedt      0.01  0.3      0.7

24 changes: 24 additions & 0 deletions scripts_srl/train_test_submission_open.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Driver for the open=true configuration: invokes
# ./train_test_semantic_parser.sh once per formalism (dm, pas, pcedt),
# capturing each run's stdout and stderr in its own log file in the current
# directory.
#
# Must be started from the directory that contains
# train_test_semantic_parser.sh (it is invoked through a relative path).
# A failing run does not abort the remaining runs.

model_type=af+as+cs+cp+ccp #af+as+cs+gp+cp+ccp
open=true #false

# Runs one experiment and logs it.
# Globals:   model_type, open (read); formalism, C, cost_fp, cost_fn (written,
#            so they hold the last run's values afterwards, as before)
# Arguments: $1 - formalism, $2 - C, $3 - cost_fp, $4 - cost_fn
#            (all four are passed through verbatim to the parser script)
# Outputs:   the parameter summary on stdout; the parser script's combined
#            stdout/stderr in out_open-<open>_deterministic_<params...>
# Returns:   exit status of train_test_semantic_parser.sh
run_experiment() {
  formalism=$1
  C=$2
  cost_fp=$3
  cost_fn=$4
  echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}"
  # "> file 2>&1" instead of ">& file": same effect in bash, but also valid
  # in a plain POSIX sh, which matters because this script has no shebang.
  ./train_test_semantic_parser.sh english "${C}" "${cost_fp}" "${cost_fn}" \
    "${model_type}" "${open}" "${formalism}" \
    > "out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}" 2>&1
}

#              formalism  C     cost_fp  cost_fn
run_experiment dm         0.01  0.4      0.6
run_experiment pas        0.01  0.4      0.6
run_experiment pcedt      0.01  0.4      0.6

47 changes: 47 additions & 0 deletions semeval2014_data/scripts/augment_with_companion_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Merge a tab-separated token file with a line-aligned companion file.

Usage: augment_with_companion_data.py <file> <companion_file>

The two files are read in lockstep, one line from each per step, and the
merged lines are written to stdout.
"""
import sys

# When True, '#'-prefixed header lines are copied to the output; when False
# they are dropped.
keep_document_names = True


def augment_line(line, line_companion):
    """Return the augmented form of one input line, or None to drop it.

    Both arguments are single lines with the trailing newline already removed.

    * A line that starts with '#' but whose first tab-separated field is not
      exactly '#' is returned unchanged if ``keep_document_names`` is set,
      otherwise None is returned.
    * An empty line is returned unchanged.
    * Any other line is split on tabs and re-joined as
      ``index, word, lemma, pos, head, deprel, top, pred, args...`` where
      ``head`` and ``deprel`` are columns 1 and 2 of the companion line.

    Raises IndexError if a token line or its companion line has too few
    columns.
    """
    if line.startswith('#') and line.split('\t')[0] != '#':
        return line if keep_document_names else None
    if line == '':
        return line

    fields = line.split('\t')
    fields_companion = line_companion.split('\t')

    word_index = fields[0]
    word = fields[1]
    lemma = fields[2]
    pos = fields[3]
    if len(fields) == 4:
        # Only the first four columns are present: fill top/pred with '-'.
        top = '-'
        pred = '-'
    else:
        top = fields[4]
        pred = fields[5]
    # Computed unconditionally so a four-column line gets an empty argument
    # list instead of an undefined or stale value.
    args = fields[6:]

    # Companion column 0 (called predicted_pos in the original script) is
    # deliberately not used: the pos column of the main file is emitted.
    head = fields_companion[1]
    deprel = fields_companion[2]

    fields_output = [word_index, word, lemma, pos, head, deprel, top, pred]
    fields_output.extend(args)
    return '\t'.join(fields_output)


def main(argv):
    """Read the two files named in argv[1] and argv[2]; print merged lines."""
    filepath = argv[1]
    filepath_companion = argv[2]
    # 'with' guarantees both files are closed even if a malformed line raises.
    with open(filepath) as f, open(filepath_companion) as f_companion:
        # NOTE(review): zip() stops at the shorter file, so a length mismatch
        # between the two inputs is silently truncated -- confirm that the
        # inputs are always line-aligned.
        for line, line_companion in zip(f, f_companion):
            line_output = augment_line(line.rstrip('\n'),
                                       line_companion.rstrip('\n'))
            if line_output is not None:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print(line_output)


if __name__ == '__main__':
    main(sys.argv)
Loading

0 comments on commit f6a7340

Please sign in to comment.