Skip to content

Commit

Permalink
ENH: Script for ablation study.
Browse files Browse the repository at this point in the history
  • Loading branch information
andre-martins committed Jun 26, 2014
1 parent ad44459 commit a6799d8
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
7 changes: 7 additions & 0 deletions python/tokenizers/portuguese/word_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def tokenize(self, text):
# Note: the Portuguese sentence tokenizer should also do this!!
text = re.sub('\xc2\xa0', ' ', text)

# Replace U+0096 by dashes.
text = re.sub('\xc2\x96', ' -- ', text)

if self.replace_parenthesis:
# Replace all parentheses by single quotes.
# This looks like a really terrible idea. However, since there are
Expand All @@ -110,6 +113,10 @@ def tokenize(self, text):
text = re.sub(r'(``)', r' \1 ', text)
text = re.sub(r'([ (\[{<])"', r'\1 `` ', text)

# I added these for single quotes -- to avoid things like
# "o 'apartheid social ' "
text = re.sub(r'\'', '\' ', text)

#punctuation
text = re.sub(r'([:,])([^\d])', r' \1 \2', text)
text = re.sub(r'\.\.\.', r' ... ', text)
Expand Down
4 changes: 2 additions & 2 deletions scripts_srl/train_test_submission_open.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ C=0.01
cost_fp=0.4
cost_fn=0.6
echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}"
./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}
#./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}

formalism=pas
C=0.01
cost_fp=0.4
cost_fn=0.6
echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}"
./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}
#./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type}

formalism=pcedt
C=0.01
Expand Down
1 change: 1 addition & 0 deletions src/semantic_parser/SemanticFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2085,6 +2085,7 @@ void SemanticFeatures::AddPredicateFeatures(SemanticInstanceNumeric* sentence,
fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHP, flags, HWID, bdHPID);
AddFeature(fkey, features);
fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHRID);
//fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHPID);
AddFeature(fkey, features);
fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HP_bdHW, flags, bdHWID, HPID);
AddFeature(fkey, features);
Expand Down

0 comments on commit a6799d8

Please sign in to comment.