Skip to content

Commit

Permalink
FIX Corrected a few bugs when self-loops are allowed.
Browse files Browse the repository at this point in the history
  • Loading branch information
andre-martins committed Jul 24, 2014
1 parent 2938248 commit 6ce6c4c
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 18 deletions.
6 changes: 3 additions & 3 deletions scripts_srl/train_test_semantic_parser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ test=true
prune=true # This will revert to false if model_type=basic.
prune_labels=true
prune_distances=true
train_external_pruner=false # If true, the pruner is trained separately.
trained_external_pruner=false # If true, loads the external pruner.
train_external_pruner=false #false # If true, the pruner is trained separately.
trained_external_pruner=true #false # If true, loads the external pruner.
posterior_threshold=0.0001 # Posterior probability threshold for the pruner.
pruner_max_arguments=20 #10 # Maximum number of candidate heads allowed by the pruner.
labeled=true # Output semantic labels.
Expand All @@ -32,7 +32,7 @@ model_type=$5 #af+as+gp+cp # Parts used in the model (subset of "af+cs+gp+as+hb+
# make the parser a lot slower.
train_cost_false_positives=$3
train_cost_false_negatives=$4
file_format=sdp # conll
file_format=conll #sdp # conll

if [ "${file_format}" == "sdp" ]
then
Expand Down
55 changes: 54 additions & 1 deletion src/semantic_parser/FactorPredicateAutomaton.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,30 @@ class FactorPredicateAutomaton : public GenericFactor {
}

int GetNumSenses() const { return index_arguments_.size(); }
int GetLength(int sense) const { return index_arguments_[sense].size(); }
int GetLength(int sense) const {
CHECK_GE(sense, 0);
CHECK_LT(sense, index_arguments_.size());
return index_arguments_[sense].size();
}
double GetSenseScore(int sense,
const vector<double> &variable_log_potentials,
const vector<double> &additional_log_potentials) const {
CHECK_GE(sense, 0);
CHECK_LT(sense, variable_log_potentials.size());
return variable_log_potentials[sense];
}
double GetArgumentScore(
int sense,
int argument,
const vector<double> &variable_log_potentials,
const vector<double> &additional_log_potentials) const {
CHECK_GE(sense, 0);
CHECK_LT(sense, index_arguments_.size());
CHECK_GE(argument, 0);
CHECK_LT(argument, index_arguments_[sense].size());
int index = index_arguments_[sense][argument];
CHECK_GE(index, 0);
CHECK_LT(index, variable_log_potentials.size());
return variable_log_potentials[index];
}
double GetSiblingScore(
Expand All @@ -79,7 +91,15 @@ class FactorPredicateAutomaton : public GenericFactor {
int second_argument,
const vector<double> &variable_log_potentials,
const vector<double> &additional_log_potentials) const {
CHECK_GE(sense, 0);
CHECK_LT(sense, index_siblings_.size());
CHECK_GE(first_argument, 0);
CHECK_LT(first_argument, index_siblings_[sense].size());
CHECK_GE(second_argument, 0);
CHECK_LT(second_argument, index_siblings_[sense][first_argument].size());
int index = index_siblings_[sense][first_argument][second_argument];
CHECK_GE(index, 0) << sense << " " << first_argument << " " << second_argument; // This check failed!!!
CHECK_LT(index, additional_log_potentials.size());
return additional_log_potentials[index];
}
void AddSensePosterior(int sense,
Expand Down Expand Up @@ -111,6 +131,7 @@ class FactorPredicateAutomaton : public GenericFactor {
const vector<double> &additional_log_potentials,
Configuration &configuration,
double *value) {
//LOG(INFO) << "Begin Maximize";
// Decode maximizing over the senses and using the Viterbi algorithm
// as an inner loop.
// If sense=-1, the final score is zero (so take the argmax at the end).
Expand All @@ -126,6 +147,9 @@ class FactorPredicateAutomaton : public GenericFactor {
vector<vector<int> > path(length);
CHECK_GE(length, 1);

//LOG(INFO) << "Sense " << s << " of " << num_senses;
//LOG(INFO) << "length = " << length;

// The start state is a1 = 0.
values[0].resize(1);
values[0][0] = 0.0;
Expand Down Expand Up @@ -156,6 +180,8 @@ class FactorPredicateAutomaton : public GenericFactor {
additional_log_potentials);
}

//LOG(INFO) << "Terminate";

// The end state is a = length.
int best_last_state = -1;
double best_score = -1e12;
Expand All @@ -176,21 +202,34 @@ class FactorPredicateAutomaton : public GenericFactor {
best_score += GetSenseScore(s, variable_log_potentials,
additional_log_potentials);

//LOG(INFO) << "Backtrack";

// Only backtrack if the solution is the best so far.
if (best_score > *value) {
// This is the best sense so far.
best_sense = s;
*value = best_score;
//LOG(INFO) << length;
best_path.resize(length);
best_path[length-1] = best_last_state;

// Backtrack.
for (int a = length-1; a > 0; --a) {
CHECK_GE(a-1, 0);
CHECK_LT(a-1, best_path.size());
CHECK_GE(a, 0);
CHECK_LT(a, best_path.size());
CHECK_GE(a, 0);
CHECK_LT(a, path.size());
CHECK_GE(best_path[a], 0);
CHECK_LT(best_path[a], path[a].size());
best_path[a-1] = path[a][best_path[a]];
}
}
}

//LOG(INFO) << "Write config";

// Now write the configuration.
vector<int> *sense_arguments =
static_cast<vector<int>*>(configuration);
Expand All @@ -201,13 +240,17 @@ class FactorPredicateAutomaton : public GenericFactor {
sense_arguments->push_back(a);
}
}

//LOG(INFO) << "End Maximize";
}

// Compute the score of a given assignment.
void Evaluate(const vector<double> &variable_log_potentials,
const vector<double> &additional_log_potentials,
const Configuration configuration,
double *value) {
//LOG(INFO) << "Begin Evaluate";

const vector<int> *sense_arguments =
static_cast<const vector<int>*>(configuration);
// Sense belong to {-1,0,1,...}
Expand All @@ -233,6 +276,7 @@ class FactorPredicateAutomaton : public GenericFactor {
int a2 = GetLength(s); // Stop position.
*value += GetSiblingScore(s, a1, a2, variable_log_potentials,
additional_log_potentials);
//LOG(INFO) << "End Evaluate";
}

// Given a configuration with a probability (weight),
Expand Down Expand Up @@ -343,6 +387,10 @@ class FactorPredicateAutomaton : public GenericFactor {
map_senses[s] = k;
}

//LOG(INFO) << outgoing_arcs.size() << " outgoing arcs.";
//LOG(INFO) << siblings.size() << " siblings.";


// Create a temporary list of arguments.
// Each argument position will be mapped to a one-based array, in
// which sense_arguments[s][1] = p, sense_arguments[s][2] = p+1 (p-1),
Expand All @@ -358,6 +406,7 @@ class FactorPredicateAutomaton : public GenericFactor {
CHECK_EQ(p, outgoing_arcs[k]->predicate());
int a = outgoing_arcs[k]->argument();
int s = outgoing_arcs[k]->sense();
//LOG(INFO) << "Arc " << s << " " << p << " " << a;
int position = (right)? a-p : p-a;
++position; // Position 0 is reserved for the case a1=-1.
CHECK_GE(position, 1) << p << " " << a;
Expand Down Expand Up @@ -398,6 +447,9 @@ class FactorPredicateAutomaton : public GenericFactor {
int s = siblings[k]->sense();
int a1 = siblings[k]->first_argument();
int a2 = siblings[k]->second_argument();

//LOG(INFO) << "Sibling " << s << " " << p << " " << a1 << " " << a2;

int position1 = right? a1-p : p-a1;
int position2 = right? a2-p : p-a2;
if (a1 < 0) position1 = -1; // To handle a1=-1.
Expand All @@ -420,6 +472,7 @@ class FactorPredicateAutomaton : public GenericFactor {
// Index of siblings in the additional_variables array.
index_siblings_[sense][first_argument][second_argument] = k;
}
CHECK_GE(index_siblings_[0][0][1], 0);
}

private:
Expand Down
46 changes: 34 additions & 12 deletions src/semantic_parser/SemanticDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1242,10 +1242,10 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
for (int r = 0; r < num_arcs; ++r) {
SemanticPartArc* arc =
static_cast<SemanticPartArc*>((*parts)[offset_arcs + r]);
if (arc->predicate() >= arc->argument()) {
// Handle self-loops (p=a) in the right side automaton.
if (arc->predicate() > arc->argument()) {
left_arc_indices[arc->predicate()].push_back(offset_arcs + r);
}
if (arc->predicate() <= arc->argument()) {
} else {
right_arc_indices[arc->predicate()].push_back(offset_arcs + r);
}
}
Expand All @@ -1261,16 +1261,24 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
SemanticPartConsecutiveSibling *sibling =
static_cast<SemanticPartConsecutiveSibling*>(
(*parts)[offset_consecutive_siblings + r]);
if (sibling->predicate() >= sibling->second_argument()) {
// TODO: Try to disable self loops on the left side?
// Make sure no non-basic sibling part ends up in two
// factors.
if (sibling->predicate() > sibling->second_argument()) {
// Left sibling.
left_siblings[sibling->predicate()].push_back(sibling);
left_scores[sibling->predicate()].push_back(
scores[offset_consecutive_siblings + r]);
// Save the part index to get the posterior later.
left_indices[sibling->predicate()].
push_back(offset_consecutive_siblings + r);
}
if (sibling->predicate() <= sibling->second_argument()) {
} else {
CHECK(sibling->predicate() < sibling->second_argument() ||
(sibling->predicate() == sibling->second_argument() &&
sibling->first_argument() < 0))
<< sibling->predicate() << " "
<< sibling->first_argument() << " "
<< sibling->second_argument();
// Right sibling.
right_siblings[sibling->predicate()].push_back(sibling);
right_scores[sibling->predicate()].push_back(
Expand All @@ -1284,6 +1292,13 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
// Now, go through each predicate and create left and right automata.
for (int p = 0; p < sentence->size(); ++p) {
// Build left head automaton.
if (predicate_part_indices[p].size() == 0) {
CHECK_EQ(left_arc_indices[p].size(), 0);
CHECK_EQ(right_arc_indices[p].size(), 0);
CHECK_EQ(left_siblings[p].size(), 0);
CHECK_EQ(right_siblings[p].size(), 0);
continue;
}
vector<AD3::BinaryVariable*> local_variables;
vector<SemanticPartPredicate*> predicate_senses;
vector<SemanticPartArc*> left_arcs;
Expand Down Expand Up @@ -1319,6 +1334,7 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
vector<SemanticPartArc*> right_arcs;
for (int s = 0; s < predicate_part_indices[p].size(); ++s) {
int r = predicate_part_indices[p][s];
CHECK_GE(r, 0);
int index = offset_predicate_variables + r - offset_predicate_parts;
local_variables.push_back(variables[index]);
SemanticPartPredicate *predicate =
Expand All @@ -1327,6 +1343,7 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
}
for (int k = 0; k < right_arc_indices[p].size(); ++k) {
int r = right_arc_indices[p][k];
CHECK_GE(r, 0);
int index = offset_arc_variables + r - offset_arcs;
local_variables.push_back(variables[index]);
SemanticPartArc *arc =
Expand All @@ -1345,6 +1362,8 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
}
}

//LOG(INFO) << "Created predicate automata.";

//////////////////////////////////////////////////////////////////////
// Build consecutive co-parent factors.
//////////////////////////////////////////////////////////////////////
Expand All @@ -1357,10 +1376,9 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
for (int r = 0; r < num_arcs; ++r) {
SemanticPartArc* arc =
static_cast<SemanticPartArc*>((*parts)[offset_arcs + r]);
if (arc->predicate() <= arc->argument()) {
if (arc->predicate() < arc->argument()) {
left_arc_indices[arc->argument()].push_back(offset_arcs + r);
}
if (arc->predicate() >= arc->argument()) {
} else {
right_arc_indices[arc->argument()].push_back(offset_arcs + r);
}
}
Expand All @@ -1376,16 +1394,18 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
SemanticPartConsecutiveCoparent *coparent =
static_cast<SemanticPartConsecutiveCoparent*>(
(*parts)[offset_consecutive_coparents + r]);
if (coparent->argument() >= coparent->second_predicate()) {
if (coparent->argument() > coparent->second_predicate()) {
// Left co-parent.
left_coparents[coparent->argument()].push_back(coparent);
left_scores[coparent->argument()].push_back(
scores[offset_consecutive_coparents + r]);
// Save the part index to get the posterior later.
left_indices[coparent->argument()].
push_back(offset_consecutive_coparents + r);
}
if (coparent->argument() <= coparent->second_predicate()) {
} else {
CHECK(coparent->argument() < coparent->second_predicate() ||
(coparent->argument() == coparent->second_predicate() &&
coparent->first_predicate() < 0));
// Right co-parent.
right_coparents[coparent->argument()].push_back(coparent);
right_scores[coparent->argument()].push_back(
Expand Down Expand Up @@ -1560,6 +1580,8 @@ void SemanticDecoder::DecodeFactorGraph(Instance *instance, Parts *parts,
factor_graph->SetResidualThresholdAD3(1e-3);
//factor_graph->SetResidualThresholdAD3(1e-6);

//LOG(INFO) << "Running AD3";

// Run AD3.
timeval start, end;
gettimeofday(&start, NULL);
Expand Down
16 changes: 14 additions & 2 deletions src/semantic_parser/SemanticPipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1042,14 +1042,16 @@ void SemanticPipe::MakePartsConsecutiveSiblings(Instance *instance,
}

// Left side.
// Allow self loops (a1 = p). We use a1 = p+1 to denote the special case
// NOTE: Self loops (a1 = p) are disabled on the left side, to prevent
// having repeated parts. We use a1 = p+1 to denote the special case
// in which a2 is the first argument.
for (int a1 = p+1; a1 >= 0; --a1) {
int r1 = -1;
if (a1 <= p) {
r1 = semantic_parts->FindArc(p, a1, s);
if (r1 < 0) continue;
}
if (a1 == p) continue; // See NOTE above.

if (make_gold) {
// Check if the first arc is active.
Expand All @@ -1067,6 +1069,8 @@ void SemanticPipe::MakePartsConsecutiveSiblings(Instance *instance,
r2 = semantic_parts->FindArc(p, a2, s);
if (r2 < 0) continue;
}
if (a2 == p) continue; // See NOTE above.

if (make_gold) {
// Check if the second arc is active.
if (a2 == -1 ||
Expand Down Expand Up @@ -1338,13 +1342,16 @@ void SemanticPipe::MakePartsConsecutiveCoparents(Instance *instance,
}

// Left side.
// Allow self loops (p1 = a). We use p1 = a+1 to denote the special case
// NOTE: Self loops (p1 = a) are disabled on the left side, to prevent
// having repeated parts. We use p1 = a+1 to denote the special case
// in which p2 is the first predicate.
for (int p1 = a+1; p1 >= 0; --p1) {
int num_senses1;
if (p1 > a) {
// If p1 = a+1, pretend there is a single sense (s1=0).
num_senses1 = 1;
} else if (p1 == a) { // See NOTE above.
continue;
} else {
//const vector<int> &senses = semantic_parts->GetSenses(p);
//CHECK_EQ(senses.size(), predicates->size());
Expand All @@ -1368,6 +1375,7 @@ void SemanticPipe::MakePartsConsecutiveCoparents(Instance *instance,
r1 = semantic_parts->FindArc(p1, a, s1);
if (r1 < 0) continue;
}
if (p1 == a) continue; // See NOTE above.

if (make_gold) {
// Check if the first arc is active.
Expand All @@ -1384,6 +1392,8 @@ void SemanticPipe::MakePartsConsecutiveCoparents(Instance *instance,
if (p2 == -1) {
// If p2 = -1, pretend there is a single sense (s2=0).
num_senses2 = 1;
} else if (p2 == a) { // See NOTE above.
continue;
} else {
//const vector<int> &senses = semantic_parts->GetSenses(p);
//CHECK_EQ(senses.size(), predicates->size());
Expand All @@ -1407,6 +1417,8 @@ void SemanticPipe::MakePartsConsecutiveCoparents(Instance *instance,
r2 = semantic_parts->FindArc(p2, a, s2);
if (r2 < 0) continue;
}
if (p2 == a) continue; // See NOTE above.

if (make_gold) {
// Check if the second arc is active.
if (p2 == -1 ||
Expand Down

0 comments on commit 6ce6c4c

Please sign in to comment.