-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from zifeishan/develop
BrainDump 0.1.2 - Better layout and more information in the automatic report (README.md) - Good-Turing estimator for extractions and features - Examine why features get high/low weight by looking at number of examples associated - Refined Documentation; add common diagnostics for KBP with BrainDump
- Loading branch information
Showing
125 changed files
with
16,527 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
.DS_Store | ||
util/config-generator/venv/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
|
||
########## Conventions. Changing these is not recommended. ########### | |
|
||
# Set the utility files dir | ||
export UTIL_DIR="$HOME/local/braindump" | ||
|
||
# Report folder: placed under the current working directory ($WORKING_DIR) | |
export REPORT_DIR="$WORKING_DIR/experiment-reports" | ||
|
||
|
||
########## User-specified configurations ########### | ||
|
||
# Directories | ||
|
||
# Use absolute path if possible. | ||
# Avoid using "pwd" or "dirname $0"; they don't work properly. | |
# $WORKING_DIR is set to be the directory where braindump is running. | ||
# (the directory that contains braindump.conf) | ||
export APP_HOME=$WORKING_DIR | ||
|
||
# Specify deepdive out directory (DEEPDIVE_HOME/out) | ||
export DD_OUTPUT_DIR="$HOME/repos/deepdive/out" | ||
|
||
# Database Configuration | ||
export DBNAME=$DBNAME | ||
export PGUSER=${PGUSER:-`whoami`} | ||
export PGPASSWORD=${PGPASSWORD:-} | ||
export PGPORT=${PGPORT:-5432} | ||
export PGHOST=${PGHOST:-localhost} | ||
|
||
# Specify all feature tables. | ||
# e.g. FEATURE_TABLES=(f1 f2 f3) | ||
export FEATURE_TABLES=(dd_query_classify_gene_hpoterm_relations_features) | ||
export FEATURE_COLUMNS=(feature) | ||
|
||
# Specify all variable tables | ||
export VARIABLE_TABLES=(hpoterm_mentions gene_mentions gene_hpoterm_relations) | ||
export VARIABLE_COLUMNS=(is_correct is_correct is_correct) | ||
export VARIABLE_DOCID_COLUMNS=(doc_id doc_id doc_id) | ||
export VARIABLE_WORDS_COLUMNS=(words words "words_1,words_2") | ||
# Assume that in DeepDive, inference result tables will be named as [VARIABLE_TABLE]_[VARIABLE_COLUMN]_inference | ||
|
||
# If the variable is a mention, specify the words / description for the mention. | ||
# This is used for a statistic based on naive entity linking. If empty, deduplicated mentions are not counted. | |
# e.g. export VARIABLE_WORDS_COLUMNS=(w1 "" w3) | ||
# In the example above, the second element is left empty. | |
#export VARIABLE_WORDS_COLUMNS=("word1, word2,rel") | ||
|
||
# Set variable docid columns to count distinct documents that have extractions | ||
# export VARIABLE_DOCID_COLUMNS=(doc_id) | ||
|
||
# Code configs to save | ||
export CODE_CONFIG="$WORKING_DIR/../empty-code.conf" | ||
|
||
# Number of samples | ||
export NUM_SAMPLED_FEATURES=100 | ||
export NUM_SAMPLED_SUPERVISION=500 | ||
export NUM_SAMPLED_RESULT=1000 | ||
export NUM_TOP_ENTITIES=50 | ||
|
||
# Specify some tables for statistics | ||
export SENTENCE_TABLE=sentences | ||
export SENTENCE_TABLE_DOC_ID_COLUMN=doc_id | ||
|
||
# Define how to send results. Use "true" to activate. | |
export SEND_RESULT_WITH_GIT=false | ||
# If true, push after committing the report | |
export SEND_RESULT_WITH_GIT_PUSH=false | ||
export SEND_RESULT_WITH_EMAIL=false | ||
|
||
######## CUSTOM SCRIPTS ########### | ||
# Leave blank for default stats report. | ||
# Set to the location of a script (e.g. $APP_HOME/your_script) to use it instead of the default | |
|
||
# Self-defined scripts for stats. | ||
export STATS_SCRIPT= | ||
export SUPERVISION_SAMPLE_SCRIPT= | ||
export INFERENCE_SAMPLE_SCRIPT= | ||
|
||
########## Conventions. Changing these is not recommended. ########### | |
|
||
# Hack: use the most recent DD run as the output dir | |
# Assumes out/ is under $DEEPDIVE_HOME/ | |
# You may need to change it manually to suit your setup | |
export DD_TIMESTAMP=`ls -t $DD_OUTPUT_DIR/ | head -n 1` | ||
export DD_THIS_OUTPUT_DIR=$DD_OUTPUT_DIR/$DD_TIMESTAMP | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#! /bin/bash | ||
|
||
export DBNAME=genomics | ||
export PGUSER=senwu | ||
export PGPASSWORD=${PGPASSWORD:-} | ||
export PGHOST=raiders2.stanford.edu | ||
export PGPORT=6432 | ||
|
||
export GPHOST=${GPHOST:-localhost} | ||
export GPPORT=${GPPORT:-15433} | ||
export GPPATH=${GPPATH:-/tmp} | ||
# . /lfs/local/0/senwu/software/greenplum/greenplum-db/before_greenplum.sh |
Oops, something went wrong.