-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
12feedd
commit 03df924
Showing
13 changed files
with
9,705 additions
and
5,244 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
*.aux | ||
*.bbl | ||
*.log | ||
*.blg | ||
*.ent | ||
*.idx | ||
*.out | ||
*.synctex.gz | ||
behzad/* | ||
chen/* | ||
test/News* | ||
__pycache__/ | ||
.RData | ||
.Rhistory | ||
main.pdf | ||
.DS_Store | ||
code/venv | ||
code/.idea | ||
venv/* | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm | ||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | ||
data/Amazon* | ||
data/Youtu* | ||
# User-specific stuff | ||
.idea/**/workspace.xml | ||
.idea/**/tasks.xml | ||
.idea/**/usage.statistics.xml | ||
.idea/**/dictionaries | ||
.idea/**/shelf | ||
|
||
# Generated files | ||
.idea/**/contentModel.xml | ||
|
||
# Sensitive or high-churn files | ||
.idea/**/dataSources/ | ||
.idea/**/dataSources.ids | ||
.idea/**/dataSources.local.xml | ||
.idea/**/sqlDataSources.xml | ||
.idea/**/dynamic.xml | ||
.idea/**/uiDesigner.xml | ||
.idea/**/dbnavigator.xml | ||
|
||
# Gradle | ||
.idea/**/gradle.xml | ||
.idea/**/libraries | ||
|
||
# Gradle and Maven with auto-import | ||
# When using Gradle or Maven with auto-import, you should exclude module files, | ||
# since they will be recreated, and may cause churn. Uncomment if using | ||
# auto-import. | ||
# .idea/modules.xml | ||
# .idea/*.iml | ||
# .idea/modules | ||
# *.iml | ||
# *.ipr | ||
|
||
# CMake | ||
cmake-build-*/ | ||
|
||
# Mongo Explorer plugin | ||
.idea/**/mongoSettings.xml | ||
|
||
# File-based project format | ||
*.iws | ||
|
||
# IntelliJ | ||
out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Cursive Clojure plugin | ||
.idea/replstate.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
fabric.properties | ||
|
||
# Editor-based Rest Client | ||
.idea/httpRequests | ||
|
||
# Android studio 3.1+ serialized cache file | ||
.idea/caches/build_file_checksums.ser | ||
|
||
snorkel.db | ||
.idea | ||
*.json | ||
*.pkl | ||
*.csv | ||
*.zip |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import unittest | ||
|
||
from snorkel.labeling import LabelingFunction | ||
from types import SimpleNamespace | ||
from verifier.modeler import Modeler | ||
|
||
|
||
class modelerTest(unittest.TestCase):
    """Exercise ``Modeler`` on small synthetic splits with four fixed labeling functions."""

    @staticmethod
    def _make_split(p):
        """Return a 20-row frame whose first ``int(20 * p)`` labels are 0 and the rest 1.

        The ``text`` column mirrors the label: 1 -> "positive", 0 -> "negative".
        """
        total = 20
        zero_count = int(total * p)
        labels = np.concatenate((np.zeros(zero_count), np.ones(total - zero_count)))
        frame = pd.DataFrame({"label": labels})
        frame["label"] = frame["label"].astype('int32')
        frame["text"] = frame["label"].map({1: "positive", 0: "negative"})
        return frame

    def setUp(self):
        """Build four half/half splits, register the labeling functions and fit the label model."""
        # Positional order: train, dev, valid, test.
        splits = [self._make_split(fraction) for fraction in (0.5, 0.5, 0.5, 0.5)]
        self.m = Modeler(*splits)
        self.lfs = {
            "1": LabelingFunction(name="1", f=lambda x: 1),
            "0": LabelingFunction(name="0", f=lambda x: 0),
            "pos": LabelingFunction(name="pos", f=lambda x: 1 if x.text == "positive" else -1),
            # Votes 1 on "negative" text and 0 otherwise; test_analyze_lfs expects accuracy 0 for it.
            "neg": LabelingFunction(name="neg", f=lambda x: 1 if x.text == "negative" else 0),
        }
        self.m.add_lfs(self.lfs)
        self.m.apply_lfs()
        self.m.fit_label_model()

    def test_analyze_lfs(self):
        """Coverage and empirical accuracy must match the hand-computed values per LF."""
        report = self.m.analyze_lfs()
        coverage_matches = report["Coverage Dev."].values == [1, 1, 0.5, 1]
        accuracy_matches = report["Emp. Acc."] == [0.5, 0.5, 1, 0]
        self.assertTrue(coverage_matches.all())
        self.assertTrue(accuracy_matches.all())

    def test_lf_mistakes(self):
        """A perfectly accurate LF reports no mistakes; every reported mistake is a non -1 vote."""
        report = self.m.analyze_lfs()
        for lfid, lf in self.lfs.items():
            mistakes = self.m.lf_mistakes(lfid)
            row = report.loc[lfid]
            if row["Emp. Acc."] == 1.0:
                self.assertEqual(len(mistakes), 0)
            for ex in mistakes:
                point = SimpleNamespace(text=ex["text"])
                self.assertNotEqual(lf(point), -1)

    def test_lf_examples(self):
        """Every example returned for an LF must be one the LF does not label -1."""
        for lfid, lf in self.lfs.items():
            for ex in self.m.lf_examples(lfid):
                point = SimpleNamespace(text=ex["text"])
                self.assertNotEqual(lf(point), -1)

    def test_get_label_model_stats(self):
        """Smoke test: the call only has to complete without raising."""
        self.m.get_label_model_stats()

    def test_pred_prob(self):
        """Repeated ``predict_proba`` calls on the same matrix return identical values."""
        label_model = self.m.label_model
        baseline = label_model.predict_proba(L=self.m.L_train)
        for _ in range(5):
            repeat = label_model.predict_proba(L=self.m.L_train)
            self.assertTrue((baseline == repeat).all())

    def test_train(self):
        """Stats returned by ``train`` stay the same across repeated calls."""
        # The reference value is taken from the second call; the first result is discarded.
        self.m.train()
        reference = self.m.train()
        print(reference)
        for _ in range(5):
            current = self.m.train()
            print(current)
            self.assertEqual(reference, current)

    def test_next_text(self):
        """Smoke test: the call only has to complete without raising."""
        self.m.next_text()

    def test_filter_identical_signature(self):
        """An LF that votes exactly like LF "1" is reported as a duplicate of "1"."""
        self.m.add_lfs({"dupe": LabelingFunction(name="dupe", f=lambda x: 1)})
        self.m.apply_lfs()
        self.m.fit_label_model()
        report = self.m.analyze_lfs()
        self.assertEqual(report.loc["dupe"]["Duplicate"], "1")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
import unittest | ||
|
||
from synthesizer.gll import * | ||
from verifier.translator import make_lf | ||
from types import SimpleNamespace | ||
|
||
class translatorTest(unittest.TestCase):
    """Check labeling functions built by ``make_lf`` against hand-written example sentences.

    Each test builds an instance description, turns it into a labeling
    function, and asserts that it returns 1 for every positive example and -1
    for every negative example.
    """

    def setUp(self):
        """No shared fixtures are needed; every test builds its own labeling function."""

    @staticmethod
    def _instance(connective, conds, direction=False):
        """Return the instance dict handed to ``make_lf`` with label 1 and name "test_lf"."""
        return {
            "name": "test_lf",
            LABEL: 1,
            DIRECTION: direction,
            CONNECTIVE: ConnectiveType[connective],
            CONDS: conds,
        }

    @staticmethod
    def _emphasis_concepts():
        """Return a fresh concept table holding the single "emphasis" concept."""
        return {
            "emphasis": [
                {"string": "very", "type": KeyType[TOKEN]},
                {"string": "so+", "type": KeyType[REGEXP], "case_sensitive": False},
                # Raw string: "\w" is not a valid escape in a plain string literal.
                {"string": r"[Ee]xtreme\w+", "type": KeyType[REGEXP], "case_sensitive": True},
            ]
        }

    @staticmethod
    def _emphasis_and_great_conds():
        """Return the two conditions shared by the OR / AND / directional tests."""
        return [
            {"string": "emphasis", "type": KeyType[CONCEPT]},
            {"string": "great", "type": KeyType[TOKEN], "case_sensitive": False},
        ]

    def _assert_labels(self, lf, positives, negatives):
        """Assert ``lf`` returns 1 on each positive text and -1 on each negative text."""
        for ex in positives:
            self.assertEqual(lf(SimpleNamespace(text=ex)), 1, msg=ex)
        for ex in negatives:
            self.assertEqual(lf(SimpleNamespace(text=ex)), -1, msg=ex)

    def test_one_token(self):
        """A single token condition matches the whole word, regardless of case."""
        instance = self._instance(OR, [{"string": "great", "type": KeyType[TOKEN]}])
        lf = make_lf(instance, {})
        self._assert_labels(
            lf,
            positives=[
                "This is great!!!",
                "Great stuff.",
                "such ~GREAT~ work",
            ],
            negatives=[
                "The greatest of all time",
                "gre at",
                "Hi mom",
            ],
        )

    def test_one_concept(self):
        """A concept condition fires when any of the concept's members matches."""
        instance = self._instance(OR, [{"string": "emphasis", "type": KeyType[CONCEPT]}])
        lf = make_lf(instance, self._emphasis_concepts())
        self._assert_labels(
            lf,
            positives=[
                "This is soooooo great!!!",
                "I'm extremely impressed.",
                "VERY good test cases",
            ],
            negatives=[
                "The greatest of all time",
                "s good",
                "it's just not for me",
            ],
        )

    def test_OR(self):
        """With the OR connective, matching either condition is enough."""
        instance = self._instance(OR, self._emphasis_and_great_conds())
        lf = make_lf(instance, self._emphasis_concepts())
        self._assert_labels(
            lf,
            positives=[
                "This is great!!!",
                "Great stuff.",
                "such ~GREAT~ work",
                "This is soooooo great!!!",
                "I'm extremely impressed.",
                "VERY good test cases",
            ],
            negatives=[
                "The best of all time",
                "s good",
                "it's just not for me",
            ],
        )

    def test_AND(self):
        """With the AND connective, both conditions must match."""
        instance = self._instance(AND, self._emphasis_and_great_conds())
        lf = make_lf(instance, self._emphasis_concepts())
        self._assert_labels(
            lf,
            positives=[
                "This is sooooo great!!!",
                "such extremely ~GREAT~ work",
                "I'm extremely impressed by the great work.",
            ],
            negatives=[
                "The best of all time",
                "Great stuff.",
                "so very very good",
                "it's just not for me",
            ],
        )

    def test_directional(self):
        """With DIRECTION set, both conditions must match and their order matters.

        The last negative example contains both terms but with "great" before
        the emphasis word -- presumably conditions must appear in the order
        they are listed; confirm against ``make_lf``.
        """
        instance = self._instance(AND, self._emphasis_and_great_conds(), direction=True)
        lf = make_lf(instance, self._emphasis_concepts())
        self._assert_labels(
            lf,
            positives=[
                "This is sooooo great!!!",
                "This is SOOOO great!!!",
                "such extremely ~GREAT~ work",
                "I'm extremely impressed by the great work.",
            ],
            negatives=[
                "The best of all time",
                "Great stuff.",
                "so very very good",
                "it's just not for me",
                "The great work is extremely impressive.",
            ],
        )

    def test_NER(self):
        """An NER condition on "ORG" fires for texts that mention an organization."""
        instance = self._instance(OR, [{"string": "ORG", "type": KeyType[NER]}])
        lf = make_lf(instance, {})
        self._assert_labels(
            lf,
            positives=[
                "The FBI is here",
                "I called the World Bank",
                "MVPD is here to serve.",
            ],
            negatives=[
                "The greatest of all time",
                "gre at",
                "Hi mom",
            ],
        )
|
||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Oops, something went wrong.