Skip to content

Commit

Permalink
Ruler version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
primeviking committed Oct 7, 2020
1 parent 12feedd commit 03df924
Show file tree
Hide file tree
Showing 13 changed files with 9,705 additions and 5,244 deletions.
95 changes: 95 additions & 0 deletions server/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
*.aux
*.bbl
*.log
*.blg
*.ent
*.idx
*.out
*.synctex.gz
behzad/*
chen/*
test/News*
__pycache__/
.RData
.Rhistory
main.pdf
.DS_Store
code/venv
code/.idea
venv/*
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
data/Amazon*
data/Youtu*
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

snorkel.db
.idea
*.json
*.pkl
*.csv
*.zip
Empty file added server/test/__init__.py
Empty file.
88 changes: 88 additions & 0 deletions server/test/test_modeler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import numpy as np
import pandas as pd
import unittest

from snorkel.labeling import LabelingFunction
from types import SimpleNamespace
from verifier.modeler import Modeler


class modelerTest(unittest.TestCase):
    """Exercise ``Modeler`` end to end on a tiny synthetic dataset.

    Every test starts from the state built in ``setUp``: four identical
    20-row splits, four labeling functions registered and applied, and a
    fitted label model.
    """

    def setUp(self):
        """Build the splits, register the labeling functions and fit the model."""

        def createDfSplit(p):
            """Return a 20-row frame with ``label`` and ``text`` columns.

            The first ``int(20 * p)`` rows get label 0 and the remaining rows
            label 1; ``text`` is "negative" for label 0 and "positive" for
            label 1.
            """
            df_size = 20
            num_zeros = int(df_size * p)
            df = pd.DataFrame({
                "label": np.append(np.zeros(num_zeros), np.ones(df_size - num_zeros)),
            })
            df["label"] = df["label"].astype("int32")
            df["text"] = df["label"].map({1: "positive", 0: "negative"})
            return df

        df_train, df_dev, df_valid, df_test = [createDfSplit(p) for p in [0.5, 0.5, 0.5, 0.5]]
        self.m = Modeler(df_train, df_dev, df_valid, df_test)
        lfs = {
            # Constant voters: each is right on exactly half of the rows.
            "1": LabelingFunction(name="1", f=lambda x: 1),
            "0": LabelingFunction(name="0", f=lambda x: 0),
            # Votes 1 on "positive" text and returns -1 otherwise.
            "pos": LabelingFunction(name="pos", f=lambda x: 1 if x.text == "positive" else -1),
            # Always votes the opposite of the gold label given the
            # text/label mapping above (1 on "negative", 0 on "positive").
            "neg": LabelingFunction(name="neg", f=lambda x: 1 if x.text == "negative" else 0),
        }
        self.lfs = lfs
        self.m.add_lfs(lfs)
        self.m.apply_lfs()
        self.m.fit_label_model()

    def test_analyze_lfs(self):
        """Coverage and empirical accuracy match the hand-computed values.

        The expected lists assume the analysis rows come back in the order
        the labeling functions were registered ("1", "0", "pos", "neg").
        """
        analysis = self.m.analyze_lfs()
        # assert_array_equal reports the mismatching entries on failure,
        # unlike assertTrue((a == b).all()).
        np.testing.assert_array_equal(analysis["Coverage Dev."].values, [1, 1, 0.5, 1])
        np.testing.assert_array_equal(analysis["Emp. Acc."].values, [0.5, 0.5, 1, 0])

    def test_lf_mistakes(self):
        """A perfectly accurate LF has no mistakes; an LF never returns -1 on its mistakes."""
        analysis = self.m.analyze_lfs()
        for lfid, lf in self.lfs.items():
            mistakes = self.m.lf_mistakes(lfid)
            if analysis.loc[lfid, "Emp. Acc."] == 1.0:
                self.assertEqual(len(mistakes), 0)
            for ex in mistakes:
                x = SimpleNamespace(text=ex["text"])
                self.assertNotEqual(lf(x), -1)

    def test_lf_examples(self):
        """Every example returned for an LF is one on which it does not return -1."""
        for lfid, lf in self.lfs.items():
            examples = self.m.lf_examples(lfid)
            for ex in examples:
                x = SimpleNamespace(text=ex["text"])
                self.assertNotEqual(lf(x), -1)

    def test_get_label_model_stats(self):
        """Smoke test: computing label-model stats must not raise."""
        self.m.get_label_model_stats()

    def test_pred_prob(self):
        """Repeated ``predict_proba`` calls on the same matrix give identical output."""
        probs = self.m.label_model.predict_proba(L=self.m.L_train)
        for _ in range(5):
            probs2 = self.m.label_model.predict_proba(L=self.m.L_train)
            np.testing.assert_array_equal(probs, probs2)

    def test_train(self):
        """Repeated ``train`` calls report the same stats."""
        # NOTE(review): the original called train() twice before taking the
        # baseline; the first call is kept as a warm-up in case it changes
        # state that later calls depend on -- confirm whether it is needed.
        self.m.train()
        stats1 = self.m.train()
        for _ in range(5):
            stats2 = self.m.train()
            self.assertEqual(stats1, stats2)

    def test_next_text(self):
        """Smoke test: fetching the next text must not raise."""
        self.m.next_text()

    def test_filter_identical_signature(self):
        """An LF that always votes 1 is flagged as a duplicate of LF "1"."""
        lfs = {"dupe": LabelingFunction(name="dupe", f=lambda x: 1)}
        self.m.add_lfs(lfs)
        self.m.apply_lfs()
        self.m.fit_label_model()
        a = self.m.analyze_lfs()
        self.assertEqual(a.loc["dupe", "Duplicate"], "1")

214 changes: 214 additions & 0 deletions server/test/test_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
import unittest

from synthesizer.gll import *
from verifier.translator import make_lf
from types import SimpleNamespace

class translatorTest(unittest.TestCase):
    """Check that ``make_lf`` turns rule descriptions into working labeling functions.

    Each test builds one rule (label 1), then asserts that the resulting
    labeling function returns 1 on every positive example and -1 on every
    negative example.
    """

    @staticmethod
    def _emphasis_concepts():
        """Return a fresh ``concepts`` mapping containing the shared "emphasis" concept."""
        return {
            "emphasis": [
                {"string": "very", "type": KeyType[TOKEN]},
                {"string": "so+", "type": KeyType[REGEXP], "case_sensitive": False},
                # Raw string: "\w" in a normal literal is an invalid escape
                # sequence and triggers a warning on modern Python.
                {"string": r"[Ee]xtreme\w+", "type": KeyType[REGEXP], "case_sensitive": True},
            ]
        }

    @staticmethod
    def _emphasis_and_great_conds():
        """Return the two-condition list (emphasis concept + token "great") used by several tests."""
        return [
            {"string": "emphasis", "type": KeyType[CONCEPT]},
            {"string": "great", "type": KeyType[TOKEN], "case_sensitive": False},
        ]

    @staticmethod
    def _build_lf(conds, connective, direction=False, concepts=None):
        """Build a label-1 rule named "test_lf" and translate it with ``make_lf``.

        Args:
            conds: list of condition dicts for the rule.
            connective: key into ``ConnectiveType`` (``OR`` or ``AND``).
            direction: value stored under ``DIRECTION``.
            concepts: concept mapping passed to ``make_lf``; an empty dict
                is used when omitted.
        """
        instance = {
            "name": "test_lf",
            LABEL: 1,
            DIRECTION: direction,
            CONNECTIVE: ConnectiveType[connective],
            CONDS: conds,
        }
        return make_lf(instance, {} if concepts is None else concepts)

    def _assert_labels(self, lf, positives, negatives):
        """Assert ``lf`` returns 1 for each text in ``positives`` and -1 for each in ``negatives``."""
        for expected, texts in ((1, positives), (-1, negatives)):
            for text in texts:
                # subTest lets the unittest runner report every failing
                # example; msg keeps the text visible under other runners.
                with self.subTest(text=text, expected=expected):
                    x = SimpleNamespace(text=text)
                    self.assertEqual(lf(x), expected, msg=text)

    def test_one_token(self):
        """A single token condition matches the whole word regardless of case or punctuation."""
        lf = self._build_lf(
            [{"string": "great", "type": KeyType[TOKEN]}],
            OR,
        )
        positives = [
            "This is great!!!",
            "Great stuff.",
            "such ~GREAT~ work",
        ]
        negatives = [
            "The greatest of all time",
            "gre at",
            "Hi mom",
        ]
        self._assert_labels(lf, positives, negatives)

    def test_one_concept(self):
        """A concept condition fires when any element of the concept matches."""
        lf = self._build_lf(
            [{"string": "emphasis", "type": KeyType[CONCEPT]}],
            OR,
            concepts=self._emphasis_concepts(),
        )
        positives = [
            "This is soooooo great!!!",
            "I'm extremely impressed.",
            "VERY good test cases",
        ]
        negatives = [
            "The greatest of all time",
            "s good",
            "it's just not for me",
        ]
        self._assert_labels(lf, positives, negatives)

    def test_OR(self):
        """With the OR connective, matching either condition is enough."""
        lf = self._build_lf(
            self._emphasis_and_great_conds(),
            OR,
            concepts=self._emphasis_concepts(),
        )
        positives = [
            "This is great!!!",
            "Great stuff.",
            "such ~GREAT~ work",
            "This is soooooo great!!!",
            "I'm extremely impressed.",
            "VERY good test cases",
        ]
        negatives = [
            "The best of all time",
            "s good",
            "it's just not for me",
        ]
        self._assert_labels(lf, positives, negatives)

    def test_AND(self):
        """With the AND connective, both conditions must match."""
        lf = self._build_lf(
            self._emphasis_and_great_conds(),
            AND,
            concepts=self._emphasis_concepts(),
        )
        positives = [
            "This is sooooo great!!!",
            "such extremely ~GREAT~ work",
            "I'm extremely impressed by the great work.",
        ]
        negatives = [
            "The best of all time",
            "Great stuff.",
            "so very very good",
            "it's just not for me",
        ]
        self._assert_labels(lf, positives, negatives)

    def test_directional(self):
        """With ``DIRECTION`` set, a text where "great" precedes the emphasis is rejected.

        NOTE(review): this suggests the conditions must match in the listed
        order -- confirm against ``make_lf``.
        """
        lf = self._build_lf(
            self._emphasis_and_great_conds(),
            AND,
            direction=True,
            concepts=self._emphasis_concepts(),
        )
        positives = [
            "This is sooooo great!!!",
            "This is SOOOO great!!!",
            "such extremely ~GREAT~ work",
            "I'm extremely impressed by the great work.",
        ]
        negatives = [
            "The best of all time",
            "Great stuff.",
            "so very very good",
            "it's just not for me",
            "The great work is extremely impressive.",
        ]
        self._assert_labels(lf, positives, negatives)

    def test_NER(self):
        """An NER condition on "ORG" fires on texts that mention an organisation."""
        lf = self._build_lf(
            [{"string": "ORG", "type": KeyType[NER]}],
            OR,
        )
        positives = [
            "The FBI is here",
            "I called the World Bank",
            "MVPD is here to serve.",
        ]
        negatives = [
            "The greatest of all time",
            "gre at",
            "Hi mom",
        ]
        self._assert_labels(lf, positives, negatives)

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit 03df924

Please sign in to comment.