Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

few bugs, few operations, few algorithms... #1

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
build
*.so
*.swp
*~
*.pyc
*.dot
*.png
*.pdf
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,17 @@ Python interface to [OpenFst](http://openfst.org)
## Installation

1. Install the latest version of OpenFst (1.3.2)

2. `pip install -e git+https://github.com/vchahun/pyfst.git#egg=pyfst`

3. Alternatively, build the extension in place from a source checkout:

python setup.py build_ext -i [--mustache] [--cython]

With the `--mustache` option, `fst.pyx` and `cfst.pxd` are generated from the mustache templates (requires [mustache](http://mustache.github.com/)).

With the `--cython` option, the setup compiles the `.pyx` files into `.cpp` files (requires [Cython 0.17.1](http://cython.org)).

## Usage

The [basic example](http://www.openfst.org/twiki/bin/view/FST/FstQuickTour#CreatingFsts) from the documentation translates to:
Expand Down Expand Up @@ -45,4 +54,10 @@ t[2].final = 3.5
t.shortest_path() # 2 -(a:A/0.5)-> 1 -(c:C/2.5)-> 0/3.5
```

## Examples

In `examples` you will find a bunch of test cases, e.g. `edit.py`, `sampling.py`, `matching.py`, etc.

## IPython notebook

The pyfst API is [IPython notebook](http://ipython.org/ipython-doc/dev/interactive/htmlnotebook.html)-friendly: transducer objects are [automatically drawn](http://nbviewer.ipython.org/3835477/) using [Graphviz](http://www.graphviz.org).
2 changes: 1 addition & 1 deletion examples/basic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from fst import StdVectorFst
from pyfst.fst import StdVectorFst

fst = StdVectorFst()

Expand Down
2 changes: 1 addition & 1 deletion examples/edit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys
import fst
import pyfst.fst as fst

def make_input(word, syms):
"""
Expand Down
94 changes: 94 additions & 0 deletions examples/matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from pyfst.fst import StdVectorFst
from pyfst.algorithm.matching import substring_matcher, trie_matcher, path_matcher
from pyfst.algorithm.util import path_fsa, draw, make_fsa
from pytrie import Trie
from time import time
import sys

def _print_err(message):
    """Write one line of diagnostic text to stderr."""
    sys.stderr.write(message + '\n')


def _answer_queries(dfa, show_total):
    """Read whitespace-separated integer queries from stdin and print matches.

    Each input line is turned into a path automaton with ``path_fsa`` and
    intersected with ``dfa``; every path of the result is printed as
    ``label:weight`` pairs.  When ``show_total`` is true the sum of the arc
    weights is appended after a colon.  A ``KeyError`` raised while
    intersecting or enumerating paths is reported as ``does not belong``
    (presumably the query uses a label unknown to the matcher -- confirm).
    The process exits with status 0 once stdin is exhausted.
    """
    try:
        while True:
            query = [int(x) for x in raw_input().split()]
            f = path_fsa(query)
            try:
                f = dfa.intersect(f)
                for path in f.paths():
                    arcs = [(arc.ilabel, float(arc.weight)) for arc in path]
                    labelled = ' '.join(['%d:%s' % (l, str(w)) for l, w in arcs])
                    if show_total:
                        print('%s: %f' % (labelled, sum(w for l, w in arcs)))
                    else:
                        print('%s' % (labelled))
            except KeyError:
                print('does not belong')
    except EOFError:
        sys.exit(0)


# Command line: algorithm, output stem for the drawing, vocabulary range
# "lower-upper", then one or more patterns.
if len(sys.argv) < 5:
    _print_err('Usage: python %s substring|trie|path outstem vocab [pattern/weight]+ < queries' % sys.argv[0])
    _print_err('Examples:')
    _print_err('echo "4 5 1 2 3 6" | python %s substring matching-substring 1-10 1,2,3/2' % sys.argv[0])
    _print_err('echo "5 3 1 2 3 4" | python %s trie matching-trie 1-10 1,2,3/2 1,2/1 3,1,2,3/5' % sys.argv[0])
    _print_err('echo "1 2 3" | python %s path matching-path 1-10 1,2,3 1,2,4 1,3,4 1,2 3,4,5 3,4,5,6,7 3,4,1' % sys.argv[0])
    sys.exit(0)

alg = sys.argv[1]
ostem = sys.argv[2]
lower, upper = sys.argv[3].split('-')
lower, upper = int(lower), int(upper)
patterns = sys.argv[4:]
# Vocabulary: every integer label in the inclusive range lower..upper.
V = range(lower, upper + 1)

if alg == "substring":
    if len(patterns) != 1:
        # Fixed: the original `print sys.stderr, ...` lacked `>>`, so it
        # printed the stderr object and the warning to stdout.
        _print_err("I'm using only the first pattern")
    pattern, weight = patterns[0].split('/')
    print('pattern: %s (%s)' % (pattern, weight))
    pattern = [int(x) for x in pattern.split(',')]
    weight = float(weight)
    dfa = substring_matcher(V, tuple(pattern), weight)
    draw(dfa, ostem)
    _answer_queries(dfa, True)

elif alg == "trie":
    # Map each pattern (tuple of integer labels) to its weight.
    trie = Trie()
    for spec in patterns:
        pattern, weight = spec.split('/')
        print('+pattern: %s (%s)' % (pattern, weight))
        pattern = tuple(int(x) for x in pattern.split(','))
        weight = float(weight)
        trie[pattern] = weight

    dfa = trie_matcher(V, trie)
    draw(dfa, ostem)
    _answer_queries(dfa, True)

elif alg == "path":
    # Patterns carry no weight here; the trie only records membership.
    trie = Trie()
    for spec in patterns:
        print('+pattern: %s' % (spec))
        pattern = tuple(int(x) for x in spec.split(','))
        trie[pattern] = True

    dfa = path_matcher(trie)
    draw(dfa, ostem)
    _answer_queries(dfa, False)

else:
    _print_err("Unknown algorithm %s" % alg)

68 changes: 68 additions & 0 deletions examples/sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from pyfst.fst import LogVectorFst
from pyfst.algorithm.util import network_fsa
from pyfst.algorithm.sampling import samples, deque_samples
from collections import defaultdict
from random import random, randint
from time import time

def small():
    """Build the small hand-specified network and report its size.

    The arc callback given to ``network_fsa`` looks up a fixed two-element
    list for each (source, target) state pair (presumably ``[label, weight]``
    -- confirm against ``network_fsa``).  Prints the number of states, the
    number of arcs and the construction time, then returns the automaton.
    """
    entries = (
        (0, 1, [1, 2]),
        (0, 2, [2, 1]),
        (1, 3, [3, 2]),
        (1, 4, [4, 4]),
        (2, 3, [5, 6]),
        (2, 4, [6, 2]),
        (3, 5, [7, 1]),
        (4, 5, [8, 2]),
    )
    table = defaultdict(lambda: defaultdict(int))
    for src, dst, value in entries:
        table[src][dst] = value

    started = time()
    fsa = network_fsa(2, 2, arc=lambda sfrom, sto: table[sfrom][sto])
    elapsed = time() - started
    print('Small: states %d arcs %d time %f' % (len(fsa), fsa.num_arcs(), elapsed))
    return fsa

def big():
    """Build a large randomly weighted network and report its size.

    Every arc is labelled with its target state.  Its weight is a uniform
    draw from [0, 1), increased by a random integer in 5..15 unless the
    target state is congruent to 1 modulo 101.  Prints the number of states,
    the number of arcs and the construction time, then returns the automaton.
    """
    def custom(sfrom, sto):
        weight = random()
        if sto % 101 != 1:
            weight += randint(5, 15)
        return sto, weight

    started = time()
    fsa = network_fsa(20, 400, arc=custom)
    elapsed = time() - started
    print('Big: states %d arcs %d time %f' % (len(fsa), fsa.num_arcs(), elapsed))
    return fsa

def test(f, N = 1000, top = 10):
    """Time both samplers on ``f`` and print their most frequent paths.

    ``f`` is converted to a ``LogVectorFst``, ``shortest_distance(True)`` is
    computed on it, and then ``samples`` and ``deque_samples`` are each asked
    for ``N`` samples.  For each sampler the ``top`` most common paths are
    printed with their counts, followed by the elapsed time.
    """
    started = time()
    f = LogVectorFst(f)
    elapsed = time() - started
    print('%s %s' % ('Tropical -> Log', elapsed))

    started = time()
    totals = f.shortest_distance(True)
    elapsed = time() - started
    print('%s %s' % ('shortest distance', elapsed))

    # Both samplers take the same arguments and are reported identically.
    for title, sampler in (('samples', samples), ('deque_samples', deque_samples)):
        print(title)
        started = time()
        dist = sampler(f, totals, N)
        elapsed = time() - started
        for path, n in dist.most_common(top):
            print('%s %s %s' % (' ', n, path))
        print('%s %s %s %s' % ('', N, 'samples', elapsed))

if __name__ == '__main__':
    # Build and exercise the small network first, then the big one.
    for build in (small, big):
        test(build())
23 changes: 23 additions & 0 deletions examples/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pyfst.algorithm.util import draw, path_fsa, make_fsa, network_fsa

# Path automaton built from a plain list of labels.
p = path_fsa([10, 20, 30])
draw(p, 'util-path')

# Path automaton built from pairs: per-item weights are supplied and the
# label callback picks the second element of each pair.
pairs = [(1, 10), (2, 20), (3, 30)]
wp = path_fsa(pairs, weights=[0.5, 0.6, 0.7], label=lambda pair: pair[1])
draw(wp, 'util-wpath')

# Explicit arc list for make_fsa (presumably (from, to, label, weight)
# tuples, with 6 states, start 0 and final 5 -- confirm against make_fsa).
wfsa_arcs = [
    (0, 1, 1, 1),
    (0, 2, 2, 1),
    (1, 3, 3, 2),
    (1, 4, 4, 4),
    (2, 3, 3, 2.5),
    (2, 4, 4, 5),
    (3, 5, 5, 1),
    (4, 5, 5, 1),
]
wfsa = make_fsa(6, 0, 5, sort=True, arcs=wfsa_arcs)
draw(wfsa, 'util-wfsa')

# Network automaton using network_fsa's default arc function.
net = network_fsa(3, 3)
draw(net, 'util-net')

Empty file added pyfst/__init__.py
Empty file.
Empty file added pyfst/algorithm/__init__.py
Empty file.
Loading