Skip to content

Commit

Permalink
CPSM-like similar-file matching based on Levenshtein distance
Browse files Browse the repository at this point in the history
  • Loading branch information
raghur committed Oct 1, 2018
1 parent 23cf322 commit d5d4980
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 34 deletions.
44 changes: 25 additions & 19 deletions rplugin/python3/denite/filter/matcher/fruzzymatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys
import logging
from os import path

logger = logging.getLogger()
pkgPath = os.path.dirname(__file__).split(os.path.sep)[:-3]
Expand Down Expand Up @@ -36,42 +37,47 @@ def __init__(self, vim):
self.debug("usenative: %s" % self.useNative)

def filter(self, context):
if not context['candidates'] or not context['input']:
return context['candidates']
candidates = context['candidates']
qry = context['input']
# self.debug("source: %s" % candidates[0]['source_name'])
# self.debug("source: %s" % context['source_name'])
ispath = candidates[0]['source_name'] in ["file", "file_rec",
"file_mru", "directory",
"directory_mru", "file_old",
"directory_rec", "buffer"]
ispath = False
for s in context['sources']:
if s['name'] in ["file", "file_rec",
"file_mru", "directory",
"directory_mru", "file_old",
"directory_rec", "buffer"]:
ispath = True
break
# self.debug("candidates %s %s" % (qry, len(candidates)))
results = self.scoreMatchesProxy(qry, candidates, 10,
key=lambda x: x['word'],
ispath=ispath)
ispath=ispath,
buffer=context['bufnr'])
# self.debug("results %s" % results)
rset = [w[0] for w in results]
# self.debug("rset %s" % rset)
return rset

def scoreMatchesProxy(self, q, c, limit, key=None, ispath=True):
def scoreMatchesProxy(self, q, c, limit, key=None, ispath=True, buffer=0):
relname = ""
if ispath and buffer > 0 and q == "":
fname = self.vim.buffers[buffer].name
d = self.vim.command("pwd")
relname = path.relpath(fname, start=d)
self.debug("buffer: %s, '%s'" % (relname, q))
if self.useNative:
idxArr = self.nativeMethod(q, [key(d) for d in c], limit, ispath)
idxArr = self.nativeMethod(q, [key(d) for d in c],
relname, limit, ispath)
results = []
for i in idxArr:
results.append((c[i[0]], i[1]))
idx, score = i
results.append((c[idx], score))
return results
else:
return fruzzy.scoreMatches(q, c, limit, key, ispath)
return fruzzy.scoreMatches(q, c, relname, limit, key, ispath)

def convert_pattern(self, input_str):
if not input_str:
return input_str
pat = ""
for c in input_str[:-1]:
pat = pat + "%s[^%s]{-}" % (c, c)
p = pat + input_str[-1]
# p = convert2fuzzy_pattern(input_str)
# self.debug("pattern: %s : %s" % (input_str, p))
p = convert2fuzzy_pattern(input_str)
self.debug("pattern: %s : %s" % (input_str, p))
return p
6 changes: 5 additions & 1 deletion rplugin/python3/fruzzy.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ def scorer(x, key, ispath=True):
# return position_boost + cluster_boost + sep_boost + camel_boost


def scoreMatches(query, candidates, limit, key=None, ispath=True):
def scoreMatches(query, candidates, current, limit, key=None, ispath=True):
# TODO: implement Levenshtein-based ranking, but not at the cost of
# complicating the installation
if query == "":
return ((c, 0) for c in candidates)
key = idfn if not key else key
matches = fuzzyMatches(query, candidates, limit * 5, key, ispath)
return heapq.nlargest(limit, matches, key=lambda x: x[5])
Expand Down
33 changes: 19 additions & 14 deletions rplugin/python3/fruzzy_mod.nim
Original file line number Diff line number Diff line change
Expand Up @@ -262,31 +262,36 @@ proc isMatch(query, candidate: string, m: var Match) =
break
return

iterator fuzzyMatches(query:string, candidates: openarray[string], limit: int, ispath: bool = true): tuple[i:int, r:int] =
iterator fuzzyMatches(query:string, candidates: openarray[string], current: string, limit: int, ispath: bool = true): tuple[i:int, r:int] =
let findFirstN = true
var count = 0
var mtch:Match
mtch.positions = newSeq[int](query.len)
var heap = newHeap[tuple[i:int, r:int]]() do (a, b: tuple[i:int, r:int]) -> int:
b.r - a.r
for i, x in candidates:
l "processing: {x}"
isMatch(query, x, mtch)
if mtch.found:
count.inc
l "ADDED: {x}"
let rank = scorer(mtch, x, ispath)
info &"{x} - {mtch} - {rank}"
heap.push((i, rank))
if findFirstN and count == limit * 5:
break
if query != "":
for i, x in candidates:
l "processing: {x}"
isMatch(query, x, mtch)
if mtch.found:
count.inc
l "ADDED: {x}"
let rank = scorer(mtch, x, ispath)
info &"{x} - {mtch} - {rank}"
heap.push((i, rank))
if findFirstN and count == limit * 5:
break
else: # blank query: rank candidates by Levenshtein distance to the current file (score = 300 - distance) and take the top N
for i, x in candidates:
if current != x:
heap.push((i, 300 - current.editDistance(x)))
count = 0
while count < limit and heap.size > 0:
let item = heap.pop
yield item
count.inc

proc scoreMatchesStr(query: string, candidates: openarray[string], limit: int, ispath:bool=true): seq[tuple[i:int, r:int]] {.exportpy.} =
proc scoreMatchesStr(query: string, candidates: openarray[string], current: string, limit: int, ispath:bool=true): seq[tuple[i:int, r:int]] {.exportpy.} =
result = newSeq[tuple[i:int, r:int]](limit)
var idx = 0
if os.existsEnv("FRUZZY_USEALT"):
Expand All @@ -295,7 +300,7 @@ proc scoreMatchesStr(query: string, candidates: openarray[string], limit: int, i
result[idx] = m
idx.inc
else:
for m in fuzzyMatches(query, candidates, limit, ispath):
for m in fuzzyMatches(query, candidates, current, limit, ispath):
result[idx] = m
idx.inc

Expand Down

0 comments on commit d5d4980

Please sign in to comment.