From cdf0f30e755b9ffd84a06481644276a9041d3232 Mon Sep 17 00:00:00 2001
From: Q
Date: Sat, 4 Jun 2016 21:09:24 +1000
Subject: [PATCH 1/2] skip any non-ASCII characters in the files, since this seems to break the source code parser

---
NOTE(review): line breaks and indentation in this series were reconstructed
from a flattened copy; blank context lines were inferred from the hunk
counts. Verify against pycscope/__init__.py before applying.

 pycscope/__init__.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pycscope/__init__.py b/pycscope/__init__.py
index e0adc90..94fb2a9 100755
--- a/pycscope/__init__.py
+++ b/pycscope/__init__.py
@@ -23,6 +23,19 @@ import getopt, sys, os, string, re
 import keyword, parser, symbol, token
 
 
+_re_ascii_filter = '[^%s]' % (re.escape(string.printable), )
+
+def ascii_dammit( sourcecode, _re_expr = re.compile( _re_ascii_filter ) ):
+    """
+    just ignore all non-ascii characters
+    since any identifiers should be ASCII anyway ;
+    nb: this will work for utf-8 as well
+
+    """
+
+    result = _re_expr.sub( '', sourcecode )
+    return result
+
 class Mark(object):
     """ Marks, as defined by Cscope, that are implemented.
 
@@ -238,6 +251,7 @@ def parseFile(basepath, relpath, indexbuff, indexbuff_len, fnamesbuff, dump=Fals
     # Add path info to any syntax errors in the source files
     if filecontents:
         try:
+            filecontents = ascii_dammit( filecontents )
             indexbuff_len = parseSource(filecontents, indexbuff, indexbuff_len, dump)
         except (SyntaxError, AssertionError) as e:
             e.filename = fullpath

From 50e42f9551e33ceb895bb9f575ae174ef224bda1 Mon Sep 17 00:00:00 2001
From: Q
Date: Sat, 4 Jun 2016 22:56:35 +1000
Subject: [PATCH 2/2] tell the user what file let us down when we fail

---
 pycscope/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pycscope/__init__.py b/pycscope/__init__.py
index 94fb2a9..642ea88 100755
--- a/pycscope/__init__.py
+++ b/pycscope/__init__.py
@@ -256,6 +256,11 @@ def parseFile(basepath, relpath, indexbuff, indexbuff_len, fnamesbuff, dump=Fals
         except (SyntaxError, AssertionError) as e:
             e.filename = fullpath
             raise e
+        except Exception as e:
+            # debug a fatal exception:
+            e.filename = fullpath
+            print( "pycscope.py: %s in \n%s" % ( e, repr(fullpath) ) )
+            raise e
 
     return indexbuff_len
 