Skip to content

Commit

Permalink
solved issues with any string without quotes being parsed as span; s…
Browse files Browse the repository at this point in the history
…ee test Regex_without_quoutes in CqpGrammarTest

Change-Id: I704c47704e8a744b427b0082dccb8df0aa1d2c74
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/7384
Reviewed-by: Nils Diewald <[email protected]>
  • Loading branch information
Akron committed Sep 13, 2023
1 parent 0985a2e commit d964323
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 239 deletions.
25 changes: 12 additions & 13 deletions src/main/antlr/cqp/CQP.g4
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,18 @@ DQUOTE: ('"'|'„'|'“'|'“'|'”');
/* Regular expressions and Regex queries */
fragment RE_symbol : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '\\' | '"' | ':' | '\'');
fragment RE_esc : ('\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '\\' | ':' | '"' | '\''))| '\'' '\'' | '"' '"';
fragment RE_char : (RE_symbol | RE_esc );
fragment RE_esc : '\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
| '(' | ')' | '|' | '\\' | ':' | '"' | '\'')| '\'' '\'' | '"' '"';
fragment RE_char : RE_symbol | RE_esc ;
// Alternation: one or more repetitions of a left operand (a single RE_char,
// a parenthesized RE_expr, or an RE_chgroup) followed by '|' and an RE_expr.
fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;

// Character group: one or more RE_char enclosed in '[' and ']'.
fragment RE_chgroup : '[' RE_char+ ']';
// Quantified atom: '.', an RE_char, an RE_chgroup or a parenthesized RE_expr,
// followed by '?', '*', '+' or FOCC, with an optional trailing QMARK.
// NOTE(review): FOCC and QMARK are defined outside this excerpt; presumably
// FOCC is a {n,m}-style occurrence count - confirm against the full grammar.
fragment RE_quant : ('.' | RE_char | RE_chgroup | ( '(' RE_expr ')')) ('?' | '*' | '+' | FOCC) QMARK?;
// Parenthesized sub-expression.
fragment RE_group : '(' RE_expr ')';
// Regex body: one or more of '.', RE_char, RE_alter, RE_chgroup, RE_quant or
// RE_group in sequence. Mutually recursive with RE_alter, RE_quant and RE_group.
fragment RE_expr : ('.' | RE_char | RE_alter | RE_chgroup | RE_quant | RE_group)+;
/* you can search for DQUOTE inside SQUOTE, and vice versa: '"' or "'"; */
fragment RE_dquote : DQUOTE (RE_expr | '\'' | ':' )* DQUOTE; // DQOUTE is not good, modify like verbatim in PQ+!
fragment RE_squote : SQUOTE (RE_expr | '"' | ':')* SQUOTE;
fragment RE_dquote : DQUOTE (RE_expr | '\'' | ':' )+ DQUOTE; // empty regex are no longer valid
fragment RE_squote : SQUOTE (RE_expr | '"' | ':')+ SQUOTE;



Expand Down Expand Up @@ -219,10 +219,9 @@ token
)
;


span
: skey // for lbound/sbound; check how it works for meet!
| LT ((foundry SLASH)? layer termOp)? skey (( NEG* (LRPAREN term RRPAREN| LRPAREN termGroup RRPAREN | NEG* term | NEG* termGroup))? GT)
spankey: skey; // simple span to be used only with operators (region, lbound,rbound, within, meet)
span:
LT ((foundry SLASH)? layer termOp)? skey (( NEG* (LRPAREN term RRPAREN| LRPAREN termGroup RRPAREN | NEG* term | NEG* termGroup))? GT)
;

closingspan
Expand All @@ -232,7 +231,7 @@ LT '/' ((foundry SLASH)? layer termOp)? skey (( NEG* (LRPAREN term RRPAREN| LR

position
//: POSITION_OP LRPAREN (segment|sequence) COMMA (segment|sequence) RRPAREN
: POSITION_OP LRPAREN span RRPAREN
: POSITION_OP LRPAREN (span|spankey) RRPAREN
;


Expand Down Expand Up @@ -288,7 +287,7 @@ matches: span (sequence | segment) closingspan;

startswith: span (sequence|segment);
endswith: (sequence|segment) closingspan;
region: SLASH REGION_OP LPAREN span RPAREN;
region: SLASH REGION_OP LPAREN (span|spankey) RPAREN;



Expand Down Expand Up @@ -324,7 +323,7 @@ query
;

within
: WITHIN span //WORD
: WITHIN (span|spankey) //WORD
;

/**
Expand Down Expand Up @@ -363,7 +362,7 @@ matching

meetunion
:
(((LRPAREN meetunion RRPAREN) | segment) ((LRPAREN meetunion RRPAREN) | segment) ((NUMBER NUMBER) | span))
(((LRPAREN meetunion RRPAREN) | segment) ((LRPAREN meetunion RRPAREN) | segment) ((NUMBER NUMBER) | span | spankey))
;

/**
Expand Down
Loading

0 comments on commit d964323

Please sign in to comment.