Skip to content

Commit

Permalink
Closes #1420: Integrate the HFRI project mining
Browse files Browse the repository at this point in the history
Resolve HFRI false positives.
  • Loading branch information
marekhorst committed Oct 25, 2023
1 parent d45f981 commit d4c3784
Showing 1 changed file with 4 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ hidden var 'nihposfull' from select jmergeregexp(jgroup(word)) from (select * fr
-- Store in the variables 'nihpositives' and 'nihnegatives' the result of
-- jmergeregexp(jgroup(word)) computed over the words of the nihpositives and
-- nihnegatives relations, fed in longest-first order (length(word) desc).
-- NOTE(review): presumably each result is a single alternation regex with the
-- longer alternatives placed first -- confirm against the madIS jmergeregexp docs.
hidden var 'nihpositives' from select jmergeregexp(jgroup(word)) from (select * from nihpositives order by length(word) desc);
hidden var 'nihnegatives' from select jmergeregexp(jgroup(word)) from (select * from nihnegatives order by length(word) desc);
-- Literal regex stored in the variable 'hfripos': an alternation of two-word
-- phrases (e.g. "hfri project", "grant no"), listed from longest to shortest.
-- NOTE(review): where this variable is consumed is not visible in this excerpt;
-- presumably it is the positive-context pattern for HFRI grant matching -- confirm.
hidden var 'hfripos' from select "(?:innovation project)|(?:multigold numbered)|(?:fellowship number)|(?:grant fellowship)|(?:innovation grant)|(?:scholarship code)|(?:technology gsrt)|(?:project number)|(?:faculty grant)|(?:hfri project)|(?:elidek grant)|(?:agreement no)|(?:funded grant)|(?:project no)|(?:hfri grant)|(?:gsrt grant)|(?:hfri fm17)|(?:hfri code)|(?:grant no)|(?:grant ga)|(?:ga hfri)";
-- Literal regex stored in the variable 'hfrineg': negative context for HFRI
-- mining. A lower-cased text window that matches it is rejected by the
-- hfri_unidentified_only filter (not regexprmatches(var('hfrineg'), ...)).
-- The separators in "i.k.a" are matched as a literal '.' or a space: an
-- unescaped '.' would match any character (e.g. "ink at"), and a '\b' placed
-- right after a literal trailing '.' cannot match before whitespace.
hidden var 'hfrineg' from select "(?:\bekt\b)|(?:eliamep)|(?:\bforth\b)|(?:\bi[. ]k[. ]a\b)|(?:\bipep\b)";
-- For each of the funders MIUR, WT and GSRI, store in '<funder>_unidentified'
-- the id of a grants row whose grantid is "unidentified" for that
-- fundingclass1; "limit 1" keeps a single id per variable.
hidden var 'miur_unidentified' from select id from grants where fundingclass1="MIUR" and grantid="unidentified" limit 1;
hidden var 'wt_unidentified' from select id from grants where fundingclass1="WT" and grantid="unidentified" limit 1;
hidden var 'gsri_unidentified' from select id from grants where fundingclass1="GSRI" and grantid="unidentified" limit 1;
Expand All @@ -32,8 +33,8 @@ select id, grantid, gid, jmergeregexp(terms) as terms, jlen(terms) as lt from
where fundingclass1 = "INCa" and gid is not null));

create temp table hfri_unidentified_only as select docid, var('hfri_unidentified') as id, prev, middle, next from (setschema 'docid,prev,middle,next'
select c1 as docid, textwindow2s(c2, 10,2,10, "\bHFRI\b|Hellenic Foundation|Greek Foundation|ΕΛΙΔΕΚ|Ελληνικό Ίδρυμα") from ((setschema 'c1,c2' select * from pubs where c2 is not null)))
where var('hfri_unidentified') and lower(j2s(prev,middle,next)) not like "%himalayan%" and regexprmatches("gsrt|greek|hellenic|innovation|research|grant|greece",lower(j2s(prev,middle,next)));
select c1 as docid, textwindow2s(lower(c2), 10,2,10, "\bhfri\b|hellenic foundation|greek foundation|ελιδεκ|ελληνικο ιδρυμα") from ((setschema 'c1,c2' select * from pubs where c2 is not null)))
where var('hfri_unidentified') and lower(j2s(prev,middle,next)) not like "%himalayan%" and not regexprmatches(var('hfrineg'), lower(j2s(prev,middle,next))) and regexprmatches("gsrt|innovation|research|grant|greece",lower(j2s(prev,middle,next)));


create temp table output_hfri as
Expand Down Expand Up @@ -371,4 +372,4 @@ select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'text
union all
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" "||middle||" "||next) from matched_undefined_gsri
union all
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" "||middle||" "||next) from (select * from hfri_unidentified_only group by docid);
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" "||middle||" "||next) from (select * from hfri_unidentified_only group by docid);

0 comments on commit d4c3784

Please sign in to comment.