Skip to content

Commit

Permalink
Clean up and make Swedish abbreviation check more efficient
Browse files Browse the repository at this point in the history
  • Loading branch information
andersjohansson committed Jul 31, 2020
1 parent 429e2b4 commit 13b735b
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/rules/sv.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ replacements = [

# Filter out lots of other abbreviations
# Taken from list: https://sv.wikipedia.org/wiki/Lista_%C3%B6ver_f%C3%B6rkortningar
# Flat alternation of every listed abbreviation; each literal dot is escaped for the regex engine.
# NOTE(review): `abbreviation_patterns` is assigned again further down; TOML forbids defining a key twice, so confirm only one definition is meant to remain.
abbreviation_patterns = [ "art\\.|bl\\.a\\.|B\\.V\\.|civ\\.ek\\.|civ\\.ing\\.|doc\\.|d\\.v\\.s\\.|d\\.y\\.|d\\.ä\\.|ekon\\.|farm\\.|f\\.d\\.|fig\\.|fil\\.|fr\\.o\\.m\\.|Ibid\\.|ib\\.|i\\.o\\.m\\.|i\\.s\\.f\\.|lb\\.|lic\\.|lisp\\.|mag\\.|med\\.|m\\.h\\.p\\.p\\.|min\\.|m\\.m\\.|m\\.m\\.d\\.|mom\\.|m\\.v\\.h\\.|möjl\\.|m ö\\.h\\.|n\\.b\\.|näml\\.|nästk\\.|obs\\.|o\\.d\\.|odont\\.|o\\.dyl\\.|o\\.k\\.s\\.|omkr\\.|o\\.m\\.s\\.|op\\.|ordf\\.|org\\.nr|o\\.s\\.v\\.|pers\\.|p\\.g\\.a\\.|pol\\.|prel\\.|prof\\.|prov\\.|rc\\.|ref\\.|resp\\.|R\\.I\\.P\\.|rst\\.|s\\.a\\.s\\.|sek\\.|sekr\\.|s\\.g\\.s\\.|sid\\.|sign\\.|sistl\\.|s\\.k\\.|sk\\.|skepp\\.|skålp\\.|s\\.m\\.|s\\.m\\.s\\.|sp\\.|spec\\.|s\\.st\\.|st\\.|St\\.|stud\\.|särsk\\.|tab\\.|tekn\\.|tel\\.|temp\\.|teol\\.|t\\.ex\\.|tf\\.|t\\.h\\.|tim\\.|t\\.o\\.m\\.|trol\\.|t\\.v\\.|ung\\.|u\\.a\\.|u\\.f\\.a\\.|u\\.p\\.a\\.|urspr\\.|usk\\.|utg\\.|å\\.k\\.|äv\\.|ö\\.a\\.|övers\\.anm\\.|ö\\.h\\.|ö\\.h\\.t\\.|ök\\.|övers\\.|att\\.|Avs\\.|b\\.v\\.|D\\.S\\.|n\\.b\\.|o\\.s\\.a\\.|P\\.P\\.S\\.|P\\.S\\.|tr\\.|ö\\.g\\.|A\\.D\\.|e\\.Kr\\.|e\\.v\\.t\\.|g\\.s\\.|mån\\.|s\\.å\\.|civ\\.ek\\.|civ\\.ing\\.|ekon\\.dr|ekon\\.mag\\.|ekon\\.kand\\.|fil\\.dr|fil\\.lic\\.|fil\\.kand|fil\\.mag|jur\\.kand\\.|jur\\.utr\\.kand\\.|jur\\.lic\\.|jur\\.dr|med\\.dr|med\\.lic\\.|med\\.kand\\.|odont\\.kand\\.|odont\\.lic\\.|odont\\.dr|pol\\.kand\\.|pol\\.mag\\.|pol\\.dr|tekn\\.dr|tekn\\.lic\\.|teol\\.kand\\.|teol\\.lic\\.|teol\\.dr|a\\.a\\.|e\\.g\\.|e\\.o\\.|ibid\\.|id\\.|i\\.e\\.|L\\.s\\.|l\\.s\\.|m\\.p\\.|N\\.N\\.|op\\.cit\\.|alban\\.|arab\\.|aram\\.|armen\\.|assyr\\.|avest\\.|babyl\\.|bret\\.|bulg\\.|dan\\.|egypt\\.|eng\\.|est\\.|fa\\.|fd\\.|fenic\\.|fe\\.|feng\\.|ffrans\\.|ffris\\.|fgutn\\.|fht\\.|fi\\.|fin\\.|find\\.|flfran\\.|fnor\\.|fpers\\.|fpreuss\\.|fr\\.|frans\\.|frank\\.|fris\\.|fsax\\.|fslav\\.|fsv\\.|fvn\\.|fär\\.|gael\\.|gall\\.|georg\\.|got\\.|gr\\.|grek\\.|hebr\\.|hett\\.|hind\\.|hindost\\.|holl\\.|ieur\\.|ir\\.|irl\\.|isl\\.|it\\.|ital\\.|jap\\.|kelt\\.|kines\\.|korn\\.|kymr\\.|kyrkslav\\.|lat\\.|lap\\.|lapp\\.|lett\\.|lit\\.|lt\\.|mag\\.|meng\\.|mfr\\.|mholl\\.|mht\\.|mlat\\.|mlt\\.|mnl\\.|mnt\\.|moes\\.|nfris\\.|nht\\.|nl\\.|no\\.|nor\\.|ns\\.|nt\\.|nyfris\\.|nygr\\.|nyheb\\.|nyisl\\.|nysv\\.|osset\\.|osk\\.|per\\.|pers\\.|pol\\.|port\\.|prt\\.|pt\\.|provenç\\.|prov\\.|rundan\\.|runsv\\.|ry\\.|ryss\\.|sam\\.|sard\\.|skr\\.|slav\\.|semit\\.|sengr\\.|senlat\\.|shet\\.|slovak\\.|sloven\\.|slov\\.|sorb\\.|sp\\.|span\\.|sv\\.|syr\\.|tam\\.|tjeck\\.|tochar\\.|ty\\.|ung\\.|urgerm\\.|urn\\.|wal\\.|vall\\.|wogul\\.|zig\\.|æthiop\\.|disk\\.|s\\.d\\.|w\\.o\\." ]
# Trimmed-down abbreviation list that favours the more common entries,
# compiled into a single factored alternation with Emacs' regexp-opt.
# The outer group ends in an escaped dot, so every alternative must be
# followed by a literal "." to match.
abbreviation_patterns = [
    "((A\\.D|B\\.V|D\\.S|N\\.N|P\\.S|R\\.I\\.P|St|a(\\.a|lban|r(ab|men|t)|tt)|d(\\.[yä]|an|isk|oc)|e(\\.(Kr|v\\.t|[go])|kon|ng|st)|f(\\.d|arm|i[gln]|r((an|i)s)|sv|[deir])|g(\\.s|ot|r(ek)?)|h(e(br|tt)|oll)|i(\\.e|bid|(s|ta)l|[bdrt])|j(ap|ur)|korn|l(a(pp|t)|ett|i[ct])|m( ö\\.h|ag|ed|in|om|ån)|n[lo]|o(mkr|p|rdf)|p(ers|o(l|rt)|ro[fv]|t)|r(ef|undan|y(ss)?)|s(\\.(a\\.s|[kå])|e(kr?|nlat)|id|kepp|lav|pec|tud|[kptv])|t(\\.[hv]|a[bm]|e(kn|ol)|im|rol|[fry])|u(\\.p\\.a|ng|rspr|tg)|vall|w\\.o|äv|ö(\\.h|vers))\\.)",
]

0 comments on commit 13b735b

Please sign in to comment.