-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9d222e6
commit fd46275
Showing
2 changed files
with
223 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
bl | ||
fr | ||
IUCN | ||
bg | ||
bh | ||
c:a | ||
cd | ||
d:r | ||
dr | ||
iaf | ||
iofs | ||
ISBN | ||
IVA | ||
jbo | ||
jb/o | ||
k:a | ||
kbfd | ||
KBM | ||
KFM | ||
mc | ||
mm | ||
mnkr | ||
Mkr | ||
msk | ||
mtp | ||
n/a | ||
N:o | ||
nr | ||
N:s | ||
obs | ||
omm | ||
oms | ||
pg | ||
pua | ||
RC | ||
R:dr | ||
Rdr | ||
R:gs | ||
RIP | ||
s:a | ||
SEK | ||
Sk:pund | ||
Sk | ||
sms | ||
sr | ||
ss | ||
ssk | ||
S:t | ||
S:ta | ||
tfn | ||
tgm | ||
tjf | ||
tjl | ||
tjm | ||
tkr | ||
t:r | ||
trpt | ||
trsp | ||
VAB | ||
vd | ||
vpl | ||
C/o | ||
OSA | ||
ZIP code | ||
Ftf | ||
AA | ||
AB | ||
AF | ||
ASEA | ||
ASEAN | ||
CSA | ||
CSA | ||
CSN | ||
DACO | ||
DAFA | ||
DCRI | ||
EG | ||
EMU | ||
FK | ||
FRA | ||
FN | ||
JK | ||
JO | ||
JämO | ||
KDU | ||
KF | ||
KO | ||
Komintern | ||
LO | ||
MUF | ||
NATO | ||
NF | ||
NBV | ||
NO | ||
OD | ||
OK | ||
PO | ||
RFoD | ||
RFSL | ||
RFSU | ||
ROKS | ||
SACO | ||
SAP | ||
SCB | ||
SEB | ||
SIFO | ||
SKF | ||
SKL | ||
SKR | ||
SKTF | ||
SR | ||
SSAB | ||
SSG | ||
SSR | ||
SSRS | ||
SSU | ||
TCO | ||
TRV | ||
Unesco | ||
Unicef | ||
WHO | ||
WWF | ||
sic | ||
AC | ||
AM | ||
ATB | ||
bps | ||
DC | ||
DIN | ||
DPF | ||
DSG | ||
DVD | ||
EAN | ||
EGR | ||
FM | ||
HF | ||
IF | ||
ISO | ||
KV | ||
LV | ||
LW | ||
MF | ||
MV | ||
MW | ||
MMS | ||
NOM | ||
PC | ||
PCM | ||
PWM | ||
RAM | ||
ROM | ||
rpm | ||
SQR | ||
SQRT | ||
SSB | ||
SW | ||
TDI | ||
UHF | ||
UKV | ||
VHF | ||
VLF | ||
DM | ||
DNF | ||
DNS | ||
efl | ||
FM | ||
GM | ||
GP | ||
GS | ||
GS | ||
IEM | ||
IVM | ||
JSM | ||
KO | ||
MMA | ||
MTB | ||
NM | ||
TKO | ||
USM | ||
VC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
min_trimmed_length = 3 | ||
min_word_count = 2 | ||
max_word_count = 14 | ||
min_characters = 2 | ||
may_end_with_colon = false | ||
quote_start_with_letter = true | ||
needs_punctuation_end = true | ||
needs_letter_start = true | ||
needs_uppercase_start = true | ||
# This should cover most common Swedish words | ||
allowed_symbols_regex = "[a-zåäöA-ZÅÄÖé,.?!: ]" | ||
broken_whitespace = [" ", " ,", " .", " ?", " !", " ;"] | ||
|
||
# No Roman numerals (common in texts about Swedish kings etc., but hard to read aloud) | ||
# The :s is for genitive constructions (Karl XII:s) | ||
other_patterns = ["[MDCLXVI]+(:s)?"] | ||
|
||
# We don’t allow quotes or parentheses, so there are no symbol pairs to match | ||
# (Swedish uses the same character "”" for both opening and closing quotes, so | ||
# they could not be matched anyway) | ||
# matching_symbols = [ ] | ||
|
||
# Expand some common abbreviations that are reasonable and unambiguous to read out | ||
replacements = [ | ||
["bl.a.", "bland annat"], | ||
["ca.", "cirka"], | ||
["d.v.s.", "det vill säga"], | ||
["fr.o.m.", "från och med"], | ||
["i.o.m.", "i och med"], | ||
["m.m.", "med mera"], | ||
["m.fl.", "med flera"], | ||
["o.d.", "och dylikt"], | ||
["o.dyl.", "och dylikt"], | ||
["o.s.v.", "och så vidare"], | ||
["p.g.a.", "på grund av"], | ||
["resp.", "respektive"], | ||
["t.ex.", "till exempel"], | ||
["t.o.m.", "till och med"], | ||
] | ||
|
||
# Filter out lots of other abbreviations | ||
# Taken from list: https://sv.wikipedia.org/wiki/Lista_%C3%B6ver_f%C3%B6rkortningar | ||
abbreviation_patterns = [ "art\\.|bl\\.a\\.|B\\.V\\.|civ\\.ek\\.|civ\\.ing\\.|doc\\.|d\\.v\\.s\\.|d\\.y\\.|d\\.ä\\.|ekon\\.|farm\\.|f\\.d\\.|fig\\.|fil\\.|fr\\.o\\.m\\.|Ibid\\.|ib\\.|i\\.o\\.m\\.|i\\.s\\.f\\.|lb\\.|lic\\.|lisp\\.|mag\\.|med\\.|m\\.h\\.p\\.p\\.|min\\.|m\\.m\\.|m\\.m\\.d\\.|mom\\.|m\\.v\\.h\\.|möjl\\.|m ö\\.h\\.|n\\.b\\.|näml\\.|nästk\\.|obs\\.|o\\.d\\.|odont\\.|o\\.dyl\\.|o\\.k\\.s\\.|omkr\\.|o\\.m\\.s\\.|op\\.|ordf\\.|org\\.nr|o\\.s\\.v\\.|pers\\.|p\\.g\\.a\\.|pol\\.|prel\\.|prof\\.|prov\\.|rc\\.|ref\\.|resp\\.|R\\.I\\.P\\.|rst\\.|s\\.a\\.s\\.|sek\\.|sekr\\.|s\\.g\\.s\\.|sid\\.|sign\\.|sistl\\.|s\\.k\\.|sk\\.|skepp\\.|skålp\\.|s\\.m\\.|s\\.m\\.s\\.|sp\\.|spec\\.|s\\.st\\.|st\\.|St\\.|stud\\.|särsk\\.|tab\\.|tekn\\.|tel\\.|temp\\.|teol\\.|t\\.ex\\.|tf\\.|t\\.h\\.|tim\\.|t\\.o\\.m\\.|trol\\.|t\\.v\\.|ung\\.|u\\.a\\.|u\\.f\\.a\\.|u\\.p\\.a\\.|urspr\\.|usk\\.|utg\\.|å\\.k\\.|äv\\.|ö\\.a\\.|övers\\.anm\\.|ö\\.h\\.|ö\\.h\\.t\\.|ök\\.|övers\\.|att\\.|Avs\\.|b\\.v\\.|D\\.S\\.|n\\.b\\.|o\\.s\\.a\\.|P\\.P\\.S\\.|P\\.S\\.|tr\\.|ö\\.g\\.|A\\.D\\.|e\\.Kr\\.|e\\.v\\.t\\.|g\\.s\\.|mån\\.|s\\.å\\.|civ\\.ek\\.|civ\\.ing\\.|ekon\\.dr|ekon\\.mag\\.|ekon\\.kand\\.|fil\\.dr|fil\\.lic\\.|fil\\.kand|fil\\.mag|jur\\.kand\\.|jur\\.utr\\.kand\\.|jur\\.lic\\.|jur\\.dr|med\\.dr|med\\.lic\\.|med\\.kand\\.|odont\\.kand\\.|odont\\.lic\\.|odont\\.dr|pol\\.kand\\.|pol\\.mag\\.|pol\\.dr|tekn\\.dr|tekn\\.lic\\.|teol\\.kand\\.|teol\\.lic\\.|teol\\.dr|a\\.a\\.|e\\.g\\.|e\\.o\\.|ibid\\.|id\\.|i\\.e\\.|L\\.s\\.|l\\.s\\.|m\\.p\\.|N\\.N\\.|op\\.cit\\.|alban\\.|arab\\.|aram\\.|armen\\.|assyr\\.|avest\\.|babyl\\.|bret\\.|bulg\\.|dan\\.|egypt\\.|eng\\.|est\\.|fa\\.|fd\\.|fenic\\.|fe\\.|feng\\.|ffrans\\.|ffris\\.|fgutn\\.|fht\\.|fi\\.|fin\\.|find\\.|flfran\\.|fnor\\.|fpers\\.|fpreuss\\.|fr\\.|frans\\.|frank\\.|fris\\.|fsax\\.|fslav\\.|fsv\\.|fvn\\.|fär\\.|gael\\.|gall\\.|georg\\.|got\\.|gr\\.|grek\\.|hebr\\.|hett\\.|hind\\.|hindost\\.|holl\\.|ieur\\.|ir\\.|irl\\.|isl\\.|it\\.|ital\\.|jap\\.|ke
lt\\.|kines\\.|korn\\.|kymr\\.|kyrkslav\\.|lat\\.|lap\\.|lapp\\.|lett\\.|lit\\.|lt\\.|mag\\.|meng\\.|mfr\\.|mholl\\.|mht\\.|mlat\\.|mlt\\.|mnl\\.|mnt\\.|moes\\.|nfris\\.|nht\\.|nl\\.|no\\.|nor\\.|ns\\.|nt\\.|nyfris\\.|nygr\\.|nyheb\\.|nyisl\\.|nysv\\.|osset\\.|osk\\.|per\\.|pers\\.|pol\\.|port\\.|prt\\.|pt\\.|provenç\\.|prov\\.|rundan\\.|runsv\\.|ry\\.|ryss\\.|sam\\.|sard\\.|skr\\.|slav\\.|semit\\.|sengr\\.|senlat\\.|shet\\.|slovak\\.|sloven\\.|slov\\.|sorb\\.|sp\\.|span\\.|sv\\.|syr\\.|tam\\.|tjeck\\.|tochar\\.|ty\\.|ung\\.|urgerm\\.|urn\\.|wal\\.|vall\\.|wogul\\.|zig\\.|æthiop\\.|disk\\.|s\\.d\\.|w\\.o\\." ] |