From f568a00c07105ef90b1c4bfd562db625cfa38ace Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 4 Dec 2024 10:34:47 +0100 Subject: [PATCH 1/2] Revert "Revert "Merge pull request #1692 from CentreForDigitalHumanities/feature/gallica"" This reverts commit bc3194a5b246972eab4e459abecb4901ceea22f9. --- backend/addcorpus/python_corpora/corpus.py | 17 ++ backend/addcorpus/validation/indexing.py | 27 +-- backend/corpora/gallica/conftest.py | 35 ++++ backend/corpora/gallica/figaro.py | 55 ++++++ backend/corpora/gallica/gallica.py | 162 ++++++++++++++++++ backend/corpora/gallica/images/figaro.jpg | Bin 0 -> 32859 bytes backend/corpora/gallica/tests/__init__.py | 0 .../gallica/tests/data/figaro/Issues.xml | 4 + .../gallica/tests/data/figaro/OAIRecord.xml | 62 +++++++ .../gallica/tests/data/figaro/RoughText.html | 2 + .../gallica/tests/data/figaro/Years.xml | 102 +++++++++++ backend/corpora/gallica/tests/test_import.py | 33 ++++ backend/requirements.txt | 6 +- 13 files changed, 493 insertions(+), 12 deletions(-) create mode 100644 backend/corpora/gallica/conftest.py create mode 100644 backend/corpora/gallica/figaro.py create mode 100644 backend/corpora/gallica/gallica.py create mode 100644 backend/corpora/gallica/images/figaro.jpg create mode 100644 backend/corpora/gallica/tests/__init__.py create mode 100644 backend/corpora/gallica/tests/data/figaro/Issues.xml create mode 100644 backend/corpora/gallica/tests/data/figaro/OAIRecord.xml create mode 100644 backend/corpora/gallica/tests/data/figaro/RoughText.html create mode 100644 backend/corpora/gallica/tests/data/figaro/Years.xml create mode 100644 backend/corpora/gallica/tests/test_import.py diff --git a/backend/addcorpus/python_corpora/corpus.py b/backend/addcorpus/python_corpora/corpus.py index 3b0e2594a..220a427bc 100644 --- a/backend/addcorpus/python_corpora/corpus.py +++ b/backend/addcorpus/python_corpora/corpus.py @@ -79,6 +79,23 @@ def category(self): ''' raise NotImplementedError('CorpusDefinition missing 
category') + ''' + Directory where source data is located + If neither `data_directory` nor `data_url` is set to valid paths, this corpus cannot be indexed + ''' + data_directory = None + + ''' + URL where source data is located + If neither `data_directory` nor `data_url` is set to valid paths, this corpus cannot be indexed + ''' + data_url = None + + ''' + If connecting to the data URL requires and API key, it needs to be set here + ''' + data_api_key = None + @property def es_index(self): ''' diff --git a/backend/addcorpus/validation/indexing.py b/backend/addcorpus/validation/indexing.py index a3763ef7b..92d0c89c2 100644 --- a/backend/addcorpus/validation/indexing.py +++ b/backend/addcorpus/validation/indexing.py @@ -1,9 +1,9 @@ ''' This module defines functions to check if a corpus is ready for indexing. ''' - -import warnings import os +import requests +import warnings from addcorpus.validation.creation import primary_mapping_type @@ -87,12 +87,19 @@ def validate_has_data_directory(corpus): return config = corpus.configuration - if not config.data_directory: - raise CorpusNotIndexableError( - 'Missing data directory' - ) + if not config.data_directory and not config.data_url: + raise CorpusNotIndexableError('Missing data directory or url') - if not os.path.isdir(config.data_directory): - raise CorpusNotIndexableError( - 'Configured data directory does not exist.' - ) + if corpus.data_dircetory and not os.path.isdir(config.data_directory): + raise CorpusNotIndexableError('Configured data directory does not exist.') + + if corpus.data_url: + headers = {} + if corpus.data_api_key: + headers = {"Authorization": f"Token {corpus.data_api_key}"} + try: + requests.get(corpus.data_url, headers=headers) + except ConnectionError: + raise CorpusNotIndexableError( + 'Cannot connect to the configured data url. Do you need to provide an API key?' 
+ ) diff --git a/backend/corpora/gallica/conftest.py b/backend/corpora/gallica/conftest.py new file mode 100644 index 000000000..e1d7e2fef --- /dev/null +++ b/backend/corpora/gallica/conftest.py @@ -0,0 +1,35 @@ +import os + +import pytest + +here = os.path.abspath(os.path.dirname(__file__)) + + +@pytest.fixture() +def gallica_corpus_settings(settings): + settings.CORPORA = { + "figaro": os.path.join(here, "figaro.py"), + } + + +class MockResponse(object): + def __init__(self, filepath): + self.mock_content_file = filepath + + @property + def content(self): + with open(self.mock_content_file, "r") as f: + return f.read() + + +def mock_response(url: str) -> MockResponse: + if url.endswith("date"): + filename = os.path.join(here, "tests", "data", "figaro", "Years.xml") + elif "&" in url: + filename = os.path.join(here, "tests", "data", "figaro", "Issues.xml") + elif "?" in url: + filename = os.path.join(here, "tests", "data", "figaro", "OAIRecord.xml") + elif url.endswith("texteBrut"): + filename = os.path.join(here, "tests", "data", "figaro", "RoughText.html") + return MockResponse(filename) + diff --git a/backend/corpora/gallica/figaro.py b/backend/corpora/gallica/figaro.py new file mode 100644 index 000000000..32ac4347b --- /dev/null +++ b/backend/corpora/gallica/figaro.py @@ -0,0 +1,55 @@ +from datetime import datetime +from typing import Union + +from django.conf import settings +from ianalyzer_readers.xml_tag import Tag +from ianalyzer_readers.extract import XML + +from addcorpus.python_corpora.corpus import FieldDefinition +from addcorpus.es_mappings import ( + keyword_mapping, +) + +from corpora.gallica.gallica import Gallica + + +def join_issue_strings(issue_description: Union[list[str], None]) -> Union[str, None]: + if issue_description: + return "".join(issue_description[:2]) + + +class Figaro(Gallica): + title = "Le Figaro" + description = "Newspaper archive, 1854-1953" + min_date = datetime(year=1854, month=1, day=1) + max_date = datetime(year=1953, 
month=12, day=31) + corpus_id = "cb34355551z" + category = "periodical" + es_index = getattr(settings, 'FIGARO_INDEX', 'figaro') + image = "figaro.jpg" + + contributor = FieldDefinition( + name="contributor", + description="Persons who contributed to this issue", + es_mapping=keyword_mapping(enable_full_text_search=True), + extractor=XML(Tag("dc:contributor"), multiple=True), + ) + + issue = FieldDefinition( + name="issue", + description="Issue description", + es_mapping=keyword_mapping(), + extractor=XML( + Tag("dc:description"), multiple=True, transform=join_issue_strings + ), + ) + + def __init__(self): + self.fields = [ + self.content(), + self.contributor, + self.date(self.min_date, self.max_date), + self.identifier(), + self.issue, + self.url(), + ] diff --git a/backend/corpora/gallica/gallica.py b/backend/corpora/gallica/gallica.py new file mode 100644 index 000000000..fa52cce58 --- /dev/null +++ b/backend/corpora/gallica/gallica.py @@ -0,0 +1,162 @@ +from datetime import datetime +import logging +from time import sleep + +from bs4 import BeautifulSoup +from ianalyzer_readers.xml_tag import Tag +from ianalyzer_readers.extract import Metadata, XML +import requests + +from addcorpus.python_corpora.corpus import XMLCorpusDefinition +from addcorpus.python_corpora.corpus import FieldDefinition +from addcorpus.python_corpora.filters import DateFilter +from addcorpus.es_mappings import ( + keyword_mapping, + date_mapping, + main_content_mapping, +) +from addcorpus.es_settings import es_settings + +logger = logging.getLogger('indexing') + +def get_content(content: BeautifulSoup) -> str: + """Return text content in the parsed HTML file from the `texteBrut` request + This is contained in the first

<p> element after the first <hr>
element. + """ + text_nodes = content.find("hr").find_next_siblings("p") + return "".join([node.get_text() for node in text_nodes]) + + +def get_publication_id(identifier: str) -> str: + try: + return identifier.split("/")[-1] + except: + return None + + +class Gallica(XMLCorpusDefinition): + + languages = ["fr"] + data_url = "https://gallica.bnf.fr" + corpus_id = "" # each corpus on Gallica has an "ark" id + + @property + def es_settings(self): + return es_settings( + self.languages[:1], stopword_analysis=True, stemming_analysis=True + ) + + def sources(self, start: datetime, end: datetime): + # obtain list of ark numbers + response = requests.get( + f"{self.data_url}/services/Issues?ark=ark:/12148/{self.corpus_id}/date" + ) + year_soup = BeautifulSoup(response.content, "xml") + years = [ + year.string + for year in year_soup.find_all("year") + if int(year.string) >= start.year and int(year.string) <= end.year + ] + for year in years: + try: + response = requests.get( + f"{self.data_url}/services/Issues?ark=ark:/12148/{self.corpus_id}/date&date={year}" + ) + ark_soup = BeautifulSoup(response.content, "xml") + ark_numbers = [ + issue_tag["ark"] for issue_tag in ark_soup.find_all("issue") + ] + sleep(2) + except ConnectionError: + logger.warning(f"Connection error when processing year {year}") + break + + for ark in ark_numbers: + try: + source_response = requests.get( + f"{self.data_url}/services/OAIRecord?ark={ark}" + ) + sleep(2) + except ConnectionError: + logger.warning(f"Connection error encountered in issue {ark}") + break + + if source_response: + try: + content_response = requests.get( + f"{self.data_url}/ark:/12148/{ark}.texteBrut" + ) + sleep(10) + except ConnectionError: + logger.warning( + f"Connection error when fetching full text of issue {ark}" + ) + parsed_content = BeautifulSoup( + content_response.content, "lxml-html" + ) + yield ( + source_response.content, + {"content": parsed_content}, + ) + + def content(self): + return FieldDefinition( + 
name="content", + description="Content of publication", + display_name="Content", + display_type="text_content", + es_mapping=main_content_mapping( + token_counts=True, + stopword_analysis=True, + stemming_analysis=True, + language=self.languages[0], + ), + extractor=Metadata("content", transform=get_content), + ) + + def date(self, min_date: datetime, max_date: datetime): + return FieldDefinition( + name="date", + display_name="Date", + description="The date of the publication.", + es_mapping=date_mapping(), + extractor=XML( + Tag("dc:date"), + ), + results_overview=True, + search_filter=DateFilter( + min_date, max_date, description="Search only within this time range." + ), + visualizations=["resultscount", "termfrequency"], + csv_core=True, + ) + + def identifier(self): + return FieldDefinition( + name="id", + display_name="Publication ID", + description="Identifier of the publication on Gallica", + es_mapping=keyword_mapping(), + extractor=XML(Tag("dc:identifier"), transform=get_publication_id), + csv_core=True, + ) + + def url(self): + return FieldDefinition( + name="url", + display_name="Source URL", + display_type="url", + description="URL to scan on Gallica", + es_mapping=keyword_mapping(), + extractor=XML(Tag("dc:identifier")), + searchable=False, + ) + + # define fields property so it can be set in __init__ + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, value): + self._fields = value diff --git a/backend/corpora/gallica/images/figaro.jpg b/backend/corpora/gallica/images/figaro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a59554cae1183aee252ce5d99d6168778e222411 GIT binary patch literal 32859 zcmb4qbyQSe{O!Qd(xF4gfWQDsDlv5DP(w&aceiv4NDd+3(B0CdNJ)p%Fi1FrfPi$v zM zJnTKa?5){7nUtN)ZLK|-UNXse*jw3JJDa=wpCG^k3Lp!>0s{ZLuz*jQp0Ua5goEUszq0g*NS%6&h~~d^|KDT)kMUq&18^Rn36MSJ;lE@a4Fmtb zTwq`V$bih`SbWkHEL!}JN!0%f>-lMc4Hl5m_DCsMe^>$#0x=#-0VD%R0=92nM}3~3 
z?=ap>L}aMMTH+KI4>RGpNpdjVmxYo9(LMmyzc)n1ANO2d&KX0s1TdQZ;icjt@5BcO zxzDW4x{T$-tbD6ezP;Kn^2C&o;43r=pZ**FRI3$I78Y@SSo^Fh`Rb#))^8r^pDkwT zI5d0>hQ8b~Zv}=yYviK{54>{#*@Soa>GxQ>MF+cHwd3{oX|!(aVjeiUmednjg$WKh zi)hAAGx!6Ax{A%13S(VGxr>AMA-V5U^8{v!K8GEcuuZ>O`xa(Um9>bez}XO2#GLkY zBV_q`9{k})>)pK0x$4pjPWY%rh`1eE{i5j>$< zL(5i9kLj)QNy;<2NS2*VW3XQEiuuK!JyjuN*tNTG)l|4lvrJ8zmsx@MIaI7?r>juU zy=GSUOA*mc_b=#l)kw`e3fjUv3830I$|0yw`kgB&_FN&$z`ULAXmF=8qmn7rgWtgZ zy}I7)my{Yfeg9NK)6IvM*~5v%d|^iw4W~TTrL*2O?I4?5`WDFLj0{1?NZ4(m^cm$4 z_Iuph%_$X_>@S-nat+5a<<}(cddf@zq-CpjUPqo$T>kujlRuy6jjg<6-2}WJmCpV3 z-n~{r?TRY#%qn=gG!q_r8VcmpXHc_~8p%Gv^&+=sD&{Z8JCWD(d-yf|xJv`D`bv z*2sHqvS~3`kK|jF>2r&0!DO%tHOT4ynq+pfuF|zIk31g+=y2a)(18zh4votz`G8M> zG}lGlVo#>>ddJ~daqfMaZ?~y899HdG1S882V1e=HGm!EH-5!1gf&#ITdmF~nn?T19 z{p|aS^)Rlc@||1ECMffg${0StCQ(Wnbj5_$+Px)ilG7X%vKeM1Skzw@LATPOb4*of zK(_O0wwag<#md(8PTcLj|%*kx@mrI$vG~XHPx=ru#M8%Id;woXm@gw8=ad z5>|sf4^L;$oLbrVDM(68d7rgi(l$A)y4vluWTH=NXBYTV^6Kt`ulZ)S7|gB>Ce1|< zl7Ck@bE!&d%2?H;)Bnu>z_{&^o}BM${(hIoS$L|am!4k2Q7@}Z5q)Xaj|{3$sV#Z~ zQd@Bt%))osa4$%3fC*Vy(2fq7_R(LVjjqR&%SCXUr!WDG}@g%NU8vw(MQ0hq+H>U9|U~&!vEh z@MJ+F5eW-xX7N@@LEcMVN$59ThLlQ}^~o>GNdg&DCz1or8WnBomsjG1P(0u;X6obj zRjh&A(^c+g8U{SWpUB}eZ@aVx)dMS(W$`7Sz04G0O5#N$9Y-P~MvkYRvzALNn_b*@ zq|ER)tN)Er9oC|@7#or4bQu-_wtAk<$Eq{EC#QmFokwk4K!c7-sz?jhf3VI((h~0G zBNW}F2XODsYD@9KI(ct1yN?wYc1@s0v3SLN&Si8Yqew>+SgoL*!YQ4<;&pJ|wBpbh z0aKP$f)!r_M!RH4-RWGaux3Gwc|7tYciYG@%PNc2IGT>6R``cQ>=WHrf3ohb+$wS} zX*>=%n*}P`wDnZe9_yOl+tp&^qpoF(uS%ePwWErMsx4ji@XoZFn!;;NT=%kl?J^j|n00R9^!De-pS z31?=-x#sYptI@aOrHrpR3}uvh@VmDYHE>ypUT~{FQ}GrZaI^fP=kwNG6M4TRBqTE` zJ+IlTR94y4n+YJsDn6>UkG$y%8CSp~&3skJnz)UOA&hP0H-Xcr$j zr;Y(aUsY_J^%j3u|{cOp2n0doJgpF3`BR`<9J-H zD=myX%6qyG&*tXKbRhhj1^~ZprXa!Kj^*U(Sg_5;dRM?7D@p{vy0CHWJCW{v)cF+c zS4HA^W&+R84W#ALpd}B5hSH`{O~f!uBz12i!!~l)V|wH#SGD72Bk2EQ~f%-IKrGR9o5_`^8x7 zDS4hX?mVpa?CU8^B)e{*EcI92Xwf;G(i&qFcU#u0c%tg@$>2UM;vdyFADKu!JW;6w z4Tu6$j~M)JCS>9=gXh@{=af$=pVBVA4YiC2R*3qGE_35q)+(<4XC&>)qwd2KU!y7g zlzeEF2{4bss<=`Sy$8s*z=)`i8u&qqZHH 
z4*+Mrg}7O5Z0#*pObKM0gSkyOK&i1#Gvc79o)OOkeM}>LKW|{ zD)R%Egy)2K{lZS<-5PczXH_LQHrJC*ivu_YCJaXAYIwbBX+XcMO_)7Dc>dbF3BB$5 z*dTb@xC5Z=AjJ+jDUI(FKnCHTLR{(%xj9r5fSxss!v{cKS8HpGgiZGo9l4%*W=(^5 zHzllyfugUp{SC?#t{&A=;*)z7Kb$lV#<$X;*l_AQUZDgNogVlTZs4eyufRW;NcUYc zuf+?A$O$&Rz8uKkIq#In$A%g8>QB{u<@C_0~W#<$LTrmreTe+tRe<`l;X8LQOdP`51$J&5@9I)!E69iO;C9x?Q>^afC4tZ&Hpd8Torq z4b&eVa0}s^bnWtbwXUkH18It-)u`si=i?x+M6`ppH}0BPM5mei6e+_R_nZ*?!QR@ORk0o~;u!^0*+lzV z9m)qVH_+Q%U&`|SovLDMreqn}U^)YQ?t+@X4tnelb1&*N`JT^EZ+O{f3ukg3Zb=G@ zoX=w5&*(EfC!HC5vL9Zc`;_S=JG)P8R9225bvwcGF<zUzt_R;TKm6_+DiM4lw0}*d*6WA;{ z!TjY6$PJ2w0Jysl*OyrEB)1M$>e)o|;^wi#D;JNkH4@nTPH7aRl;aa$UGYwKZQpD; zDfr1|59K*yx|`qSM3j)G5s$pq^Zendr&q?j>@S<&UnQtCibRxrk%ePww%Xqb_dJ(_ zX-cfz2JA8;I6AcT>_$_5o_5!>8F5l(-K_1;L%tqfJpd?5Nt(IPw-a0O4*(Im98^n7 zfNj_Vz(iT^c}=FRSLXgCXodjWz9x-aL?j#Wl)cqU_TFEB&`kF;yBGktjWXU$5-?sX z&qDr%_fU&424UsOFlQB;Hv(BNChvWJb{4y&)iG;JX&PJEGD>y{wz{;=EVI=ZCNAK- zc6QaY@EqJPv9cVEi3}yP5h(l^MV(2tKrD4TNZ+*-+f@0KeKuE$`o>awmo=uRd0@^0 z2Lnn{S|o;7krQVXH5eF@`$+~bQ>B+LyvHD?$X9r3Tl;N!wby-+v#uMk*Y=~6Rm;y@ z4N2+41lL{FBEy0N)uak|boOmgMN$_nlkmZ!r1kGsM^I90KTQnqlZf z@bJZ9rGIc%M@B&b_A#Nw?I>5BWV5=kJ!gjmwT-uvE92-S6!AD=z*cpC9S+uZc1|)I z&9qz^Jpjto`u7k=krs)clazRt3<}LKDEbt(+A9I z+?($jH6DP(9H~7|l8IwcU-b5jtO^|@sypS-&@WroBA`tprt_%;=*&J{c)pU&S_Kxy z^}pcEfvzaCkqxtOjQK6fPdhf< zW(Z~w$7(s#F6!T>fst>-mHAYV*$mORP!~ZXjN^%Sxs3nXsfYf$NBmV3sQe>MS5I`S z_M#zSQNYCMo`$>*!Gn}oL!=@zNDqPO6 zFqt%PD94w1(us=cz9V*)_W^J`(gNl$ivD0>-oUS)7wEX_kfHE^@f^Nu-SVs!|lX%2$B7um2X?_dLX$#J zCV@6T7(XYs&RB?_rx!bT>Ymh-Z_mAFxcgQxb!K44IA&{`gupiyzeJAgMOF6mY=HP#?Xt&8md|OHtTHGQmU4&AnAgNONUkV)&h;+Ect6t9yh@W*9ZE0dEX zx7BIm;IRil$_@Us>G0+Kme2Q09!2#98s1mKjwPrEz;)42zdo7)rsRC0Zn6pY@;Ok* zJ>AZjvjvYAGI8*V?9Ug?@(g=^k<+jy+ipDUAKOYqBRrp1^jH8=VvC57z z+6_xfH$HgM#)q49_a|_tiy&5sh)E9vz;s*&+z1Hr?${SJUYFe0`K>*eQY;|RI)YWZQH%2}{QHG0dc)+lmh#QCRM_i@bEq=vm`hq8s zQ`H9)vzZ^(h$Ups4+JJfzv8~v6_<8A1{a4n{L-$})5dUKSpcUW#@`8@1~+)M%{pos zTzeuLjnS{pNaoMy3vk8{@Qv3jx6vj^jA_({xCdZd{m7;|zKJ6& 
z!_jN$p$kb3p`##~{IRvbtoecQZqQc;Cmc}5n_Z1bdF^s6|2A-fjF<|)sv=;^RssKnY>X-xKt@L%;56?1mpvPv7XyedW_Ig4svolrp-c7@ zkSqwHhKHoHBz6YX)9WzN-WHV)cbnzx?~f=?R5d^@gE^p{iFcD1o~WeKj6a2yad8a| zT_Bq({(+}CF`Gg1v97_e!vyL*zrq&ppe0sFA^NS5iuJ{dxlqYLhC;8e`rhT~a<6s; z1f9C8(rINw=G#%807P z^f$2LZB_v5aUn`RVfn7z*dMnbO6?N3tE2L+5>L0|aoFkZNF-&;<2apD;Pb%f@Mmc% zQV@W)GB{D}F1Vw_gKD2rQ*iUzU&)@!RP?fhBlU|*LK*}^*!+TVvrLR=t^*ZNNJZgX zZ4lYg@!duNiNo|_eHDxU4sX!;1spCuQ24HrinL2Lv}}5K>6_&B9IpnXCglfQgb2n3 zQ+qH3u?*J}oQn_Hlg2q@iiOn&BqwZ=IS+gJ)Y77VuO#LpFmHZwmniZH^a z_280vUUvh<+Nj#oe{-PQq4gYfv5t%GKq^V$iFR0NEu3mD#dIT>C7Wz(25-j@Zr%4KS1F79aXxrSZ-1rj~SxA*K zVQSaeN0CVvkd>LGtm|p>+wL)Mok8v~`m#j6W?g{P+i)DuM3JeSXc|<4S{h6KBiUe7 zV4h_uMNF)?^~hi}9u_%GE&r}qu8o!0h!4x@FThs(D{}n@Gs+$rYpCnB$$9MmFY^JA zk)Te{5D&cH#bewEF#-Nr@=5H0u4Tl(VYLBV5au&Tnh2bpTF@NSD&mL;s7*3%Vex=N zg&zP}Fj4BCO|WMQ`eK_!_oP3gpTyITCtnZ8lsOUl0wdAF1!4hqiL zDFxcyQz#D(-hAWOQ&}`R5|Q&IFvohFoKr6) zdUmP*Ni5^|&BRpoVd zQR=_R!Jw)RR8=+P;XmMQil=$yCbFQEZ2Q>)x2yOxPj|_7r^vS~2`40(Wp`Djnv{EBH> zFuPat$|aAlXrJ~N!T08Fo#mW%rjWX6B?h(!fbIRKu4T41zYwo*`rRpYd0Gl`MmJhS zp4QMdO7&Tl;sfA`|HL-Eb@;eP6E#arZ?l?qLagb&7Q?1?f3P)Ja%G^FA!a9_gxM&a zNAe#$CEnQqwpGoVIs`sW3`)cWfquA_l??8?E^D>1YM>HN@2 zG&t#by{${fK!U0jcX5Q+`b{Zj7mWEODkRsN1r6Z9=r8vNs=nXVY*_CJXy!PQM4%gV9#;Pk3d@zH5pHI?ca4t3QIT60tl0kYwY*YF#+P+O0atj{v zsH?8WV8x{VT0n;i{iK&RmT&DBtV-V~dHUVot}NYD>A9Uvd_~~N1jz)*GT{y5;-|h? ze}PiwHe+T40jl85`x9n@8(ug+tej&bRP2!TeBeutrA2T5-^=@;{4*1ls&dRI*9QRm z#YbD2)1}i%4WoTtr{nSp=CZfk&N|IOMv9+V^J)|HqTg^XVf#9l$o#`+PZX$`F;jT; zbj9c7&O2S#yyE#2T9Je|(hKqX=x?j>j$u}d0uBTXf;DoMWD%RBg|bW{KoleFY;LD@ zx<>a?8mWgA{pDO&9@*a%{fuQsY7kT6TFDoDJS{nyj| z^xoW+Gtc74CRDVM=WF`T9oTnNw!W9il);a2bv99anR_dJeI|@aR^UVyOBupZRR`@! 
zjhU}05dTPE`G1)68M4|D*~!wyE;|#=9?q}j%Ij2Vb1T$H zuhuM7(2OA0oqXQe{y9(J3$6eJL6azESSM0$~{ZP zQ3J#wwI4;JuhcS((ph^gg{-;yF@@Eh629iU|2Mp0+48A&MzCe%L#JBL7@UYw3`zQ3 zL*x+=C=EM>DZghoWEK$D_{djt1CR%m z$(>aRC}|+@+q`CwhG7~-eB!TWJHNaipS{j@pEVkDZbqY-NqZ+_am!dK;yH&0=C)Io z9oht^r(07YUpx*V!-?>&Z_aO`zsGDHYE>0hR&nfmPyc?~TQvCece@mI59h@M2}+X2 z`Q8N1r}T_tEQlxZa{D$h(rY%`G;`-B3TE8i>UDSh!9yzprK7F+KF%)e_j4?w`0fnC zZcbcL7AnrN4X7dL{ExE45g(G&l_*TG3N} z!owguJnhIGu5*&9BXFs8p^jj@mY-djLdR65kc!@8ALf=mh-ADC@E~;qxJ? z53g}=P>Pb%rl$1T1;t4OEB5RR_j#uSA7Y;GxxI)|aO82aX~>U)Fibld@_K%agcF1W z6-nIE!**UPi@NM%-M;hrio(Tm21&vK@0ZA?_SFNvE^DWMZpv8VuqJr_V+8T~F#143 zBXMEf0R0Vv)Nk%p%pJ~D$dBNqE9BXD&y7YY^Isj&=>pK}_rIPG3cnJSc!Xfr>dt=J zC)db+-kwpGq@n&9r(Y!Mtcr_rC^hM}WMZQJX=`%EkBJM8XF#huFH_B2CdcCe5PskU zr=PRdy3bp*@ ziFQ{lj%Q_EKJ<0t>4lm}#X;IjkFwe}T^ZEO*cLF2eFQWdw#*?3*4a<; zM{qdXgH*( z`2(IHVG%4d`{UK1szxn&<<040_riSA!rm~cZd$-;C;&v?yvcuRO-^&4wd+99C3(@3 zTW_+(v~H??Y3-gtuJ6dtE?HL*4S9FJkVzb zp;uOW_tf~}S3)mi*j%Rtt^jhC5i%Zl|Pbm-4C zM)jBWw$=1v?o}nXob~!_CRE}g94SNf`|$nUZnt1G^;A+^lLWibi;u*GEobt&dZ6Ei z_T@Dmnb?#{{bbxO)HXgm3~zHcNGS<*DG-tSS4h(`$=4f_d6S1Pur%AL#grzLhr}O& zrywV{=kbX?ZA-=O!=QOiZ&-X6o0myfc%{)S_Ho0yCP#(ySw`E%#LA`$miQ2MC%M#Y zz5H*Ou92~p_j@HBJ>JlW*RZ*b{*_&GWVjNPJn65SVA9lv$)5+n+Y@c~ERB+=>7NZ# z8dF6Is%znG1Jg;@f0A{w4A77O}TQz1CK?g4;_(2YMp zdnLxTGl~o}l1K68)HUJ%_R^ZsyeC&Cd-dW}ER?pPFDWJB{!OL*Qm!yut^AKAs*!15 zEaXRm@MRsPz>Ic2f~q<(Lx+G|cr0w6$^?JD?Mlp#>eKjE{N%+#^~gGZWoWdSky{+WUj5L=E*IRv00K zQK=+i%qu|c&;_|FvSfHQK-Ggvsb=5)yG$HY=v~G2uY+A78@Z2K20e!19EjW$6*h~Rh7Taun z3fM++fM|+pkMWq|Xp)brYi?VCNy^)}Xvyu`Tyk1a`IiI@HGHSaHbWv~AC*P>9qaVW zY2KB{K+*v8Hd?M`nx-OKG#73uFTdfS27djiTWlyyNP=qv@5?; z<$Syx?XVP+Veed${zfTF7A)7levA)gytYr=Z62EKugut|NMX$;+ztZ~3Lvkeqv1*X za@3DxLQAZ&vWb1QgHeKD6P0qp54hCNHb%?+!Qf>(syCh12KCefu+|`JG6IQ9M|nVp zsVO;w$~A$Z_0qyIDqt{aF(An6mt$#JSpeEaY&Sl?%zVcJgVC5&Qk~;cGlX`A9(LPS z5V9^y5(dQ#_1P@hI4snjKR=Z)RRBkxJRHzUcvx)*b*?#AP!htFQOl#fVc zWgPT&9Mb&*bXpWgI4BJ#X(-{E(cR|N*7p27M)kMAVkN7-IP4MEBhWnems>BT&|mYlex)VKhP@3HaW@cj#?EN2yhHt$lZ25 zd(zTY`8kz4?X 
zC{`tzr%Y(zWIG3OI)A1a3I0^_#_$C{ce6%r!{x-U@qheEjK+6fK1@Gxj+X`PFb^17 zoZQ0${%iAs%MN@W4k z!m|ycxtlur+7>Nen4bFFdfnEwvD^K{9Y!myu$3&E1Tc1PfDR%=O6Z50>F3UUHrvz5 zE#ucDRCTG#;G*i%hM3d`z^e9uuhb$t!2!w$mo6z$VB(Xg?TzB9bNZ<4TYab97=mb0 z;aliXQCp%l$TFOyocB5b^7fka($+S5lkMV2=p{s&ibC7p>WtHJR|1KVD=o2dUGMMr z!`KNHSeai+0%uKOnVKpQ=7@*CpHJEUC(SN%|ABqlg1{eHm9aVc^PbJ?&=4znT$i)9 z#8;@J8qa33_as*ALdL5Gt@ZIPw{wV5f$`MY#Xq_0~ zFIBz1hG0Ka{3mh67?QX+Gg4brQJJ4HKTCMl>@z}BQds35O@+<6#A_ttgn>lfY>OuS zBzVVNEy5Dd_Zv_d+;J;Zn*fAUs6Ee+$KU*HJH*p_tRzY61uMK2ulj1Z)LQOPR#2uK zuf<~iw*6P9+x`xkG4On(&hU&y^361UVO*t(|6nwHS>^!aNAA@G$$^l$youYfyw7>L zG#-CLy$E<3)p`kY4YH^Jh3*6L-X%RXP2Q1+E6)8gCrhf8<=|jU|HV=)k*{aqk)crT zVhXWS#Z{7KBck($PyCVjjwQo0js$)Uh12==1vN#@eswl-PVTy_!cx&y)Gh&^@JqXG ztTkLb$z8s62;EbSvUU*tm4#jo+*RPqlj`ZFK693w2R@=;lHlPRxh|=%1;Ncn42N^g zWH2~PSzZ}KLe9;4%s8BXduAJ)Iw%Q*Z;W@@)94blkJ`GgZReec(qd1P(v(-&ASHfN zPXLAnomK5ajaxwV%k7tse0e89fmg$8*1wbV3{t}x+>{uDt<(CC5`@e7nIH<5#jz%R zBV~AwlP^m{?SK@DB^9fTHFseMjJui_5;I6I*q}1V8d@%PCpoH~asA18Deh?^4DuV7 zV|-VC&@5=s>2WGZmxT|S{SYWDIg%~9p8Ggd^?6``cj^IvZ&s`X!{YFe5*Ar~ia!Wx zpQyaqR`>d*GZPDytP4ga3n*E z@QdHEGuecPt*{KzW|g%2O$w&=^i;)L&J5JZi$W%m5}zlhiZabh0Q2Uc1;M zs^2G(-Rbpxpe|w{!n7-w@gNl4#fLE(u4IqGl{7cI&Qn7LAz?+6r9ZO zfitv>PTO6A;{pF{X7$G1{*1=YwS(COYo1^BTr8o}C;NbTL3#+md+HV2drp{^4v_-} zcMWw6Lct&kJ9#ZNdQuXM^yo-TIqnpt{YGBx#8<9^+VVb(60#`jV@U9F8*%yp=ynDH zfk~g|SC}ueZ2rxK(^M_w)^?Qt&2O19uY|n)+&NpBq`cgQU?zpw?yO6zZdu3UVNO#nc;OF1=O(FGwbxnKe&kR-@Dh)@KED zJJJoe-=s$y2W0pxcVpx#OG+2E+$Ytq$a%vxf6Rjq{qydP<5I==xNqVPsf8ccUBW;^ zuaJmy)AGF~RF3?}?{m8!Y(iK@dGkpA_w1OwMWG(=28Cw{bb$&<=u3;s#Z|lX=`Kpa zRxrC7%ssdFg#W7fg&cRm$rkp&HgKVJ%jUd$UZyNE!_kCW0j5xYDM6nci#8{e4l^J%6i1k*!aw`h>;>esO=na`QOH`&Lq}Vt z0D-h((zn6FF%OrbvqIk3^)+4y@#(&_$1S>jR#@JyXY5E`C}!34fs#usTq`0xfG`*1 zQLb90>lKx&!l%quiV}j_SwK!n!&n$@K7YMC=9?&1g}(A47Myc6km% z5wOXL&vjqqr7d!|H3v$wz=l9huAM89zakqsY7_fy!~Dw1g|~wB_!VpM*yJK<<#b^~ zlUSG%e8nR#k{V3+XM}v81PT!!<%wej1P&O2sDc*RFeb}D!qh`(#cQ7f(aLa0beVy@ zBPUDpz{X+1=qugNVCFPn*aAtdkVLgAZ5WKD8aSnJOn?RgBNm8PX`XOx*<}|a6#@2? 
zOj6QlCgA?gfS8#BHqkzHzu}Oy>9_B+rex~n;_CB(cz%|-k{5Ll+QuhvXib}-?m8jZ;Ts-_<$)6Ygko3W`&*?1 zw|pgKU~+yCZ2tPBJcSGnT`^G5b$U8TAb{HSQbCNId2qcqr*RghdkVewJUZ~&12(Nl z^1|hLux};{vdGPwDw`sH^fXX=qOkmxu9FfxV6hBz+NYByLUCv6ta^!M5u=U2`nFX3 zGe5b-P6&JPw8p#Y>l_YxmSksB{o-m=Dhd??PNa976!QFXw?m);!bOrY*~4wr$6=zu z${&~ckA)A46BwXfpWa50|Dn^+jHq}3pc0XtULQ?`*j^hiQh3w%+DB@^_ZK_hE-SVl zcOibs{?T}5^p#&!|2%=Wd@8GPn^bjZ#-O5C(fRWE75=hso*_GHBO39jhG;!{QdU_} z$yS+N`^X9XA}g(NNLGVm2*VmN-(n$2DSesj6yN6vFlrfppV%{jdE^&*op2UKHF81= zTh?$tuC@l61@8965_r6py}0*5ymwnxeCPAVzwmodnW3o1k)o#^k~^ha3O8$@Ij;opiyBuvQXHgTrwkn10w?v_FWZtZyQ~vD$jfb=?!cAAirsy&9`|04Az|lY>P~I;8g9^eCK2)o6bn?&*B2-jRo9> zC55f#ywY6u$3bvfb~GP!!1eGSf%K}w?c}L+O_9UIl_j0_dBf6DgTHgT*Kg998sSQ* zINQq=AhhT%OVkf?nfV5bKsdRs-XU{q2H1~Q!{>&?yrN26MCwUd9J{8IF4(asqnWiLst@$Mw=8HS z6`?qQcjw)_X;7J_OrbdwPS8;g89ZDL{(7od?5ro^EO@9BP*dEYCsrQ^nwW*N={F!e zm-A{1=bXyQ5j67-OT<(Vc#x6w#TBF0ozSEW_l8Cb>ysG9%(E%Dho0irEEazim4}D2 z#F&IEKyjzc(fQ1Iwo3e!?8}0zx2@Cn9p$2ObJ8B<&`BOj!a>L6g5(>7Sne}Mn$M;s z%4`H9yu|xm4*;y4e9a>ANF1S;638#QCY;uFrSY;%1bpF9cgMH`rQXe3KV5eg9Q~sq zk%muXSmRP0Sg_ETN~15?MF3<-#sns>No^s{b?}n) z&WrM$KVSZc_U1NZx0h1%yemBWt=TDE9-60}65p`?70hrQvNK4opweLECHo}k}l2eFcV?H zo~v4~_#!q z2nm^2Wxb4K!$#}N4myYL%xVo1!Q1+XguL;SKqS16x^Zj6&z^rrkBax$*x0a0n;V2n zV&^dp<{u9mB8c(UZsJdB6Zbv{nkY}V8bQ*D+zSwbCY@o-z9BM|A<_x%vl(YFd7F}| z{C0hzi+_eWU^xVZM&~;bdtajg!i- z+%)(Fjw4rmsPC4T(d_EWbOnb9p+Ha8@~Pu#jsDG8@gec9 zw*4}*-bw3D`OP70_Z;y#GIgyvPnG&$=1qhMMmDcvO^F%321Yj_Y>4`>7$p={CmB;3 zF8W5_+kVO%kHH5YP!#%vT$*m1HBq3T3#z+Z!@#69PNi-YEH9E`6W`?0)k@Z}5vpwR zC9ykMAfjZ%^h-?w?%JC-?oS{GHWQm*3`HoWq}E@rWeyF#{!5C*ZXz4IZ`uu}<3uv! 
zf}?_NxlOX%8On09R8J?Vx^C~SU+ zOfXJb+m%wxBtAZOP#*JvO#ZfY$IAQ{C)rP8b0<7?hMoPb+_g#X6B~E)1>`!ith6lY zL={X*r8v0y`LF`sbUY1@D#t?7mw~XQ(I7O_1NCRZflr5z!tf^sC0l&^QBHeQ66)4Ad1uN z11Fpf4(Qz=9>F*evlM;X_EQJ0P_N%dq^A8kKl8TV8k%w#9~PfTd+=_9u-0iJyZl60 zA+NA<-^L-UA_K`}cN97$IfNS@@fSAetcF-+l`qO>jU32Ax__{%` z{&na@IKU(($8{G}QFBJZoGd7nO8B9v6o6*f{?E&d5sZQDYgzMeEwG)pR#m zdB>oL*-R_Rxu3BXQCF&q5THw-J+RwU`zJEEKM3($m2?cbp`8{VtrcCm5Wv8u>c~jJ zpHR4J1DpdL7$l0++zp09`8|YQt~B}-*Op2+$W8H?S!+_JHaWApNxlqTy!>&+!#hcs z7xR87et(s2^v{0oBMmVvu{i(n_2cZXF7zHN@swoj@mKYh$<_WI?r=DFLV%l9xahCWVp;j zBri{9L!n?2eR`1ZLv$c~lSur?DUkUi#G3dt7x%q&Q{jv+q-Z^?Y%#=-Js(w^5iJ!) zU)6f#ym=$j@pQh0YuYlm9iGizKAA}p*W0R}`N;6`-|?P~ONPJl-VVvlcX3&{UAazj z<641GcjSiMMbrwHNyT1`&wWGtvSWwp5oDjo=-DmT(+5CI|7N^(q7i$1DvGJ#EvF+y zl=zM%m6ehuTdFu|bHMft<8*mA<$6yddzZ^tZ{g3FHjA9NMjRtGAg^wcx@W&iiJCH; z6=FyfdkvobrQnqDs;aEy)-@9j#^)dl)&1D!^s&(8i~#HwVL*r?&_vZ% zFloAFG^)!>uvz!>m3lH?)Y|=f@J>V>ay)+RJZ~+>*f#r^bhrPXTAZP(6k)C<5QR!Q zojR-ir#h>#$EGpox6>Q`I$;}NyO-+~sLPV&zz%uJ^yJmwqokb@M}EhQnN1rkF@HSVv!A#yd1YrjpC0#nrbZ;X!HR_Lg&(90A>KBj5vMQr zCNsvQ&8lfyK@vC$9S9cg5&}}iIlkRAF$FZO?*Ys0U zTm|JwQ+LI-yN25s%tv_m0nm~#wysM|3+9G}gsjS)|E~aKKbyeKMgZ+Tma;WvMVDzD z9DkN|pyxiHP{`azI?dsykQa%7BxHU60Ml7a;xW4%90P-nGo{#uH2_{CsalIf&`!t} z;|e)LhG^ZkvghD~kG8nHbhS;UTDyhb>qly&{wXZb8n0T&430`Vk-#B9KO?rgLat#P zbi$(ye;WqSdlSDuNVUdBM_dgIVw})CQl^n~wmUCSid2u5H=~Z3L*!%wg(^^xg ztzW{Vx7zCEt!j04r-GuL!E^z~xW;feJQA$E&U9m#8nG~NC&X3pmU&;T;+}Z}G}h>+ zRtG#8WM9*eNFNx-8m#MU>LQitn_*II?0|DmSvtlB^2#Q(ftFS z$G(yhxlDn|0`f>%*p5&AHP*njjYNwpTaqQM;R&Z|n5gel38Jl*SSNYN3ajKhE5U2o^)YIVt!&S#Z9+r_L_kfQa3`xZYnYXY!BFLp!@*wy3^rrNK@D^7U!mh z;Z*fi7id?_*kl`3C6BuxD1Rv!4cL%L=Gt0Tzuc%T^lT)WvZ^Q)M}};(f!p7{o-Q;I z(oi&1rY27{k_0l0sv|JT`}+@_4qR)Ico|Pmt26Xxrn;ul);&KJWxk?{?{2Fm7~UE8 z#NJzwz;Fop0FB&x=s#F>ggp&QG_|bFXv0F)!C1#a<$spb>;;x0!~)=9ayx5-`X{e$ z{ReVtn_RM`G=)DGPU=i@745x!h{?kaN7QkyvGD%@#p_2`s?x=iV9TQoLHeMKEr((P4CRs6ciuo57$W14gWe;8x3 zFmZ<5kJr6LakupSs;;_~VXvHdaa{=6I@0-NVyKcL31S!{FgQ4Dka-%mFP%9R#z?9u zVWy~9%Cw0lL*e9{S2(I!%qZB*2xE2jH*=^&074?8(y= 
zJvreU){0?0EG;WapsRzpgsY)HBO@x+aSK;#W`cUE-dwnviA5lnGUERaCWlZFV& zj0YnuKpE~i*Fbf(_WRP^Ezne68qAVbd5vpY8j5hz*Ur0vsw{D~BGJw;NFfw3E1gx; z4oVSx{{U!=ZAQOk)fAWKkTc@TWfi0Iz=95aFI!-jSw=XK9XXkO=t) zk>5lwy;Va;ZKiIkf|8CsyQ`(AjjBAVbsx-ayIXQ@Dl$~#aU={Ap|M=8)t2V1u-ss+ zqEJi^0WOZ9k(FVeb;kscY-#`)c$bWe+6%rI+KReGjl_|z3Y7#ip4d6haqY;}W7pMh zSsZB8@V#CXEbr@=j^q>hy~aq-IMe-V($~Q(vRq-ImTW8uKLSVR0b7*^86!RU933Kc zJskF%bFI|HSt393_n}E6B=Ao?i1*L7s>P!LijZO@6+#+Hno82tbQMTxt7Hv?k~ahr zLFGs!H^xwW<4FA{XRl7EYdyukhcy^saS|vyLU3?B%Mv!9`GM`H<52|EZw*EY%9z_H z<{W=f^!M$dwpihlsO65Of0bWJ5FxU1^yI5Y&tOw;9|P&DTau#{kmCx@19q0TOA7@G zTbSC?XsY>H>QiAu+()~Pa1KBxIU2aSR_$NXRLYikq82k%DQXIa4dpvvkgfTY5_8T* zbe(LhrnU;H=z=F20$7k-sp@U@b#ze>9hynuXz<9u4{w9tBZK?v?wWaItKJjKyt|JH|LxK<`6zHrhcA^NS;Z+1L||*pH=JWT(ee>Qic}WQV19pIbfMxzNn7Fd{t>&R ze!7oSBCKPKZXk2pzN5oX<(@ZU%%>op<5+pCG>C%*McAWMARYk506E7z z_ss}bjWk)iu>*Li()S*tr=`AFX{h>wijXu?Pe~@(7?Gb-C~Sd`a4+=7ljGAYhE>`!mDb$QFtmU^R2M?)>b-%nF6C2DGtOCVR_iSLt)`yDPG zjI&uS^z{_bNVN$tNgAf5W@vHg+8KQ#a0a@a=884)*+Xu~UU+Wfj{f6B`o@?&)}F4Fzn;}E z2~zlA69hIt{JZ_NGo~mK8&zV4^J-SHBMc99WnyqX>OXyVL8KKpm9ud&`A-!MbAAb= zT6r7HRE9!KVD|6Me^afrwneG7wL{BM48ChA*)d>o_Z{_(J*BGd)uIZh+x}DUw8p`-6 zD6MpJ$5!gm$K_PE=%e(Eu^ovAXdfg2@2JTwlaMo^gi>$Jqq?i7s;H@jb*flu3dXNA zN(uZzWQLIr05~}Z=rhg^lkK;8XeNr|YLvrKOu^8G!$RQjew_aRO>~U*ihHFi$5${( zBUFwxj2*PKp@{laj}{#wm{wJUywgrA5}*V|s7{Ngww@lYmA&iR0*djXhW& zX`W@RiE;RQ{i5iuGMiUe%9fjqapIk(1(Ko0GQMErBxH@idkmb6F?f&E&oopMPeUZu z%0RP6Q%6#(FX9|v07vBK80QDM1aYn^s(CA0gefFrb{b5u$GV^WXqA(%E_P4%d1RKN zp8x_IETD7P_a8&A4xK@<79ZwZok;R^S6tZS6s{|4DyXTaWInlomj+FX*@7G#WO_ju z?W=-YCEiaPrmjh>S5B6*#WlK-sjB>$ZN89&B!~i9To+x$?c1JLg(+SHPiN{rzv;{K z-L3v@wh2FyCE9$);dd#)R{%+Y$Rv(Vb$r$I)7;~hu9+!nYUhY#dVJ!U$io4iGuXCt zjG02eAGFm=+~w}D@eb!}v2`^C)}rTZ>w4PXDk=#fXvg8CcLF$x;z+Q|+Z}@qSoJhN z>_<#mXsM#8o(nYrPbLH}=CF*HI9~pyW1igoG{e+A9|hi;It$GNS2`+YK3z3LD&AQt zMpO_6KyE)N`sfE$^sUafq~f>q?c#fF!j4*bDCSB>lg&tnrU^lBF+E#YoH#oMNdraA zO+YBiac7|Vitp9cvr}BH3v#9u(1~m9o?FvFEP%L*L@q<|v~E&y?guAF?A4Zw-CNt~ 
zs%cQuRGMjMs^$shNI3xu6Tv-(3N{WlkaZWJsjGZF=y~ne>Pk9mfoW@MDyb?SSte&< zNp#qu7y!5ck&JuiRqS@!UZm=Kjp|E$aikRDDq_%p^He(Tk;qbWu{Qd-r}06G0e zNI1quc+~(ESiVZlW?r_7gi=vkY5IoKSx-C24AMg{of1x?a;p_NJBwff02l=6;wUI# zwV4b48CVt`k-kDW%MSP-_189AdY$Z5DGNsRl1mmy{HWu18_CWO*c|uIwyir4SyJ_E zeqAg=o#77{fU(>-7~|YT}(Ncdp zOsL%a?gNr|0As$Kyg2bL-_+HWR(gRoEdFuww<4@z`N9E{hZrPhIpZ3uC4wrHu7+w> zQX_5Atr06vFaWH`Gm(*;5sdf7sDBhG**tf(+%8fx z(_ABC0aj%yvBJ1RjAIOkk8pna&X&efPk>@F{WUrU!wN%AwE1ZSP^4<-yBqMRi*Qy!bO3*Be6dpwtrnt z+qlsW#ZJVBQOQi_Khse|ZFE^NjF88+ewx}UKr2jSWhkY~cLN{#&NUD)novj~8M04v zqyxBPwB{cjJWQ$SpBHE>R>@-gG`9DOut++Cbq+&PI=QuUz!yzpA>5@lt;gaJF44X|7br=^b9t zB1inK*e;=Pa1XKg(#J#fwH#5+akx@e*4(aA%Fhjo6Ib{s#sVaY2s=UqMaDqb#PY{I zAeNe6{=_SVCe5GPJoN`w&(d_!CHBqb8jwQNP&k$)a-`wc^YBUS&mXR;u951xx|K@{ znuzBG>fouTsRztC&*j^K2+mG(k*hMZt~!32kjr#i)deF+s6~HI)hu%r=LzT_oDB1W z^~RHA>yDSb+#qhBlAUE0LE}ZCU_dM~2N@%7bd*v79Dl51IvPEUuDXZ`s_7BTWt16m z7-dc@lB4d%^bA^FOIUBIv{>;ZG8m2l$^8^xTvtvXTfD0wo+uBUtvhI3D8)IZ!_M_ogk9#Wlw3 zUo}I}w&=_Y8QLQQIb+W!x3||!JzkX(*{Z6emZVA$A<8yRSaIJT{{YiS6u-p$^oETP zN#+3_?^6~(^5c?m`srBLiuab1d`62^u4qRl}y_l90hQ zD%GsOtNBPsD&6uu$Jm_@vE7BSYYk*H^sN4xhJ|)39FlhrWBz{p^Q~7dtBS!;l(e%* z)b6|5Am5Swn2&S*e>(F5O)T=Y4az>h!>A{`SJ6{c7^|t~1{OeY$WA#O_XDj}egxj@ zDa6zalGHFI-~gPk{{RUCx#LauJA+i;s4guOlbYGq30H6)toll^924?=^=b54bw^Xw z(^6YwiiS#=8aT;7S_sB}nDPY}01V@k#-7<3R_-V$J?9$BVY*ONG?LTQB+y~WPY)y*tuYsVr8*6-<&G zSJbos$y+J>#hV_K1PmZJ82jg)JXi4drXjmA>Dx^`H0=V)rxuIa0zs&%C;Wu@O@EYcvIa66MEe;-k+qLS5S>kCTVx?VMd6k53cD2boj+SC2pSg5 z6+iN_oU4za2O1aYREtAKYUCrl?E8(CVh=}jrh>W#Y&=y+9UOk#exI&%ztR0MLD7Qe zOI1w&0EVh8=_HW@9AUHR$QVO|fOt|nd}&_E{i5owl+w4WNKUYx&^MW+~~SmxdgQceQr!! 
zS;;KBi6#BOPg>2H(+imp~5JyvGmOzkAAdQrjUwIe?Ib2}lJPlPH zDbx*et1!Ip6lyZ69I_z2%Qgdo|2B^LiA}3E7T7&Z9IgdAz-1Gj4KTCJ@KmB#Lk@R8?T9dJy%(7 zo$dCdm`zhoa;30IV}eQsUAQ>o^wX`f!3_-!zLscwiKMA&q)p$=g(AS(8WODk?c9= z;O9>;$c331U6xf19!CUx1N8p@&ibsXEi|_|e;-L&_8Y2U-eiw-6C8JVhA>a~mj~~r zPM_*YYU_)^hsM?I^t>f zyE_ptWoZj!ZpR1*bB!|iThPi^rx!2cCBqpS>SgDk4L9;t>0M8)foown}oTi4BPNlHawVrtu>#8K2Fbftx zGDnUwM{ll1HP&p1!_z;*APCFv5KmM1!+Ew%LrZF^uENigX3Od2doyB)cM~Kv~G)n>gAJ&%fJFI8z!DJH@NBum`lcIxK%2nyk>#xT{2` zrt&w4qkmkiOFurKp-*k2xf-)7?I|_6qoJ#gnxY3X2$5rwM2~jp`|Wigx6oF$ z+G=>*v58J{?UU+u?s49N$XjYo zxk?%xr(}+>v0CY?3iV~4wk)hdDoXfW-H+wrm~wyb&W2TAx_yGJ+B%9_wId?Bl~e}r zkm?Hkj|Ww^O!yblogdScS8l8PKUG&kqs@)|G|;nf4$;dDoDY&f1K&yYT?N-y5lvIj zved|vLnDhZuglS(I1zy{_3)sPk%D%P+^unAd)#5n*i~u(6Y#eeq_wUF1`AT59Id<*1`~#i`8f!KSMFl(yM?GE2u9=x0 zSY$DXv)p5G{_y7*KRvOlAEJ8d-`4g;uCrBBNk)>2Sw%d8MT%{qnFt(?3C7{h57$7i z_CrNoFrxA((kI&-kwzj^92~G7oMiXlchqZ@i*b1N0F6Obs)`>c%Bfdisf26*W5Lb` z9+A&&MCu-rqUtzfXH~39a!O!gSbjIA6Mqn;!iM@P)aw+R{sF8`s<(?rk=EIGBi^n?m{rg{{SU^zeA}5?ulgP|)yz(s4-h*}d5^=oi0+WZOVqUS)=26QNl7Sq6!1@^k%7m4 z`rZD)_=nN;Q{6g78K!DXyvEHhnif2dQHLS$bCMJ`J~eV!tu0G=mU$8{AYm2zU;;@z z=bU_T_R(qYRSv~tltl`mTrVD;K0a_g{xsffVvi9M`WwbBsQxC_9=+)slyxcy{B1om zXd`|nb!dOs-PbDbLqMY zo~Nj4J9R28jl9IUS4e~gIKWuQKBvh!&kOg_{{Ry0d%AkI3mpVx!9@XqunTR0{V^}o zAE!ODq;p(ZKpp(g5M52mi=?^^{bjJ&`o4w=iaKSF%&Gh%?q{Yi>5*9RyB){(NFQBV zeRV}@>iZ;BQ_-55hxJ7~5qiiV;IJeDr0{t7KKifv3#)2+9%`Fo+eEg-s(EW^+(-&Z zuqv!;k&L$Aq0hE+rpSCmrM~$zbdu9l)mBVereNhw32nHCBxExCMsc2SNY-U+o~J79 z=j}5XP_;Rys=PnESgNC}v)dXoxFA|-Jl|7ij1C6_k)94Ue0V3&HCI}CJKs_@LsTnm zYA0n^LfHVE44yss?eE61z1gnuQ$Z5k0!vv0v(u(`h1kp)K;xf$XCM)%_dE41qQyx= z()Vdzo@zfXNgyUUUEPa1NWI2ck79B#0pnYl&Sd~Y$Nub{%PRMMj;pHZXyvGeM~ZOi z3|nMvfq?Zo@}P|2Tb!vs8k>Fc+)4H0>D!N= zu7=iHBfBj#z~U?AgEtF&sz`1a2Z^!CjlkM|2?tYc62QdI9B&#qBX6?$hDJVl{+fil zA?r&syxv;KwKVI;IVrop*b|SYgc2@oTqD!dL$P*3O=#iQ1YOT+B{(xru_R=RW)LeegA7+xoNpn(4cp&hm%OlB%S!X#%>1%K$U+ zxl|8)4~0lUit6gf>A-mP=EfPzUKK* zJCXpv&XjDFR@yG4+ij**gQxBB3dxlK^CZSmLB?5t%N9QD2VtOe_4S@6UE{Z2lA7OY 
zj7uymBLLGCM*d?tZ_B_3IUd;>#$IO6YTPbh0;}51N0V~tOON8EfoYhuZ9^SXQk9OP z1{rOUZ>S9-7a2*+cl_4MgU}SNe>g9)_txfzY zdsWhTC}^TU1-`8asFY_c?4#u8Y3 z(!DKp>r0Zz9pR){^9Xdrv#xjuyairQslB<^Nc3MzQ`v1zL2eOEFd3Sfl10NfTpYJK zH~=0=Z!o(@=5z?xV5(5Q%bv9ky8sC1coi@#z`Lh z@r(h_eI88Qy`#iRW8Whp+d3*q+AD>^;T5&8Uv!v0K_`uYxdZ|E+Xp8AC>oR1Hkx{9 z;i$JwGjhfX+Delg>OkbPg*=~paCHjh)iksZKbAbn80^V7J^jAgUu(zl=u(lRYKrkI zm5F&MqmA2+1~J^8+MFw~vSYNex$6rj`(0j@y>)u5imDh5Jv_A(t2AM72;5OsiOBDv z6+hYAsOtJk%hdfW^xaEGTp#gY!=&12NR`zXZoy^Rptl>9jxa|$=q`h{TzYzYo7Y&O zpt{n?e5oW+DUmk+0Cl+-Ae;~|LmvLj?|n*Tfqax{p+#v7tW|tt0(^ewJ`&e{s+N17%iebMZUHQ$r6T^l}sDFsLobW8@L_w z*yH~IdZW7M!*uFOf5%fo)YaPq?~N6KQQSz1J-Fu?`O^yH>IJZ)^Dq{~b+I~fssU|| zMo3%AU`(*8We?SV!bkoMFWoHE)6%_VvHZvmcjX3Gp3c7i0DtN=&)p&7UtRcP9W5=| zG1OL&6~75pD;eQI{{Y*N@}uB3_@AI8i@i{ienK4m3bKW`{|;B)m3qgWu9*)BH7wWE${Q&TIy5DnVO!I z8QYvjxke`(kMRBUW$WqeHuG@mE2MV%3S?h21i~W|z-JjONjLy<#{g>?^{MH-Z9Zij z!Qx-T2$&t%9+AlY85BAk0K~vp`%EWvbvFLKn586$wYscWAk8;Fg^*+HA{5tH*0rPq3MK_ToqU{ zGkPY8J*Nj7zy#wwjprE7x)LfXyQM&Sxw9N(fS>`lpKsTk>lrN8+6u`WQpP2!TuVBp zBrTo^ao<1m)k%~fDtB>AoT!OY*Z%;vH_Pi?C9F2-X(UIXrrSs@PhqGtypq$@DX2v|u&SYV56W9PJ@JftUx{U>iGYSAK3>*iA>_BP zA07VyZF@Bx(HzMlz$w7~Xr{q(0Vin40R8pv)Q~9v6Fni_jmlo@ZM#@x-*=9kV`3F& zf?)8w=N^)9$Vv8L`s>Pzypc&uFm`yU&h==|sIrbjkD=AORY)sktEIcz((`J#s+Xdg zhj5RM0Vjeu_rV`Jt?#}K*VwPIFGgM{};2hwe zzN*FPUbb3rmVrSvtzDBQzBu#FNF?%qW1p{WF3WrBd2h2>D=Oo)$4dyBspBF^PT{}> za0xjBEu3x~bFVffYbr$1kc|;0_?2~%`_@rdt<4;?RyuvxDTo1Ooq%PGe2@Z?dB;CG z1JU!!Dx;>SgJ$_=p0;_{YpNq{Rl(r1kVwa6`)Ox$8=U4^iB(!=^CW!lW7w9*b|7Ou z`hB(2HA1v-Oc~59W!|ItOO8E9f!LGw(YLiL#8R7Up7jILbj612OJ}W^T}x!N#x@yH(N%k<+y;o^R#Lv=>@s3m_Teo^hVvx$li!_e)dtA5Q)X#sxfH ze5z`U4yEK&!h%mMfrvQ(_Tx~zG4P!9rDevytdbbs6b6p4PKKG{$Em{{6&#F~0QVeo zti;HGmhbT#)&S$*<^k}>#EZ3(ww~M6w7ylMy<5nCzz7)0{pkY$Jj3~Ooc0~K)u-1@ z1#N<8B-v3^SbBt$DA^!kqr0I!jI+bZ5kjaQ5f&m*wrH>7Rv;mNy0gwhsI#Yms;8gOlCYaoO z=*qiyPgbU3j;eZS{{R^kO+gHlbCoG6j9~It7|(6o4o0Bf`ij1ns$`X|7U~Z^ogSXD zSY(>2KtShotUw^~=>TvvSD`P~>ns(u6xQi0YbdDmM?8tXo*DgKrWxd%eq+XS^P`;^ 
z9Gy+m6J77o{{SDl*H0oewQ#JMDCcMy9*ERGWdLOB%j_J8n0>~COirERF?S*frHMgP`NxnB(Ip%FY#WgM0Aw%))^gD zio{)&`{dwf82Htt()KH0qN)+AiskjnKqY{6;X{1w_8IO$(x-#{JY%xSd=!w=&0RCS zJcV-NAfZ9ea0v&u2c1}{OG>c=Ex}q?!DT2`Hc_3wl}>w%XTG4y;`oU42f2}M2xXCw z%@B$KwlJWqs(X;cNl&BDWi&Fk+#-WAQPPS{{Sv<4v0-#6%xxG z1v))quKDHKUMgS*WBxZk;Zg7LrSc)DLjzlhJ#S^U-8xw*x~6)13iOY41hK}lGQQ#H zUPElgcODm<_rTRUW2o@v(OZ1zyQGwt1!%=gH3UgZa!tT&cmUvx@Y&8XGBtKwI(7@p za+<>P*T|uoSwJEd8QZrx#y&IHoeaA8c|l!3vs{Ew&rYl&n7L@x+XFjz-;bwlR*a}A zj6qU%Ag>HOSc|9?X>T+i!m8L8eT*KR&VM2Ums8x}?jKzDbgHn)e6NPTt0eqI6f!ZR zwJU$S!_HP&NB5kJa6#MHdy*Q#(e_IWfvF&xO0c9FtZgO8^%dQ~9r*y{d}!LGQIsI4 zkVhi&?EYhe^!>etkmMHjYBfOvdxWa)t4~`EL{UjBkcJB8LlkHad@e>u9r2&vQDy3z ztz@ko9Enb?-^#}WJY|ka{+b>sO-AhGsLAyYQ*+-Ru8F{uus+${RD+N~+v)i_S>U1= zvm?PyC9BO@da0b`vo`PB`skF28V4&D14tQ{Cyp`3duwEFw88wn%IFIZzqYi^RlJ?d z;TR4|fHU!>lYOT4n_Zc9Eipw(&EFK*h<@;HyoQRJeK zJl6y-emOY*09{3{y3x=GTDImQ2Pa~ktlXZ!bNxNFg3g2xJi=4gEEDW#URIZJ0x`J! zf_^oww(KcmdWw{B1WLt-kDmU+Jn1^;;zW-N)7Yuqoca@Y^1%1pPb;6kp+{<#<4h}h zuI&`+cCn@mvF=Tw3>PZy_aC_?cI7o6E7;stT$8T-7pwxW|2p7~uVJ zskHFa$^84!ON7bl^6`X12N)k8e&<>uuLdoY(TaG~;XpvEjBtLsC1(6=&Wmlz$XI2S zS8*8alh}9Hm<1XY4NPI6l6kGIBSQp7&t)qnLZS{74gd$-_vhT}j_%!6O1)LoRP-@P zRYe_8fois@Ni#yB3!HFCV9ZFsB$I)_9Vyj)aZA#4H!6Bb3$?nMN%=d}ND-MvPyEP~G=oEZu#sQ_<{*q(5E=iHy1 z{;n|^OO+E*TwsPs9xo-5RwZPNyC zgZpK^#o`B43aUX=@YLJ4s?2Al0#;1&LktprzfNQs%oB_x^%_fx~`g{8f$f7ol<0xRT>sQlV(ZJ zU;!P#Be1~&U1|G?s1;jXjOhyn^jR%5b0|+Dl31c>Lv41+E$Tk`JGt%7mFTWFKB>G( zM`EIipApnl%Og}6VWx@B(~vNyKCCa$cGkOMrPA4npZ(ogUSf~!?erf^{dFEX_)R@k zt_pg)oKs3-fm%Q!k8lJIGxPO5^|(}UC(I0tL8wlPq~oito2QcGD{G4RQ6&?ZB98+* znD*p%=f14_PlG)s6*VT6E;Gg1 z0Sq28PDbEAx_KDWy-m#(l1jA!_hSgE!c1@*1cE+s_0;(@GdC1Ej6|Vr-a}u)T5X~V zYCx9Sa1uC}^R#CyNjwbwH3C`a>S{z*_#h1|W#p9!$vHUTef#9=RYmd&`o&WmZ3%E3 zOvQjudBTzXG(wI`Y&EYmP{$JhcRDF0v&qNl{(4Uy0yu7Kj+GG`bw}g@@Il(S;1iG7 zWapoKZAsvUjnrklwezHsGqCL+VVrZ@InVW;wJ?Ytrr|q7OA_y5S8E?&Ren!wog`eZ z)qOz5p3g?~6oFfOo@cH1W85E)Ve)hU7d!0>LKB+0O7I>%*6K2(Jhccz%A@duJp;UHq@p)`yD9w@&q#O=1Gon$#l=T~xi0r?c 
zoN2V9v<;r~RYbLKxga4h>I{KG@H?GHj)~hn7OeIyoFMtY$a-^9_J@7$4eCM`?XWA1Rm~K;*ZWQC3Yjnvv#ufF#-N zpP<)*mY(enR9xh234^ZUHuu37a(Tx;m;V4-hir;E zTA%pm@qS*`*%$mUN68)Wk(}Ut`P8b5n59A3yO4Aygg#+{7KaB^`Uo6Gsy@H7q0tJ1`f6 z$s@LL{YI4E6%v_g;%76lU;rhL9B1d>9~ss!zqp0kCiSCG)7xjEyGs#DnsY3%%1Li0 zC0a}axSWpqt9q9COES>f?T{q&^vI$mR0jkMD(xM!+>fyN)ZV%mDLUJ!l1W6bPF9RK z84C&ws;NDO6q0_2NtT;@_XZU&D^t*~sTwCx@{#)GM}9kG|ZxxaCwZR zxnFW~?~(w=*FM^9{x@lo(LD9la6>|btfoiW%rVGO-I$DiTZYU2 zlM4Gc{$Hju@1(2DP7HNRBFObJlE4B2xa8n_0(sYx$fGc|9ZN%};(D4oK-Flm%_MP6 z5K;+MQrnNV&@w)CV)$NZY3IGwR4IxHX8!fx^`i5j1 z=^>o%jbz=mlOqd}z&JR;`uEk5Z-SaA6@@C(%jGYaHy6WW)B!!fIp;!0q%{ui4}+#+ zju7*z%P=`J6M_%G_SCeuq$~pM{#n7x6S$s92ZNKJrl$!+#np00eB+PPJZcS1RJ1Xd ziUB0@DZvtg-Le#X6Z7ryr1C8^Y6a?>lvQ=DZJ{yA6(Pjy8{~Pzf(v##dti1s)-(%K zH}X;yh9>m8B47iIj(GV006j>fprei`rMO7HiDhw=xgD;56n1A+AYv#+PF8_a4OIK~SO2^xsf zMui7E#~H}UnlbP7AYh$ zV{1jg$-rQ9@yXY+RJ(vANcWEW9FNp!y)tI<5WQ!T5z*=uiEYS31GtVg9!iz+M2cE5 zk-eF=;CtZvj&b_vOtK^AeMLD3k4bJl{xu(HL`k5i+vzGtoUOF?&JGIaT2Ptm5R|ok zQh&LuPOmTw%t-WL=LheObRv7aqDEv`B`n8iV$Gg@2mW)c^E1SeQ^*~&yNNTw`s;Nq zEOu?R60DWAY=jvxkfZ$vZFnFP3r%#^>y6d6MG8O<9+lVt&$}Mo^lm#;*4Y*)8lT7m z0a1T32RT1nf8o@7Wx}SWMsd6rKSD?mpQ602k|g%!Pr93 zY&ROzTA3tZ8*&nHoZ~*j=S^@?(?=pC@*+XTP$2$VvDNoVK#s&U4B`)CWoAjjmtq%i7z5yB6UQ15;!CaBWvh-cNk*&W z#E-Zcw&1b<0Glkt{{Z;>YJAKnx{`w`N>|V{S@~$3HVOMVKcUmr0<8!)xb@UMCr#9y zEm&-Hu&u_vQ+3}VV!w(2iAxfAR#hMYz+yP$=Z)y=CcD(d6|6}YauFBuffE%GaGldj$;f#T7!`jIv~?j#$GYm14NT!7Y&8`{*6>rmHLKO>Hczb(*E5 zbpQj9xj0V#k=q|oIn$+gN87FK_*j|iWQA4gWoa2pJ90(=#xU6Zbp_!589lZcVMVsh zS1enRnY@DGc5((aodD;$1=Q@h>&upghrSjTVLRh0^PVU$^9loPcjYNvb8bU&kNciXDMNlquSB1A) zFTqn&OEQULc%&tl@({ds>;?em>8nzX_O2Jt+Uq%JV=IR9)o@tn-F^EVay_@KB(z46 zB%(0#Md}fsU(v98V^8${EmD63T4+$EYy_&z6pt$-usols=bb`ul5VAAC2Cw%m%BZl zszTJYG_s*>l97}+86+@0x$Tbm(R8US)YH_gh8K)TgZrge0zYtdRoc95vo)d`Ny4>o z*d&gkh%{<&0}w`X4l*)7u8KqA^#wFDrBzbYN(NP8r({4s!-6><1oAPAV?t5BA#)Pk zl4)nA5y3P-Mhl{^Vylu*9^YLcR^ANKGrP$$D&8Ar& zX$RE7P6zoP*F{k(>IU|OsmhW{mfe-W$8O*E)j>_w)Urexu1TsU5gLe^F|&Fr5W#^2 
zXCNH?4zj56Rzo3qrI4hDfC<3I=sV-F~Lz~?91x6@H%@iajXkR*Y_ zGlnuo?VY{<0AAYn13kz`ih)lQ-fWE0D#$R|10NXnIvlmMsUV6y@;2;bVMb5<#Qy-E zlq!1u3aF!!nbKH@U{#}KUCZs;BZJ1Z(e zrn%W^g(>pJNSdW%3N!vAg+IQB)m9i}dRmpBT$BF*?_rfm_WYpt$9*Ust)?u?B#%8I zCt}G6W4k{j_Vns4ZT5i5rr#6F@<>DOP>81=lqb)=duT`|Zy_J0>LrHM&|5_&PJL0I zTMvv8jQi*I8iNN?*BJy7q{SLAcgYypj=*jMohi_DeGLkEwG@)fN^XcD3Qs>?-H#fB zbMbDH`$Ysc`z1xWdzt2}Ss94PBVwokey6s)xXC@BmlYnWs-d=1Pghdw1w3QaRfzx- z{v7k)<3Q!=aEhy9o--V|X$+{~pO1n8=O;PRcl%{b!B%(`i{K2XIp@D^XkM$LcPeeD z!~zK?C-lh2wwy62gU&HfRKYTlxRO=6>{sfaSc zPbZf0$iZ+6AR{NaCj)AY#F9xmfk|?-Pf-lFil}Q^rc#i?)NWOKl5w1N)_q^pOAgy* z5v`F#X*9L4&-i88y8;g_d1b-uNKx;&=?H;Nwqr|@Bnxd!Aw@iACN?S<@*mu{-Hu5o z=Tf?ww9!!7?nulHqPZui_x?jGM<;)P!M{x@+IotbjDh4Ac1@s%Vd?(>O-TO$X(_4= zL=}{|q^KTSBR1{IbKmsNtP8t{4XJBT-hYH#VyKE>&>V*Zf*F6rJ^nv!9_w2qU0)i_ zMiHyf2pG#y6g);jg~JV}Xkx>Vz#M%wQ(7*Rn_#xBW4SR1p_P!G;XoNYcg8<$5U248 zsN}d)#-^%y7=9JqR4S0XoMR)f$JwLr*ow2(;{ z#yI2Ow;J=H;fnoquCZUzSgK=;NV3F%OU1lmMGfgZ91+{!+Zx^0m&yyO^!>^R{EFJ@ zR+6COJZ}L{Z=K&CH4R1DJoC9qWPTonU2#yjVq zZ@t0dogFN+0^@I}l1l0dbdIhdNJiY}5+QHOc^Mz_{dG(|1vK-_)ihM{PTr?2oUuHU z#~J?suAPC08!>sTx8fD3s8^$*oui%BT6JOOxZjd9la3D~JY&9xszw?h<(G2}r+0FD u58qp3NhW3yC248{jFf*a^%OBzv7IbHBHl4(de%R0S({3m!pa0nfLN%EH literal 0 HcmV?d00001 diff --git a/backend/corpora/gallica/tests/__init__.py b/backend/corpora/gallica/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/corpora/gallica/tests/data/figaro/Issues.xml b/backend/corpora/gallica/tests/data/figaro/Issues.xml new file mode 100644 index 000000000..667c8483d --- /dev/null +++ b/backend/corpora/gallica/tests/data/figaro/Issues.xml @@ -0,0 +1,4 @@ + + +01 janvier 1930 + diff --git a/backend/corpora/gallica/tests/data/figaro/OAIRecord.xml b/backend/corpora/gallica/tests/data/figaro/OAIRecord.xml new file mode 100644 index 000000000..2ac45c5a1 --- /dev/null +++ b/backend/corpora/gallica/tests/data/figaro/OAIRecord.xml @@ -0,0 +1,62 @@ + + +all + + +
+oai:bnf.fr:gallica/ark:/12148/bpt6k296099q +2024-06-21 +gallica:corpus:BnPlCo00 +gallica:corpus:Pam1 +gallica:corpus:bresil +gallica:theme:0:07 +gallica:typedoc:periodiques:fascicules +
+ + +https://gallica.bnf.fr/ark:/12148/bpt6k296099q +1930-01-01 +01 janvier 1930 +1930/01/01 (Numéro 1). + +Figaro : journal non politique +Villemessant, Hippolyte de (1810-1879). Directeur de publication +Jouvin, Benoît (1810-1886). Directeur de publication +Figaro (Paris) +texte +text +publication en série imprimée +printed serial +fre +Notice du catalogue : http://catalogue.bnf.fr/ark:/12148/cb34355551z +Bibliothèque nationale de France +domaine public +public domain +http://gallica.bnf.fr/ark:/12148/cb34355551z/date +Appartient à l’ensemble documentaire : BIPFPIG00 +Appartient à l’ensemble documentaire : BIPFPIG63 +Appartient à l’ensemble documentaire : BIPFPIG69 +Appartient à l’ensemble documentaire : Pam1 +Appartient à l’ensemble documentaire : BnPlCo00 +Appartient à l’ensemble documentaire : BnPlCo01 +Appartient à l’ensemble documentaire : FranceBr +Nombre total de vues : 164718 + + +
+
+bnf.fr +07 +0 +Bibliothèque nationale de France +fascicule +0.0 +Figaro : journal non politique +1930-01-01 +15/10/2007 +false + + + + +
diff --git a/backend/corpora/gallica/tests/data/figaro/RoughText.html b/backend/corpora/gallica/tests/data/figaro/RoughText.html new file mode 100644 index 000000000..4501d9320 --- /dev/null +++ b/backend/corpora/gallica/tests/data/figaro/RoughText.html @@ -0,0 +1,2 @@ +Figaro : journal non politique | Gallica

Reminder of your request:


Downloading format: : Text

View 1 to 8 on 8

Number of pages: 8

Full notice

Title : Figaro : journal non politique

Publisher : Figaro (Paris)

Publication date : 1930-01-01

Contributor : Villemessant, Hippolyte de (1810-1879). Directeur de publication

Contributor : Jouvin, Benoît (1810-1886). Directeur de publication

Relationship : http://catalogue.bnf.fr/ark:/12148/cb34355551z

Relationship : https://gallica.bnf.fr/ark:/12148/cb34355551z/date

Type : text

Type : printed serial

Language : french

Format : Nombre total de vues : 164718

Description : 01 janvier 1930

Description : 1930/01/01 (Numéro 1).

Description : Collection numérique : Bibliographie de la presse française politique et d'information générale

Description : Collection numérique : BIPFPIG63

Description : Collection numérique : BIPFPIG69

Description : Collection numérique : Arts de la marionnette

Description : Collection numérique : Commun Patrimoine: bibliothèque numérique du réseau des médiathèques de Plaine commune

Description : Collection numérique : La Commune de Paris

Description : Collection numérique : France-Brésil

Rights : Consultable en ligne

Rights : Public domain

Identifier : ark:/12148/bpt6k296099q

Source : Bibliothèque nationale de France

Provenance : Bibliothèque nationale de France

Online date : 15/10/2007

The text displayed may contain some errors. The text of this document has been generated automatically by an optical character recognition (OCR) program. The +estimated recognition rate for this document is 0%.


SOMMAIRE DE FIGARO PAGE 2. Les Cours, les Ambassades, le Monde et la Ville. Les Echos. La fin du Bulletin vert. 1929-1930.

PAGE 3. La Dernière Heure. Avant la Conférence de La Haye. Les méfaits de la tempête.

PAGE 4. La Vie sportive. Revue de la Presse. Anne Douglas Sedgwick Marthe Ludérac.

PAGE 5. Henri Rebois L'Art espagnol à l'Exposition de Barcelone. Robert Brussel Le Mouvement musical. Guy de Passillé Les Etrennes. Jacques Patin Les Premières. Les Alguazils Courrier des Lettres. Marc Hélys Revues étrangères. PAGE 6. La Bourse La Cote des Valeurs. Le Programme des spectacles.

PAGE 7. Courrier des théâtres. Les Courses LA POLITIQUE

La diplomatie


diff --git a/backend/corpora/gallica/tests/data/figaro/Years.xml b/backend/corpora/gallica/tests/data/figaro/Years.xml new file mode 100644 index 000000000..4ca05725c --- /dev/null +++ b/backend/corpora/gallica/tests/data/figaro/Years.xml @@ -0,0 +1,102 @@ + + +1854 +1855 +1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 +1880 +1881 +1882 +1883 +1884 +1885 +1886 +1887 +1888 +1889 +1890 +1891 +1892 +1893 +1894 +1895 +1896 +1897 +1898 +1899 +1900 +1901 +1902 +1903 +1904 +1905 +1906 +1907 +1908 +1909 +1910 +1911 +1912 +1913 +1914 +1915 +1916 +1917 +1918 +1919 +1920 +1921 +1922 +1923 +1924 +1925 +1926 +1927 +1928 +1929 +1930 +1931 +1932 +1933 +1934 +1935 +1936 +1937 +1938 +1939 +1940 +1941 +1942 +1944 +1945 +1946 +1947 +1948 +1949 +1950 +1951 +1952 +1953 + diff --git a/backend/corpora/gallica/tests/test_import.py b/backend/corpora/gallica/tests/test_import.py new file mode 100644 index 000000000..72ad4bb36 --- /dev/null +++ b/backend/corpora/gallica/tests/test_import.py @@ -0,0 +1,33 @@ +from datetime import datetime +import requests + +from conftest import mock_response +from addcorpus.python_corpora.load_corpus import load_corpus_definition + + +target_documents = [ + { + "content": "SOMMAIRE DE FIGARO PAGE 2. Les Cours, les Ambassades, le Monde et la Ville. Les Echos. La fin du Bulletin vert. 1929-1930. PAGE 3. La Dernière Heure. Avant la Conférence de La Haye. Les méfaits de la tempête. PAGE 4. La Vie sportive. Revue de la Presse. Anne Douglas Sedgwick Marthe Ludérac. PAGE 5. Henri Rebois L'Art espagnol à l'Exposition de Barcelone. Robert Brussel Le Mouvement musical. Guy de Passillé Les Etrennes. Jacques Patin Les Premières. Les Alguazils Courrier des Lettres. Marc Hélys Revues étrangères. PAGE 6. La Bourse La Cote des Valeurs. Le Programme des spectacles. PAGE 7. Courrier des théâtres. 
Les Courses LA POLITIQUE La diplomatie ", + "contributor": [ + "Villemessant, Hippolyte de (1810-1879). Directeur de publication", + "Jouvin, Benoît (1810-1886). Directeur de publication", + ], + "date": "1930-01-01", + "id": "bpt6k296099q", + "issue": "01 janvier 19301930/01/01 (Numéro 1).", + "url": "https://gallica.bnf.fr/ark:/12148/bpt6k296099q", + } +] + +def test_gallica_import(monkeypatch, gallica_corpus_settings): + monkeypatch.setattr(requests, "get", mock_response) + corpus_def = load_corpus_definition("figaro") + sources = corpus_def.sources( + start=datetime(year=1930, month=1, day=1), + end=datetime(year=1930, month=12, day=31), + ) + documents = list(corpus_def.documents(sources)) + assert len(documents) == 1 + for document, target in zip(documents, target_documents): + for target_key in target.keys(): + assert document.get(target_key) == target.get(target_key) diff --git a/backend/requirements.txt b/backend/requirements.txt index be911bd7c..3ac3fa29f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -132,7 +132,7 @@ h11==0.14.0 # wsproto humanize==4.9.0 # via flower -ianalyzer-readers==0.2.1 +ianalyzer-readers==0.2.2 # via -r requirements.in idna==3.4 # via @@ -249,7 +249,9 @@ pygments==2.16.1 # rich # seleniumbase pyjwt[crypto]==2.8.0 - # via django-allauth + # via + # django-allauth + # pyjwt pynose==1.4.8 # via seleniumbase pyopenssl==23.2.0 From c5deac5e05af2922b9ae4a49d0e249715ae4f898 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 4 Dec 2024 10:44:04 +0100 Subject: [PATCH 2/2] revert change to addcorpus.validation.indexing.py --- backend/addcorpus/validation/indexing.py | 27 +++++++++--------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/backend/addcorpus/validation/indexing.py b/backend/addcorpus/validation/indexing.py index 92d0c89c2..a3763ef7b 100644 --- a/backend/addcorpus/validation/indexing.py +++ b/backend/addcorpus/validation/indexing.py @@ -1,9 +1,9 @@ ''' This module defines 
functions to check if a corpus is ready for indexing. ''' -import os -import requests + import warnings +import os from addcorpus.validation.creation import primary_mapping_type @@ -87,19 +87,12 @@ def validate_has_data_directory(corpus): return config = corpus.configuration - if not config.data_directory and not config.data_url: - raise CorpusNotIndexableError('Missing data directory or url') - - if corpus.data_dircetory and not os.path.isdir(config.data_directory): - raise CorpusNotIndexableError('Configured data directory does not exist.') + if not config.data_directory: + raise CorpusNotIndexableError( + 'Missing data directory' + ) - if corpus.data_url: - headers = {} - if corpus.data_api_key: - headers = {"Authorization": f"Token {corpus.data_api_key}"} - try: - requests.get(corpus.data_url, headers=headers) - except ConnectionError: - raise CorpusNotIndexableError( - 'Cannot connect to the configured data url. Do you need to provide an API key?' - ) + if not os.path.isdir(config.data_directory): + raise CorpusNotIndexableError( + 'Configured data directory does not exist.' + )