
Commit

PoS spider: use OAI-PMH spider
Signed-off-by: Victor Balbuena <[email protected]>
vbalbp committed Feb 28, 2019
1 parent 89c2834 commit 4fc4ef9
Showing 17 changed files with 724 additions and 650 deletions.
33 changes: 16 additions & 17 deletions docker-compose.test.yml
@@ -64,7 +64,7 @@ services:
     depends_on:
       scrapyd:
         condition: service_healthy
-      http-server.local:
+      pos-http-server.local:
         condition: service_healthy

   unit:
@@ -118,22 +118,6 @@ services:
       - ${PWD}/tests/functional/wsp/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP
       - ${PWD}/tests/functional/wsp/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd

-  http-server.local:
-    image: nginx:stable-alpine
-    volumes:
-      - ${PWD}/tests/functional/pos/fixtures/https_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf
-      - ${PWD}/tests/functional/pos/fixtures/https_server/conf/ssl:/etc/nginx/ssl
-      - ${PWD}/tests/functional/pos/fixtures/https_server/records:/etc/nginx/html/
-    ports:
-      - 443:443
-    healthcheck:
-      timeout: 5s
-      interval: 5s
-      retries: 5
-      test:
-        - "CMD-SHELL"
-        - "curl https://localhost:443/"
-
   functional_cds:
     <<: *service_base
     command: py.test -vv tests/functional/cds
@@ -173,6 +157,21 @@ services:
         - "CMD-SHELL"
         - "curl http://localhost:80/"

+  pos-http-server.local:
+    image: nginx:stable-alpine
+    volumes:
+      - ${PWD}/tests/functional/pos/fixtures/http_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf
+      - ${PWD}/tests/functional/pos/fixtures/http_server/records:/etc/nginx/html/
+    ports:
+      - 80:80
+    healthcheck:
+      timeout: 5s
+      interval: 5s
+      retries: 5
+      test:
+        - "CMD-SHELL"
+        - "curl http://localhost:80/"
+
   rabbitmq:
     image: rabbitmq
     healthcheck:
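Note on the fixture change: the PoS functional tests previously pulled fixture records from an HTTPS nginx server (http-server.local on port 443, with an ssl volume); with the move to the OAI-PMH spider they are served over plain HTTP on port 80 by pos-http-server.local, and the fixtures move from https_server/ to http_server/. As a minimal sketch of the kind of OAI-PMH harvest request issued against such a fixture host (the '/oai' endpoint path and the 'oai_dc' metadata prefix are assumptions for illustration, not taken from this commit):

# Hypothetical OAI-PMH ListRecords call against the compose fixture host.
# The hostname comes from docker-compose.test.yml; the path and prefix are assumed.
from urllib.parse import urlencode
from urllib.request import urlopen

base_url = 'http://pos-http-server.local/oai'  # assumed endpoint path
query = urlencode({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
with urlopen(base_url + '?' + query) as response:
    print(response.read()[:200])  # start of the OAI-PMH XML envelope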
3 changes: 0 additions & 3 deletions hepcrawl/settings.py
@@ -33,9 +33,6 @@
 # user-agent
 USER_AGENT = 'hepcrawl (+http://www.inspirehep.net)'

-# Allow duplicate requests
-DUPEFILTER_CLASS = "scrapy.dupefilters.BaseDupeFilter"
-
 # URI base prefix for $schema to be used during record generation
 SCHEMA_BASE_URI = os.environ.get(
     'APP_SCHEMA_BASE_URI',
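Note on the settings change: with DUPEFILTER_CLASS removed, hepcrawl falls back to Scrapy's default duplicate filter (scrapy.dupefilters.RFPDupeFilter), which drops repeated requests by fingerprint rather than allowing duplicates project-wide. A spider that still needs to re-visit URLs can opt back in locally; a minimal sketch using standard Scrapy mechanisms (the spider name and URL are placeholders):

import scrapy

class ExampleSpider(scrapy.Spider):
    # Hypothetical spider opting back into duplicate requests locally,
    # instead of the project-wide setting this commit removes.
    name = 'example'
    custom_settings = {
        'DUPEFILTER_CLASS': 'scrapy.dupefilters.BaseDupeFilter',
    }

    def start_requests(self):
        # dont_filter=True bypasses the dupefilter for a single request.
        yield scrapy.Request('http://example.org/', dont_filter=True)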
