Skip to content
This repository has been archived by the owner on Nov 21, 2023. It is now read-only.

Commit

Permalink
chore: Comprime arquivos .pdf
Browse files Browse the repository at this point in the history
  • Loading branch information
pvfrota committed Feb 15, 2021
1 parent b53a4f9 commit f52b42b
Show file tree
Hide file tree
Showing 27 changed files with 62 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,6 @@ dmypy.json

# Cython debug symbols
cython_debug/

# Decompressed PDF files
data/decompressed/
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Base image: the slim variant of Python 3.8.7.
FROM python:3.8.7-slim

# Append a Debian "non-free" package source to apt's source list, presumably so
# that the rar/unrar packages installed below can be resolved.
# NOTE(review): the suite is hard-coded to "jessie" -- confirm it matches the
# Debian release this base image is built on.
RUN echo "deb http://http.us.debian.org/debian jessie non-free" >> /etc/apt/sources.list

# One RUN step: refresh the package index, install the build toolchain
# (automake, make, gcc, g++) plus rar/unrar without interactive prompts, then
# clean apt's cache and remove the downloaded package lists.
RUN apt-get -yq update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
automake \
make \
gcc \
g++ \
rar \
unrar \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

Expand Down
45 changes: 43 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,27 @@

# docker-compose service that the recipes run their commands in.
SERVICE_NAME=vacina-manaus-backend

# On-disk layout: <name>.pdf in DECOMPRESSED_FILE_DIR is archived as
# COMPRESSED_FILE_ROOT_DIR/<name>/<name>.rar.
COMPRESSED_FILE_ROOT_DIR=data/compressed
DECOMPRESSED_FILE_DIR=data/decompressed

# Last entry of each directory listing.
# Kept recursive (`=`, not `:=`): the `ls` runs whenever a recipe expands the
# variable, so it sees the directory as it is at that moment instead of as it
# was when the Makefile was parsed.
LATEST_COMPRESSED_FILE_ROOT_DIR=$(shell ls ${COMPRESSED_FILE_ROOT_DIR} | tail -n 1)
LATEST_DECOMPRESSED_FILE=$(shell ls ${DECOMPRESSED_FILE_DIR} | tail -n 1)

# Inputs of compress-data / decompress-data. They default to the entries found
# above and can be overridden on the command line, e.g.
#   make compress-data FILE_TO_COMPRESS=some-file.pdf
FILE_TO_COMPRESS=${LATEST_DECOMPRESSED_FILE}
FILE_ROOT_DIR_TO_DECOMPRESS=${LATEST_COMPRESSED_FILE_ROOT_DIR}

# Derive the archive directory and archive name from the PDF name.
# patsubst only rewrites a literal trailing ".pdf"; a regex such as `.pdf`
# would also hit e.g. "_pdf" in the middle of a name. Names without the
# extension pass through unchanged.
COMPRESSED_FILE_DIR=${COMPRESSED_FILE_ROOT_DIR}/$(patsubst %.pdf,%,${FILE_TO_COMPRESS})
COMPRESSED_FILE_NAME=$(patsubst %.pdf,%.rar,${FILE_TO_COMPRESS})

# Name of the PDF expected after extraction, and the archive path (relative to
# COMPRESSED_FILE_ROOT_DIR) to extract from. Despite its name, the latter is a
# shell glob (note the trailing `*`), expanded by the recipe's shell.
# NOTE(review): confirm the glob matches the file names rar produces when -v
# splits an archive into several files.
DECOMPRESSED_FILE_NAME=${FILE_ROOT_DIR_TO_DECOMPRESS}.pdf
DECOMPRESSION_FILE_REGEX=${FILE_ROOT_DIR_TO_DECOMPRESS}/${FILE_ROOT_DIR_TO_DECOMPRESS}.rar*

# Arguments for `rar a`: -m and -v receive the two knobs below, followed by the
# archive path and then the file to add.
COMPRESSION_LEVEL=5
COMPRESSION_MAX_FILE_SIZE=50M
COMPRESSION_PARAMS=-idq -ep -m${COMPRESSION_LEVEL} -v${COMPRESSION_MAX_FILE_SIZE} ${COMPRESSED_FILE_DIR}/${COMPRESSED_FILE_NAME} ${DECOMPRESSED_FILE_DIR}/${FILE_TO_COMPRESS}

# Arguments for `unrar e`: the archive glob under COMPRESSED_FILE_ROOT_DIR.
DECOMPRESSION_PARAMS=-idq ${COMPRESSED_FILE_ROOT_DIR}/${DECOMPRESSION_FILE_REGEX}

# Aggregate target: brings `build` and then `data` up to date.
.PHONY: all
all: build data

Expand All @@ -13,15 +34,31 @@ build:
# Build the docker-compose images with --no-cache.
build-no-cache:
	@docker-compose build --no-cache

# Archive FILE_TO_COMPRESS with rar.
# The per-file archive directory is created first; rar itself runs inside the
# SERVICE_NAME container under the uid of the user invoking make.
.PHONY: compress-data
compress-data:
	@echo 'Comprimindo arquivo ${FILE_TO_COMPRESS}...'
	@mkdir -p ${COMPRESSED_FILE_DIR}
	@docker-compose run --rm --user=$(shell id -u) ${SERVICE_NAME} \
		rar a ${COMPRESSION_PARAMS}

# Data pipeline: download, compress, extract, process.
# Declared on a single rule line: make merges the prerequisites of repeated
# `data:` lines in order of appearance, which would push compress-data to the
# end of the list.
# NOTE: the sequence relies on make walking prerequisites left to right, so it
# is only guaranteed for a serial run (no -j).
.PHONY: data
data: download-data compress-data extract-data process-data

# Extract the archive selected by FILE_ROOT_DIR_TO_DECOMPRESS and move the
# resulting PDF into DECOMPRESSED_FILE_DIR.
# - Skipped when that PDF already exists in DECOMPRESSED_FILE_DIR.
# - DECOMPRESSED_FILE_DIR is created on demand, so the final mv has a
#   destination even when the directory is not there yet.
# - The mv picks the PDF up from the current directory, i.e. the extracted file
#   is expected to appear there once unrar has run in the container.
# - DECOMPRESSION_PARAMS stays unquoted: it carries a glob for the shell.
.PHONY: decompress-data
decompress-data:
	@echo 'Descomprimindo arquivo ${DECOMPRESSED_FILE_NAME}...'
	@if [ -f "${DECOMPRESSED_FILE_DIR}/${DECOMPRESSED_FILE_NAME}" ]; then \
		echo "Arquivo ${DECOMPRESSED_FILE_NAME} já descomprimido, pulando..."; \
	else \
		mkdir -p "${DECOMPRESSED_FILE_DIR}" \
		&& docker-compose run --user=$(shell id -u) --rm ${SERVICE_NAME} unrar e ${DECOMPRESSION_PARAMS} \
		&& mv "${DECOMPRESSED_FILE_NAME}" "${DECOMPRESSED_FILE_DIR}/${DECOMPRESSED_FILE_NAME}"; \
	fi

# Run src/download_data.py inside the SERVICE_NAME container, under the uid of
# the user invoking make.
.PHONY: download-data
download-data:
	@docker-compose run --rm --user=$(shell id -u) ${SERVICE_NAME} \
		python src/download_data.py

# Run src/extract_data.py inside the SERVICE_NAME container, under the uid of
# the user invoking make. decompress-data is brought up to date first.
.PHONY: extract-data
extract-data: decompress-data
	@docker-compose run --rm --user=$(shell id -u) ${SERVICE_NAME} \
		python src/extract_data.py

.PHONY: process-data
Expand All @@ -35,3 +72,7 @@ process-main:
# Lint the top-level src/*.py files with flake8 (alpine/flake8:3.8.4 image),
# using .flake8 as configuration and the current directory mounted at /apps.
# $(CURDIR) and $(wildcard ...) are make built-ins, so no extra shell is
# spawned to compute them; the mount source is quoted so a checkout path
# containing spaces still forms a single -v argument.
.PHONY: lint
lint:
	@docker run --rm -v "$(CURDIR)":/apps alpine/flake8:3.8.4 --config=.flake8 $(wildcard src/*.py)

# Start bash in a one-off SERVICE_NAME container; --rm removes the container
# when the session ends.
.PHONY: shell
shell:
	@docker-compose run --rm ${SERVICE_NAME} bash
2 changes: 1 addition & 1 deletion data/analyzed/last_update_date.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
last_update_date
12/02/2021
12/02/2021
Loading

0 comments on commit f52b42b

Please sign in to comment.