From 22a7eb7091dd3e6baac71b9e276188bb39769262 Mon Sep 17 00:00:00 2001 From: Ivan Ogasawara Date: Wed, 31 Jan 2024 19:21:44 -0400 Subject: [PATCH] fix: Fix initial linter issues and tests (#2) --- .github/CODE_OF_CONDUCT.md | 46 ----- .github/CONTRIBUTING.md | 12 -- .github/ISSUE_TEMPLATE/bug_report.md | 23 +-- .github/ISSUE_TEMPLATE/feature_request.md | 17 +- .github/PULL_REQUEST_TEMPLATE.md | 66 ++++++- .github/workflows/main.yaml | 3 +- .gitignore | 1 + .pre-commit-config.yaml | 5 +- .vscode/tasks.json | 37 ---- CODE_OF_CONDUCT.md | 75 ++++++++ LICENCE | 2 +- README.md | 38 +++- docs/index.md | 38 +++- examples/advanced_search/README.md | 6 +- examples/advanced_search/main.py | 1 - .../author_cooccurence_analysis/README.md | 18 +- examples/author_cooccurence_analysis/main.py | 8 +- examples/simple_search/README.md | 4 +- examples/simple_search/main.py | 2 - pyproject.toml | 5 + src/pymedx/api.py | 173 ++++++++++-------- src/pymedx/article.py | 67 ++++--- src/pymedx/book.py | 62 ++++--- src/pymedx/helpers.py | 48 ++--- tests/test_core.py | 3 + 25 files changed, 448 insertions(+), 312 deletions(-) delete mode 100644 .github/CODE_OF_CONDUCT.md delete mode 100644 .github/CONTRIBUTING.md delete mode 100644 .vscode/tasks.json create mode 100644 CODE_OF_CONDUCT.md create mode 100644 tests/test_core.py diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md deleted file mode 100644 index a80a3b3a..00000000 --- a/.github/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,46 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at gijswobben+github@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md deleted file mode 100644 index d9e71ece..00000000 --- a/.github/CONTRIBUTING.md +++ /dev/null @@ -1,12 +0,0 @@ -# Contributing - -> Alone we can do so little; together we can do so much. — Helen Keller - -## Testing -TBD - -## Code conventions -All code is formatted with the Black code formatter. This leaves little room for discussion on the code conventions and helps us focus on the actual quality of the code. More information on the Black formatter can be found here: [Python Black](https://github.com/ambv/black) - -## Submitting changes -Any changes have to be commit to a feature branch and can only get into the master branch through an approved pull request. Please provide clear commit messages and pull requests so the reviewer knows what has changed and what the impact / effect is. 
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index eb0b40d7..318fa029 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,28 +1,25 @@ --- name: Bug report about: Create a report to help us improve - --- -**Describe the bug** -A clear and concise description of what the bug is. +**Describe the bug** A clear and concise description of what the bug is. + +**To Reproduce** Steps to reproduce the behavior: -**To Reproduce** -Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error -**Expected behavior** -A clear and concise description of what you expected to happen. +**Expected behavior** A clear and concise description of what you expected to +happen. -**Screenshots** -If applicable, add screenshots to help explain your problem. +**Screenshots** If applicable, add screenshots to help explain your problem. **Environment (please complete the following information):** - - OS: [e.g. Windows / Linux] - - Version [e.g. Python 3.7.0] -**Additional context** -Add any other context about the problem here. +- OS: [e.g. Windows / Linux] +- Version [e.g. Python 3.7.0] + +**Additional context** Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 066b2d92..bd9fd963 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,17 +1,16 @@ --- name: Feature request about: Suggest an idea for this project - --- -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] +**Is your feature request related to a problem? Please describe.** A clear and +concise description of what the problem is. Ex. I'm always frustrated when [...] 
-**Describe the solution you'd like** -A clear and concise description of what you want to happen. +**Describe the solution you'd like** A clear and concise description of what you +want to happen. -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. +**Describe alternatives you've considered** A clear and concise description of +any alternative solutions or features you've considered. -**Additional context** -Add any other context or screenshots about the feature request here. +**Additional context** Add any other context or screenshots about the feature +request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2f89cb7b..0337778b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,15 +1,61 @@ -### All Submissions: +## Pull Request description -* [ ] Have you followed the guidelines in our Contributing document? -* [ ] Have you checked to ensure there aren't other open [Pull Requests](../../pulls) for the same update/change? + -### New Feature Submissions: + -1. [ ] Does your submission pass tests (if applicable)? -2. [ ] Have you lint your code locally prior to submission (use flake8)? +## How to test these changes -### Changes to Core Features: + + +- `...` + + + +## Pull Request checklists + +This PR is a: + +- [ ] bug-fix +- [ ] new feature +- [ ] maintenance + +About this PR: + +- [ ] it includes tests. +- [ ] the tests are executed on CI. +- [ ] the tests generate log file(s) (path). +- [ ] pre-commit hooks were executed locally. +- [ ] this PR requires a project documentation update. + +Author's checklist: + +- [ ] I have reviewed the changes and it contains no misspelling. +- [ ] The code is well commented, especially in the parts that contain more + complexity. +- [ ] New and old tests passed locally. 
+ +## Additional information + + + + + +## Reviewer's checklist + +Copy and paste this template for your review's note: + +``` +## Reviewer's Checklist + +- [ ] I managed to reproduce the problem locally from the `main` branch +- [ ] I managed to test the new changes locally +- [ ] I confirm that the issues mentioned were fixed/resolved. +``` diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 621c4afc..9985977b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -35,7 +35,7 @@ jobs: - "3.9" - "3.10" - "3.11" - # - '3.12' + - '3.12' os: - "ubuntu" # - 'macos' @@ -66,7 +66,6 @@ jobs: - name: Install dependencies run: | - sudo apt install ffmpeg poetry install - name: Run tests diff --git a/.gitignore b/.gitignore index 8249fdbb..7e0c1939 100644 --- a/.gitignore +++ b/.gitignore @@ -104,6 +104,7 @@ venv.bak/ .mypy_cache/ .ruff_cache/ +.vscode/ DSS.egg-info/ test-reports/ .coveragerc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b37b22e8..8c7de445 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,8 +32,7 @@ repos: language: system exclude: | (?x)( - docs| - tests + docs ) pass_filenames: true types: @@ -48,7 +47,7 @@ repos: exclude: | (?x)( docs| - tests + examples ) types: - python diff --git a/.vscode/tasks.json b/.vscode/tasks.json deleted file mode 100644 index ed361f02..00000000 --- a/.vscode/tasks.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - // See https://go.microsoft.com/fwlink/?LinkId=733558 - // for the documentation about the tasks.json format - "version": "2.0.0", - "tasks": [ - { - "label": "Install package", - "type": "shell", - "command": "pip install -e .", - "problemMatcher": [] - }, - { - "label": "Build package", - "type": "shell", - "command": "rm -rf ./build ./dist ./venv && python setup.py sdist bdist_wheel", - "problemMatcher": [] - }, - { - "label": "Publish package", - "type": "shell", - "command": "twine upload -u ${input:pypiUsername} -p 
${input:pypiPassword} --repository pypi dist/*", - "problemMatcher": [] - } - ], - "inputs": [ - { - "id": "pypiUsername", - "type": "promptString", - "description": "PyPi username for publishing the package" - }, - { - "id": "pypiPassword", - "type": "promptString", - "description": "PyPi password for publishing the package" - } - ] -} \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..7f770953 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,75 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, nationality, personal appearance, race, religion, or sexual identity +and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual attention or + advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic + address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in 
+response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at opensciencelabs@gmail.com. The +project team will review and investigate all complaints, and will respond in a +way that it deems appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an +incident. Further details of specific enforcement policies may be posted +separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/LICENCE b/LICENCE index fe14d39a..e735afad 100644 --- a/LICENCE +++ b/LICENCE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/README.md b/README.md index 70e3dcfa..5c7492de 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,19 @@ -> PyMedX is a fork from a archived project called PyMed: https://github.com/gijswobben/pymedx +> PyMedX is a fork from an archived project called PyMed: +> https://github.com/gijswobben/pymedx # PyMedX - PubMed Access through Python -PyMedX is a Python library that provides access to PubMed through the PubMed API. + +PyMedX is a Python library that provides access to PubMed through the PubMed +API. ## Why this library? -The PubMed API is not very well documented and querying it in a performant way is too complicated and time consuming for researchers. This wrapper provides access to the API in a consistent, readable and performant way. + +The PubMed API is not very well documented and querying it in a performant way +is too complicated and time-consuming for researchers. This wrapper provides +access to the API in a consistent, readable and performant way. ## Features + This library takes care of the following for you: - Querying the PubMed database (with the standard PubMed query language) @@ -14,7 +21,10 @@ This library takes care of the following for you: - Parsing and cleaning of the retrieved articles ## Examples -For full (working) examples have a look at the `examples/` folder in this repository. In essence you only need to import the `PubMed` class, instantiate it, and use it to query: + +For full (working) examples have a look at the `examples/` folder in this +repository. 
In essence you only need to import the `PubMed` class, instantiate +it, and use it to query: ```python from pymedx import PubMed @@ -23,12 +33,22 @@ results = pubmed.query("Some query", max_results=500) ``` ## Notes on the API -The original documentation of the PubMed API can be found here: [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/tools/developers/). PubMed Central kindly requests you to: + +The original documentation of the PubMed API can be found here: +[PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/tools/developers/). PubMed +Central kindly requests you to: > - Do not make concurrent requests, even at off-peak times; and -> - Include two parameters that help to identify your service or application to our servers -> * _tool_ should be the name of the application, as a string value with no internal spaces, and -> * _email_ should be the e-mail address of the maintainer of the tool, and should be a valid e-mail address. +> - Include two parameters that help to identify your service or application to +> our servers +> - _tool_ should be the name of the application, as a string value with no +> internal spaces, and +> - _email_ should be the e-mail address of the maintainer of the tool, and +> should be a valid e-mail address. ## Notice of Non-Affiliation and Disclaimer -The author of this library is not affiliated, associated, authorized, endorsed by, or in any way officially connected with PubMed, or any of its subsidiaries or its affiliates. The official PubMed website can be found at https://www.ncbi.nlm.nih.gov/pubmed/. + +The author of this library is not affiliated, associated, authorized, endorsed +by, or in any way officially connected with PubMed, or any of its subsidiaries +or its affiliates. The official PubMed website can be found at +https://www.ncbi.nlm.nih.gov/pubmed/. 
diff --git a/docs/index.md b/docs/index.md index 70e3dcfa..5c7492de 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,12 +1,19 @@ -> PyMedX is a fork from a archived project called PyMed: https://github.com/gijswobben/pymedx +> PyMedX is a fork from an archived project called PyMed: +> https://github.com/gijswobben/pymedx # PyMedX - PubMed Access through Python -PyMedX is a Python library that provides access to PubMed through the PubMed API. + +PyMedX is a Python library that provides access to PubMed through the PubMed +API. ## Why this library? -The PubMed API is not very well documented and querying it in a performant way is too complicated and time consuming for researchers. This wrapper provides access to the API in a consistent, readable and performant way. + +The PubMed API is not very well documented and querying it in a performant way +is too complicated and time-consuming for researchers. This wrapper provides +access to the API in a consistent, readable and performant way. ## Features + This library takes care of the following for you: - Querying the PubMed database (with the standard PubMed query language) @@ -14,7 +21,10 @@ This library takes care of the following for you: - Parsing and cleaning of the retrieved articles ## Examples -For full (working) examples have a look at the `examples/` folder in this repository. In essence you only need to import the `PubMed` class, instantiate it, and use it to query: + +For full (working) examples have a look at the `examples/` folder in this +repository. In essence you only need to import the `PubMed` class, instantiate +it, and use it to query: ```python from pymedx import PubMed @@ -23,12 +33,22 @@ results = pubmed.query("Some query", max_results=500) ``` ## Notes on the API -The original documentation of the PubMed API can be found here: [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/tools/developers/). 
PubMed Central kindly requests you to: + +The original documentation of the PubMed API can be found here: +[PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/tools/developers/). PubMed +Central kindly requests you to: > - Do not make concurrent requests, even at off-peak times; and -> - Include two parameters that help to identify your service or application to our servers -> * _tool_ should be the name of the application, as a string value with no internal spaces, and -> * _email_ should be the e-mail address of the maintainer of the tool, and should be a valid e-mail address. +> - Include two parameters that help to identify your service or application to +> our servers +> - _tool_ should be the name of the application, as a string value with no +> internal spaces, and +> - _email_ should be the e-mail address of the maintainer of the tool, and +> should be a valid e-mail address. ## Notice of Non-Affiliation and Disclaimer -The author of this library is not affiliated, associated, authorized, endorsed by, or in any way officially connected with PubMed, or any of its subsidiaries or its affiliates. The official PubMed website can be found at https://www.ncbi.nlm.nih.gov/pubmed/. + +The author of this library is not affiliated, associated, authorized, endorsed +by, or in any way officially connected with PubMed, or any of its subsidiaries +or its affiliates. The official PubMed website can be found at +https://www.ncbi.nlm.nih.gov/pubmed/. diff --git a/examples/advanced_search/README.md b/examples/advanced_search/README.md index e16049e2..6b313528 100644 --- a/examples/advanced_search/README.md +++ b/examples/advanced_search/README.md @@ -1,5 +1,7 @@ # Example - Simple search -This example shows how to perform an advanced search on the PubMed database and retrieve information about the relevant articles. -You can use [PubMed query builder](https://www.ncbi.nlm.nih.gov/pubmed/advanced) for creating the query syntax. 
+This example shows how to perform an advanced search on the PubMed database and +retrieve information about the relevant articles. +You can use [PubMed query builder](https://www.ncbi.nlm.nih.gov/pubmed/advanced) +for creating the query syntax. diff --git a/examples/advanced_search/main.py b/examples/advanced_search/main.py index 3dff429b..d1c83a37 100644 --- a/examples/advanced_search/main.py +++ b/examples/advanced_search/main.py @@ -13,7 +13,6 @@ # Loop over the retrieved articles for article in results: - # Extract and format information from the article article_id = article.pubmed_id title = article.title diff --git a/examples/author_cooccurence_analysis/README.md b/examples/author_cooccurence_analysis/README.md index 378efbfd..3f5362cb 100644 --- a/examples/author_cooccurence_analysis/README.md +++ b/examples/author_cooccurence_analysis/README.md @@ -1,10 +1,24 @@ # Example - Author co-occurrence analysis -This example retrieves articles from PubMed and uses the author information to construct a nodes and edges list that can be used for an author co-occurrence analysis (e.g. with a tool like Gephi). A nodes list is created by taking every unique author found in the result set, and the edges list is created by creating a list of all unique combinations of authors. The combinations of authors is also counted so that there is a "weight" column available that indicates the number of times these authors co-occurred. The CSV files produced by this script can be loaded directly into Gephi for further processing. + +This example retrieves articles from PubMed and uses the author information to +construct a nodes and edges list that can be used for an author co-occurrence +analysis (e.g. with a tool like Gephi). A nodes list is created by taking every +unique author found in the result set, and the edges list is created by creating +a list of all unique combinations of authors. 
The combinations of authors is +also counted so that there is a "weight" column available that indicates the +number of times these authors co-occurred. The CSV files produced by this script +can be loaded directly into Gephi for further processing. ## Why? -Analyzing all the literature in a field can be a very complicated task. First steps could be to visualize the entire field in a graph. One of these visualizations is the author co-occurrence graph, that shows how authors are inter-connected. The graph helps to answer questions like: What authors connect multiple research groups together? + +Analyzing all the literature in a field can be a very complicated task. First +steps could be to visualize the entire field in a graph. One of these +visualizations is the author co-occurrence graph, that shows how authors are +inter-connected. The graph helps to answer questions like: What authors connect +multiple research groups together? ## Example - Field of occupational health + Here a visualization of the field of occupational health: ![Field of occupational health](./author_co_occurrence_occupational_health.png "Field of occupational health") diff --git a/examples/author_cooccurence_analysis/main.py b/examples/author_cooccurence_analysis/main.py index ebc7184d..c9a9dd1f 100644 --- a/examples/author_cooccurence_analysis/main.py +++ b/examples/author_cooccurence_analysis/main.py @@ -3,7 +3,6 @@ from pymedx import PubMed - # Create a PubMed object that GraphQL can use to query # Note that the parameters are not required but kindly requested by PubMed Central # https://www.ncbi.nlm.nih.gov/pmc/tools/developers/ @@ -39,7 +38,10 @@ edges = list( itertools.chain.from_iterable( [ - [combination for combination in itertools.combinations(co_author_list, 2)] + [ + combination + for combination in itertools.combinations(co_author_list, 2) + ] for co_author_list in [ [ nodes[f'{author["lastname"]} {author["firstname"]}'] @@ -57,7 +59,6 @@ # Open the nodes file with 
open("./nodes.csv", "w", encoding="utf8", newline="") as nodes_file: - # Create a CSV writer writer = csv.writer(nodes_file, delimiter=",") @@ -70,7 +71,6 @@ with open("./edges.csv", "w", encoding="utf8", newline="") as edge_file: - # Create a CSV writer writer = csv.writer(edge_file, delimiter=",") diff --git a/examples/simple_search/README.md b/examples/simple_search/README.md index d17e771d..80b6d7cf 100644 --- a/examples/simple_search/README.md +++ b/examples/simple_search/README.md @@ -1,2 +1,4 @@ # Example - Simple search -This example shows how to perform a simple search on the PubMed database and retrieve information about the relevant articles. + +This example shows how to perform a simple search on the PubMed database and +retrieve information about the relevant articles. diff --git a/examples/simple_search/main.py b/examples/simple_search/main.py index 88277c10..073a644f 100644 --- a/examples/simple_search/main.py +++ b/examples/simple_search/main.py @@ -1,6 +1,5 @@ from pymedx import PubMed - # Create a PubMed object that GraphQL can use to query # Note that the parameters are not required but kindly requested by PubMed Central # https://www.ncbi.nlm.nih.gov/pmc/tools/developers/ @@ -15,7 +14,6 @@ # Loop over the retrieved articles for article in results: - # Print the type of object we've found (can be either PubMedBookArticle or PubMedArticle) print(type(article)) diff --git a/pyproject.toml b/pyproject.toml index 78e83aaf..6229a095 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ src = ["./"] ignore = ["PLR0913"] exclude = [ 'docs', + 'examples', ] select = [ "E", # pycodestyle @@ -88,3 +89,7 @@ quote-style = "double" [tool.mypy] no_strict_optional = false +exclude = [ + '^docs/$', + '^examples/$', +] diff --git a/src/pymedx/api.py b/src/pymedx/api.py index 80c84890..eb458d02 100644 --- a/src/pymedx/api.py +++ b/src/pymedx/api.py @@ -1,40 +1,41 @@ import datetime -import requests import itertools - import xml.etree.ElementTree as 
xml from typing import Union -from .helpers import batches +import requests + from .article import PubMedArticle from .book import PubMedBookArticle - +from .helpers import batches # Base url for all queries BASE_URL = "https://eutils.ncbi.nlm.nih.gov" class PubMed(object): - """ Wrapper around the PubMed API. - """ + """Wrapper around the PubMed API.""" def __init__( - self: object, tool: str = "my_tool", email: str = "my_email@example.com" + self: object, + tool: str = "my_tool", + email: str = "my_email@example.com", ) -> None: - """ Initialization of the object. - - Parameters: - - tool String, name of the tool that is executing the query. - This parameter is not required but kindly requested by - PMC (PubMed Central). - - email String, email of the user of the tool. This parameter - is not required but kindly requested by PMC (PubMed Central). - - Returns: - - None + """Initialization of the object. + + Parameters + ---------- + - tool String, name of the tool that is executing the query. + This parameter is not required but kindly requested by + PMC (PubMed Central). + - email String, email of the user of the tool. This parameter + is not required but kindly requested by PMC (PubMed Central). + + Returns + ------- + - None """ - # Store the input parameters self.tool = tool self.email = email @@ -47,17 +48,18 @@ def __init__( self.parameters = {"tool": tool, "email": email, "db": "pubmed"} def query(self: object, query: str, max_results: int = 100): - """ Method that executes a query agains the GraphQL schema, automatically - inserting the PubMed data loader. + """Method that executes a query against the GraphQL schema, automatically + inserting the PubMed data loader. - Parameters: - - query String, the GraphQL query to execute against the schema. + Parameters + ---------- + - query String, the GraphQL query to execute against the schema. - Returns: - - result ExecutionResult, GraphQL object that contains the result - in the "data" attribute. 
+ Returns + ------- + - result ExecutionResult, GraphQL object that contains the result + in the "data" attribute. """ - # Retrieve the article IDs for the query article_ids = self._getArticleIds(query=query, max_results=max_results) @@ -73,15 +75,16 @@ def query(self: object, query: str, max_results: int = 100): return itertools.chain.from_iterable(articles) def getTotalResultsCount(self: object, query: str) -> int: - """ Helper method that returns the total number of results that match the query. + """Helper method that returns the total number of results that match the query. - Parameters: - - query String, the query to send to PubMed + Parameters + ---------- + - query String, the query to send to PubMed - Returns: - - total_results_count Int, total number of results for the query in PubMed + Returns + ------- + - total_results_count Int, total number of results for the query in PubMed """ - # Get the default parameters parameters = self.parameters.copy() @@ -90,23 +93,32 @@ def getTotalResultsCount(self: object, query: str) -> int: parameters["retmax"] = 1 # Make the request (request a single article ID for this search) - response = self._get(url="/entrez/eutils/esearch.fcgi", parameters=parameters) + response = self._get( + url="/entrez/eutils/esearch.fcgi", parameters=parameters + ) # Get from the returned meta data the total number of available results for the query - total_results_count = int(response.get("esearchresult", {}).get("count")) + total_results_count = int( + response.get("esearchresult", {}).get("count") + ) # Return the total number of results (without retrieving them) return total_results_count - + def _exceededRateLimit(self) -> bool: - """ Helper method to check if we've exceeded the rate limit. + """Helper method to check if we've exceeded the rate limit. - Returns: - - exceeded Bool, Whether or not the rate limit is exceeded. + Returns + ------- + - exceeded Bool, Whether or not the rate limit is exceeded. 
""" - # Remove requests from the list that are longer than 1 second ago - self._requestsMade = [requestTime for requestTime in self._requestsMade if requestTime > datetime.datetime.now() - datetime.timedelta(seconds=1)] + self._requestsMade = [ + requestTime + for requestTime in self._requestsMade + if requestTime + > datetime.datetime.now() - datetime.timedelta(seconds=1) + ] # Return whether we've made more requests in the last second, than the rate limit return len(self._requestsMade) > self._rateLimit @@ -114,21 +126,22 @@ def _exceededRateLimit(self) -> bool: def _get( self: object, url: str, parameters: dict, output: str = "json" ) -> Union[dict, str]: - """ Generic helper method that makes a request to PubMed. - - Parameters: - - url Str, last part of the URL that is requested (will - be combined with the base url) - - parameters Dict, parameters to use for the request - - output Str, type of output that is requested (defaults to - JSON but can be used to retrieve XML) - - Returns: - - response Dict / str, if the response is valid JSON it will - be parsed before returning, otherwise a string is - returend + """Generic helper method that makes a request to PubMed. + + Parameters + ---------- + - url Str, last part of the URL that is requested (will + be combined with the base url) + - parameters Dict, parameters to use for the request + - output Str, type of output that is requested (defaults to + JSON but can be used to retrieve XML) + + Returns + ------- + - response Dict / str, if the response is valid JSON it will + be parsed before returning, otherwise a string is + returend """ - # Make sure the rate limit is not exceeded while self._exceededRateLimit(): pass @@ -152,22 +165,25 @@ def _get( return response.text def _getArticles(self: object, article_ids: list) -> list: - """ Helper method that batches a list of article IDs and retrieves the content. + """Helper method that batches a list of article IDs and retrieves the content. 
- Parameters: - - article_ids List, article IDs. + Parameters + ---------- + - article_ids List, article IDs. - Returns: - - articles List, article objects. + Returns + ------- + - articles List, article objects. """ - # Get the default parameters parameters = self.parameters.copy() parameters["id"] = article_ids # Make the request response = self._get( - url="/entrez/eutils/efetch.fcgi", parameters=parameters, output="xml" + url="/entrez/eutils/efetch.fcgi", + parameters=parameters, + output="xml", ) # Parse as XML @@ -180,16 +196,17 @@ def _getArticles(self: object, article_ids: list) -> list: yield PubMedBookArticle(xml_element=book) def _getArticleIds(self: object, query: str, max_results: int) -> list: - """ Helper method to retrieve the article IDs for a query. + """Helper method to retrieve the article IDs for a query. - Parameters: - - query Str, query to be executed against the PubMed database. - - max_results Int, the maximum number of results to retrieve. + Parameters + ---------- + - query Str, query to be executed against the PubMed database. + - max_results Int, the maximum number of results to retrieve. - Returns: - - article_ids List, article IDs as a list. + Returns + ------- + - article_ids List, article IDs as a list. 
""" - # Create a placeholder for the retrieved IDs article_ids = [] @@ -205,13 +222,17 @@ def _getArticleIds(self: object, query: str, max_results: int) -> list: parameters["retmax"] = max_results # Make the first request to PubMed - response = self._get(url="/entrez/eutils/esearch.fcgi", parameters=parameters) + response = self._get( + url="/entrez/eutils/esearch.fcgi", parameters=parameters + ) # Add the retrieved IDs to the list article_ids += response.get("esearchresult", {}).get("idlist", []) # Get information from the response - total_result_count = int(response.get("esearchresult", {}).get("count")) + total_result_count = int( + response.get("esearchresult", {}).get("count") + ) retrieved_count = int(response.get("esearchresult", {}).get("retmax")) # If no max is provided (-1) we'll try to retrieve everything @@ -219,8 +240,10 @@ def _getArticleIds(self: object, query: str, max_results: int) -> list: max_results = total_result_count # If not all articles are retrieved, continue to make requests untill we have everything - while retrieved_count < total_result_count and retrieved_count < max_results: - + while ( + retrieved_count < total_result_count + and retrieved_count < max_results + ): # Calculate a cut off point based on the max_results parameter if (max_results - retrieved_count) < parameters["retmax"]: parameters["retmax"] = max_results - retrieved_count @@ -237,7 +260,9 @@ def _getArticleIds(self: object, query: str, max_results: int) -> list: article_ids += response.get("esearchresult", {}).get("idlist", []) # Get information from the response - retrieved_count += int(response.get("esearchresult", {}).get("retmax")) + retrieved_count += int( + response.get("esearchresult", {}).get("retmax") + ) # Return the response return article_ids diff --git a/src/pymedx/article.py b/src/pymedx/article.py index 6068d4b6..42ade06e 100644 --- a/src/pymedx/article.py +++ b/src/pymedx/article.py @@ -1,16 +1,14 @@ -import json import datetime +import json +from typing 
import Optional, TypeVar from xml.etree.ElementTree import Element -from typing import TypeVar -from typing import Optional from .helpers import getContent class PubMedArticle(object): - """ Data class that contains a PubMed article. - """ + """Data class that contains a PubMed article.""" __slots__ = ( "pubmed_id", @@ -34,9 +32,7 @@ def __init__( *args: list, **kwargs: dict, ) -> None: - """ Initialization of the object from XML or from parameters. - """ - + """Initialization of the object from XML or from parameters.""" # If an XML element is provided, use it for initialization if xml_element is not None: self._initializeFromXML(xml_element=xml_element) @@ -57,7 +53,9 @@ def _extractTitle(self: object, xml_element: TypeVar("Element")) -> str: def _extractKeywords(self: object, xml_element: TypeVar("Element")) -> str: path = ".//Keyword" return [ - keyword.text for keyword in xml_element.findall(path) if keyword is not None + keyword.text + for keyword in xml_element.findall(path) + if keyword is not None ] def _extractJournal(self: object, xml_element: TypeVar("Element")) -> str: @@ -68,7 +66,9 @@ def _extractAbstract(self: object, xml_element: TypeVar("Element")) -> str: path = ".//AbstractText" return getContent(element=xml_element, path=path) - def _extractConclusions(self: object, xml_element: TypeVar("Element")) -> str: + def _extractConclusions( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//AbstractText[@Label='CONCLUSION']" return getContent(element=xml_element, path=path) @@ -80,7 +80,9 @@ def _extractResults(self: object, xml_element: TypeVar("Element")) -> str: path = ".//AbstractText[@Label='RESULTS']" return getContent(element=xml_element, path=path) - def _extractCopyrights(self: object, xml_element: TypeVar("Element")) -> str: + def _extractCopyrights( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//CopyrightInformation" return getContent(element=xml_element, path=path) @@ -93,16 +95,23 @@ def 
_extractPublicationDate( ) -> TypeVar("datetime.datetime"): # Get the publication date try: - # Get the publication elements - publication_date = xml_element.find(".//PubMedPubDate[@PubStatus='pubmed']") - publication_year = int(getContent(publication_date, ".//Year", None)) - publication_month = int(getContent(publication_date, ".//Month", "1")) + publication_date = xml_element.find( + ".//PubMedPubDate[@PubStatus='pubmed']" + ) + publication_year = int( + getContent(publication_date, ".//Year", None) + ) + publication_month = int( + getContent(publication_date, ".//Month", "1") + ) publication_day = int(getContent(publication_date, ".//Day", "1")) # Construct a datetime object from the info return datetime.date( - year=publication_year, month=publication_month, day=publication_day + year=publication_year, + month=publication_month, + day=publication_day, ) # Unable to parse the datetime @@ -116,15 +125,17 @@ def _extractAuthors(self: object, xml_element: TypeVar("Element")) -> list: "lastname": getContent(author, ".//LastName", None), "firstname": getContent(author, ".//ForeName", None), "initials": getContent(author, ".//Initials", None), - "affiliation": getContent(author, ".//AffiliationInfo/Affiliation", None), + "affiliation": getContent( + author, ".//AffiliationInfo/Affiliation", None + ), } for author in xml_element.findall(".//Author") ] - def _initializeFromXML(self: object, xml_element: TypeVar("Element")) -> None: - """ Helper method that parses an XML element into an article object. 
- """ - + def _initializeFromXML( + self: object, xml_element: TypeVar("Element") + ) -> None: + """Helper method that parses an XML element into an article object.""" # Parse the different fields of the article self.pubmed_id = self._extractPubMedId(xml_element) self.title = self._extractTitle(xml_element) @@ -141,18 +152,18 @@ def _initializeFromXML(self: object, xml_element: TypeVar("Element")) -> None: self.xml = xml_element def toDict(self: object) -> dict: - """ Helper method to convert the parsed information to a Python dict. - """ - + """Helper method to convert the parsed information to a Python dict.""" return {key: self.__getattribute__(key) for key in self.__slots__} def toJSON(self: object) -> str: - """ Helper method for debugging, dumps the object as JSON string. - """ - + """Helper method for debugging, dumps the object as JSON string.""" return json.dumps( { - key: (value if not isinstance(value, (datetime.date, Element)) else str(value)) + key: ( + value + if not isinstance(value, (datetime.date, Element)) + else str(value) + ) for key, value in self.toDict().items() }, sort_keys=True, diff --git a/src/pymedx/book.py b/src/pymedx/book.py index 1e2d5d16..55b19c73 100644 --- a/src/pymedx/book.py +++ b/src/pymedx/book.py @@ -1,15 +1,13 @@ -import json import datetime +import json -from typing import TypeVar -from typing import Optional +from typing import Optional, TypeVar from .helpers import getContent class PubMedBookArticle(object): - """ Data class that contains a PubMed article. - """ + """Data class that contains a PubMed article.""" __slots__ = ( "pubmed_id", @@ -33,9 +31,7 @@ def __init__( *args: list, **kwargs: dict, ) -> None: - """ Initialization of the object from XML or from parameters. 
- """ - + """Initialization of the object from XML or from parameters.""" # If an XML element is provided, use it for initialization if xml_element is not None: self._initializeFromXML(xml_element=xml_element) @@ -57,7 +53,9 @@ def _extractAbstract(self: object, xml_element: TypeVar("Element")) -> str: path = ".//AbstractText" return getContent(element=xml_element, path=path) - def _extractCopyrights(self: object, xml_element: TypeVar("Element")) -> str: + def _extractCopyrights( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//CopyrightInformation" return getContent(element=xml_element, path=path) @@ -73,19 +71,27 @@ def _extractLanguage(self: object, xml_element: TypeVar("Element")) -> str: path = ".//Language" return getContent(element=xml_element, path=path) - def _extractPublicationType(self: object, xml_element: TypeVar("Element")) -> str: + def _extractPublicationType( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//PublicationType" return getContent(element=xml_element, path=path) - def _extractPublicationDate(self: object, xml_element: TypeVar("Element")) -> str: + def _extractPublicationDate( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//PubDate/Year" return getContent(element=xml_element, path=path) - def _extractPublisher(self: object, xml_element: TypeVar("Element")) -> str: + def _extractPublisher( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//Publisher/PublisherName" return getContent(element=xml_element, path=path) - def _extractPublisherLocation(self: object, xml_element: TypeVar("Element")) -> str: + def _extractPublisherLocation( + self: object, xml_element: TypeVar("Element") + ) -> str: path = ".//Publisher/PublisherLocation" return getContent(element=xml_element, path=path) @@ -100,19 +106,23 @@ def _extractAuthors(self: object, xml_element: TypeVar("Element")) -> list: for author in xml_element.findall(".//Author") ] - def _extractSections(self: 
object, xml_element: TypeVar("Element")) -> list: + def _extractSections( + self: object, xml_element: TypeVar("Element") + ) -> list: return [ { "title": getContent(section, path=".//SectionTitle"), - "chapter": getContent(element=section, path=".//LocationLabel"), + "chapter": getContent( + element=section, path=".//LocationLabel" + ), } for section in xml_element.findall(".//Section") ] - def _initializeFromXML(self: object, xml_element: TypeVar("Element")) -> None: - """ Helper method that parses an XML element into an article object. - """ - + def _initializeFromXML( + self: object, xml_element: TypeVar("Element") + ) -> None: + """Helper method that parses an XML element into an article object.""" # Parse the different fields of the article self.pubmed_id = self._extractPubMedId(xml_element) self.title = self._extractTitle(xml_element) @@ -129,21 +139,21 @@ def _initializeFromXML(self: object, xml_element: TypeVar("Element")) -> None: self.sections = self._extractSections(xml_element) def toDict(self: object) -> dict: - """ Helper method to convert the parsed information to a Python dict. - """ - + """Helper method to convert the parsed information to a Python dict.""" return { key: (self.__getattribute__(key) if hasattr(self, key) else None) for key in self.__slots__ } def toJSON(self: object) -> str: - """ Helper method for debugging, dumps the object as JSON string. - """ - + """Helper method for debugging, dumps the object as JSON string.""" return json.dumps( { - key: (value if not isinstance(value, datetime.date) else str(value)) + key: ( + value + if not isinstance(value, datetime.date) + else str(value) + ) for key, value in self.toDict().items() }, sort_keys=True, diff --git a/src/pymedx/helpers.py b/src/pymedx/helpers.py index 50f45f77..ad13e6f8 100644 --- a/src/pymedx/helpers.py +++ b/src/pymedx/helpers.py @@ -2,41 +2,45 @@ def batches(iterable: list, n: int = 1) -> list: - """ Helper method that creates batches from an iterable. 
+ """Helper method that creates batches from an iterable. - Parameters: - - iterable Iterable, the iterable to batch. - - n Int, the batch size. + Parameters + ---------- + - iterable Iterable, the iterable to batch. + - n Int, the batch size. - Returns: - - batches List, yields batches of n objects taken from the iterable. + Returns + ------- + - batches List, yields batches of n objects taken from the iterable. """ - # Get the length of the iterable length = len(iterable) # Start a loop over the iterable for index in range(0, length, n): - # Create a new iterable by slicing the original yield iterable[index : min(index + n, length)] def getContent( - element: TypeVar("Element"), path: str, default: str = None, separator: str = "\n" + element: TypeVar("Element"), + path: str, + default: str = None, + separator: str = "\n", ) -> str: - """ Internal helper method that retrieves the text content of an - XML element. - - Parameters: - - element Element, the XML element to parse. - - path Str, Nested path in the XML element. - - default Str, default value to return when no text is found. - - Returns: - - text Str, text in the XML node. + """Internal helper method that retrieves the text content of an + XML element. + + Parameters + ---------- + - element Element, the XML element to parse. + - path Str, Nested path in the XML element. + - default Str, default value to return when no text is found. + + Returns + ------- + - text Str, text in the XML node. """ - # Find the path in the element result = element.findall(path) @@ -46,4 +50,6 @@ def getContent( # Extract the text and return it else: - return separator.join([sub.text for sub in result if sub.text is not None]) + return separator.join( + [sub.text for sub in result if sub.text is not None] + ) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 00000000..bc9a3df8 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,3 @@ +def test_import(): + import pymedx + assert pymedx