Skip to content
This repository has been archived by the owner on Sep 5, 2024. It is now read-only.

Commit

Permalink
some formatting and more docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelharms committed Jan 19, 2020
1 parent 2e99a23 commit 9085554
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 5 deletions.
2 changes: 0 additions & 2 deletions comcrawl/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def download_single_result(result: Result) -> Result:
The provided result, extended by the corresponding HTML String.
"""

offset, length = int(result["offset"]), int(result["length"])

offset_end = offset + length - 1
Expand Down Expand Up @@ -68,7 +67,6 @@ def download_multiple_results(results: ResultList,
HTML strings.
"""

results_with_html = []

# multi-threaded download
Expand Down
1 change: 0 additions & 1 deletion comcrawl/utils/initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ def fetch_available_indexes() -> IndexList:
A list containing available indexes and information about them.
"""

index_list = (requests
.get("https://index.commoncrawl.org/collinfo.json")
.json())
Expand Down
26 changes: 26 additions & 0 deletions comcrawl/utils/multithreading.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,34 @@

def make_multithreaded(func: Callable,
threads: int) -> Callable:
"""Creates a multithreaded version of a function
Args:
func: Function that is meant to be executed on
a list of input objects.
threads: The number of threads the multithreaded
version of the function should use.
Returns:
A multithreaded version of the `func` input function.
"""

def multithreaded_function(input_list: List, *args) -> List:
"""Executes function on input list using multiple threads
Args:
input_list: The list of objects a function should be
executed on.
*args: Variable length argument list of additional
parameters needed for the function to be executed.
Returns:
List of results after all input list elements were
processed. Input order might not be preserved in
output list.
"""
results = []

with futures.ThreadPoolExecutor(max_workers=threads) as executor:
Expand Down
2 changes: 0 additions & 2 deletions comcrawl/utils/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def search_single_index(index: str, url: str) -> ResultList:
List of results dictionaries found in specified Index for the URL.
"""

results: ResultList = []

url = URL_TEMPLATE.format(index=index, url=url)
Expand Down Expand Up @@ -55,7 +54,6 @@ def search_multiple_indexes(url: str,
Common Crawl indexes.
"""

results = []

# multi-threaded search
Expand Down

0 comments on commit 9085554

Please sign in to comment.