Add workflows for testing and linting - part 2 #4

Closed · wants to merge 5 commits
7 changes: 7 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,7 @@
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"

29 changes: 29 additions & 0 deletions .github/workflows/code-style.yml
@@ -0,0 +1,29 @@
name: Code Style Check

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  lint:
    runs-on: ubuntu-latest

    steps:
      - name: Check out the code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      - name: Install flake8
        run: pip install flake8

      - name: Run flake8
        run: flake8 crivo tests

35 changes: 35 additions & 0 deletions .github/workflows/python-tests.yml
@@ -0,0 +1,35 @@
name: Python Tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

    steps:
      - name: Check out the code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Run tests
        run: pytest --cov=crivo
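
The final step runs `pytest --cov=crivo`, so every test the repository collects runs under coverage on each Python version in the matrix. As a sketch of the kind of test this job picks up, here is a hypothetical `tests/test_file_processor.py` exercising `process_file` (shown in the next diff); the file name and test bodies are illustrative, not part of this PR:

import pytest

from crivo.file_processor import process_file


def test_process_file_reads_content(tmp_path):
    # tmp_path is pytest's built-in temporary-directory fixture
    sample = tmp_path / "sample.txt"
    sample.write_text("192.168.0.1 example.com", encoding="utf-8")
    assert process_file(str(sample)) == "192.168.0.1 example.com"


def test_process_file_missing_file():
    # process_file re-raises FileNotFoundError with a clearer message
    with pytest.raises(FileNotFoundError):
        process_file("no_such_file.txt")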

4 changes: 2 additions & 2 deletions crivo/file_processor.py
@@ -11,6 +11,7 @@
 
 """
 
+
 def process_file(file_path):
     """
     Reads the content of a text file and returns it as a string.
@@ -22,11 +23,10 @@ def process_file(file_path):
         str: The content of the file.
     """
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        with open(file_path, "r", encoding="utf-8") as file:
             content = file.read()
             return content
     except FileNotFoundError:
         raise FileNotFoundError(f"Error: The file '{file_path}' was not found.")
     except IOError as e:
         raise IOError(f"Error reading the file '{file_path}': {e}")
-

29 changes: 20 additions & 9 deletions crivo/filter_engine.py
@@ -14,6 +14,7 @@
 import re
 import os
 
+
 def load_valid_tlds(file_path):
     """
     Loads a list of valid TLDs from a file and constructs a regex pattern.
@@ -25,18 +26,27 @@ def load_valid_tlds(file_path):
         str: A regex pattern matching valid TLDs.
     """
     try:
-        with open(file_path, 'r', encoding='utf-8') as f:
+        with open(file_path, "r", encoding="utf-8") as f:
             tlds = [line.strip() for line in f if line.strip()]
         if not tlds:
             raise ValueError("The TLD list is empty.")
         # Join TLDs into a single regex group
-        return '|'.join(tlds)
+        return "|".join(tlds)
     except FileNotFoundError:
         raise FileNotFoundError(f"Error: The file '{file_path}' was not found.")
     except IOError as e:
         raise IOError(f"Error reading the file '{file_path}': {e}")
 
-def filter_content(content, scope_filters=None, filter_ip=False, filter_ipv4=False, filter_ipv6=False, filter_domain=False, filter_url=False):
+
+def filter_content(
+    content,
+    scope_filters=None,
+    filter_ip=False,
+    filter_ipv4=False,
+    filter_ipv6=False,
+    filter_domain=False,
+    filter_url=False,
+):
     """
     Filters the provided content based on specified parameters.
@@ -53,12 +63,12 @@ def filter_content(content, scope_filters=None, filter_ip=False, filter_ipv4=Fal
         list: A list of unique filtered strings based on the criteria.
     """
     # Load the TLD regex dynamically
-    tld_regex = load_valid_tlds('valid_tlds.txt')
-    domain_pattern = rf'\b(?:[a-zA-Z0-9-]+\.)+(?:{tld_regex})\b'
+    tld_regex = load_valid_tlds("valid_tlds.txt")
+    domain_pattern = rf"\b(?:[a-zA-Z0-9-]+\.)+(?:{tld_regex})\b"
 
     # Other regex patterns
-    ipv4_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'
-    ipv6_pattern = r'\b(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}\b'
+    ipv4_pattern = r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"
+    ipv6_pattern = r"\b(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}\b"
     url_pattern = r'\bhttps?://[^\s<>"\'#]+\b'
     results = set()
@@ -82,7 +92,8 @@ def filter_content(content, scope_filters=None, filter_ipv4=Fal
 
     # Apply scope filtering if provided
     if scope_filters:
-        results = {item for item in results if any(scope in item for scope in scope_filters)}
+        results = {
+            item for item in results if any(scope in item for scope in scope_filters)
+        }
 
     return sorted(results)
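
The middle of `filter_content` is collapsed in this diff, but the patterns themselves are easy to check in isolation with `re.findall`. A standalone sketch, where the inline alternation `com|net|org` stands in for whatever `valid_tlds.txt` actually contains:

import re

# Same patterns as above, with a stand-in TLD alternation
ipv4_pattern = r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"
domain_pattern = r"\b(?:[a-zA-Z0-9-]+\.)+(?:com|net|org)\b"

text = "Visit https://example.com or ping 10.0.0.1; skip bad.host.invalidtld"

print(re.findall(ipv4_pattern, text))    # ['10.0.0.1']
print(re.findall(domain_pattern, text))  # ['example.com']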

85 changes: 70 additions & 15 deletions crivo/main.py
@@ -18,6 +18,7 @@
 from crivo.filter_engine import filter_content
 from crivo.version import __version__
 
+
 def parse_arguments():
     """
     Sets up the command-line arguments for Crivo.
@@ -28,22 +29,50 @@ def parse_arguments():
     parser = argparse.ArgumentParser(
         description="Crivo - A tool for extracting and filtering URLs, IPs, domains, and subdomains from text or web pages, with built-in web scraping capabilities."
     )
-
-    parser.add_argument("-f", "--file", help="Input file with text to be filtered", type=str)
-    parser.add_argument("-w", "--webpage", help="URL of a webpage to have its content filtered", type=str)
-    parser.add_argument("-W", "--webpage-list", help="File containing a list of webpage URLs to be filtered", type=str)
-
+    parser.add_argument(
+        "-f", "--file", help="Input file with text to be filtered", type=str
+    )
+    parser.add_argument(
+        "-w",
+        "--webpage",
+        help="URL of a webpage to have its content filtered",
+        type=str,
+    )
+    parser.add_argument(
+        "-W",
+        "--webpage-list",
+        help="File containing a list of webpage URLs to be filtered",
+        type=str,
+    )
     parser.add_argument("-o", "--output", help="Write the output to a file", type=str)
-    parser.add_argument("-s", "--scope", help="Comma-separated sequence of scope filters (ips, urls, domains, subdomains)", type=str)
+    parser.add_argument(
+        "-s",
+        "--scope",
+        help="Comma-separated sequence of scope filters (ips, urls, domains, subdomains)",
+        type=str,
+    )
     parser.add_argument("--ip", help="Filter only IPs", action="store_true")
     parser.add_argument("--ipv4", help="Filter only IPv4", action="store_true")
     parser.add_argument("--ipv6", help="Filter only IPv6", action="store_true")
-    parser.add_argument("--domain", help="Filter only domains and subdomains", action="store_true")
+    parser.add_argument(
+        "--domain", help="Filter only domains and subdomains", action="store_true"
+    )
     parser.add_argument("--url", help="Filter only URLs", action="store_true")
-    parser.add_argument("-v", "--verbose", help="Enable verbose output", action="store_true")
-    parser.add_argument("-V", "--version", help="Show the programme version", action="version", version=f"Crivo {__version__}")
+    parser.add_argument(
+        "-v", "--verbose", help="Enable verbose output", action="store_true"
+    )
+    parser.add_argument(
+        "-V",
+        "--version",
+        help="Show the programme version",
+        action="version",
+        version=f"Crivo {__version__}",
+    )
 
     return parser.parse_args()
 
+
 def main():
     """
     Main logic for Crivo.
@@ -53,15 +82,27 @@ def main():
     if args.file:
         content = process_file(args.file)
         filtered_output = filter_content(
-            content, args.scope.split(",") if args.scope else [], args.ip, args.ipv4, args.ipv6, args.domain, args.url
+            content,
+            args.scope.split(",") if args.scope else [],
+            args.ip,
+            args.ipv4,
+            args.ipv6,
+            args.domain,
+            args.url,
         )
         print("\n".join(filtered_output))
 
     elif args.webpage:
         print(args.webpage)
         content = scrape_webpage(args.webpage)
         filtered_output = filter_content(
-            content, args.scope.split(",") if args.scope else [], args.ip, args.ipv4, args.ipv6, args.domain, args.url
+            content,
+            args.scope.split(",") if args.scope else [],
+            args.ip,
+            args.ipv4,
+            args.ipv6,
+            args.domain,
+            args.url,
         )
         print("\n".join(filtered_output))
 
@@ -76,18 +117,32 @@ def main():
             try:
                 content = scrape_webpage(url)
                 filtered_output = filter_content(
-                    content, args.scope.split(",") if args.scope else [], args.ip, args.ipv4, args.ipv6, args.domain, args.url
+                    content,
+                    args.scope.split(",") if args.scope else [],
+                    args.ip,
+                    args.ipv4,
+                    args.ipv6,
+                    args.domain,
+                    args.url,
                 )
-                print("\n".join(filtered_output))  # Print results directly after the URL
+                print(
+                    "\n".join(filtered_output)
+                )  # Print results directly after the URL
             except Exception as e:
                 print(f"Error fetching content from {url}: {e}", file=sys.stderr)
 
     except FileNotFoundError:
-        print(f"Error: The file '{args.webpage_list}' was not found.", file=sys.stderr)
+        print(
+            f"Error: The file '{args.webpage_list}' was not found.", file=sys.stderr
+        )
         sys.exit(1)
     else:
-        print("Error: An input file (-f), a webpage (-w), or a webpage list (-W) is required.", file=sys.stderr)
+        print(
+            "Error: An input file (-f), a webpage (-w), or a webpage list (-W) is required.",
+            file=sys.stderr,
+        )
         sys.exit(1)
 
 
 if __name__ == "__main__":
     main()
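
Because `main()` reads its options from `sys.argv` through argparse, the CLI can also be driven programmatically. A sketch, where `urls.txt` is a hypothetical input file and the flags are the ones registered in `parse_arguments()`:

import sys

from crivo.main import main

# Equivalent to the shell invocation: crivo -f urls.txt --ipv4 --url -s example.com
sys.argv = ["crivo", "-f", "urls.txt", "--ipv4", "--url", "-s", "example.com"]
main()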
4 changes: 3 additions & 1 deletion crivo/utils.py
@@ -11,6 +11,7 @@
 
 """
 
+
 def normalise_text(text):
     """
     Normalises text by stripping whitespace and converting to lowercase.
@@ -23,6 +24,7 @@ def normalise_text(text):
     """
     return text.strip().lower()
 
+
 def validate_scope(scope):
     """
     Validates the provided scope filters and ensures they are in the correct format.
@@ -37,6 +39,7 @@ def validate_scope(scope):
         return []
     return [s.strip().lower() for s in scope.split(",") if s.strip()]
 
+
 def log_verbose(message, verbose):
     """
     Prints a message if verbose mode is enabled.
@@ -47,4 +50,3 @@ def log_verbose(message, verbose):
 
     if verbose:
         print(message)
-
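
These helpers are small enough to exercise directly. A quick behavioural sketch, assuming the `crivo` package is importable:

from crivo.utils import log_verbose, normalise_text, validate_scope

print(normalise_text("  ExAmPlE.COM  "))  # -> "example.com"
print(validate_scope("IPs, urls, "))      # -> ["ips", "urls"] (empty entries dropped)

log_verbose("shown in verbose mode", verbose=True)   # printed
log_verbose("hidden otherwise", verbose=False)       # suppressed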

2 changes: 1 addition & 1 deletion crivo/web_scraper.py
@@ -15,6 +15,7 @@
 from urllib.parse import urljoin
 import re
 
+
 def scrape_webpage(url, resolve_links=False):
     """
     Fetches the content of a webpage and optionally resolves relative links.
@@ -47,4 +48,3 @@ def scrape_webpage(url, resolve_links=False):
         return content
     except requests.exceptions.RequestException as e:
         raise RuntimeError(f"Error fetching the webpage '{url}': {e}")
-
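
On the calling side, the `RuntimeError` wrapping means a single `except` clause covers every transport failure `requests` can raise. A usage sketch with a placeholder URL; since the body of `scrape_webpage` is collapsed here, only the signature and the error contract shown above are relied on:

from crivo.web_scraper import scrape_webpage

try:
    content = scrape_webpage("https://example.com")
    print(content[:200])
except RuntimeError as e:
    # Wraps any underlying requests.exceptions.RequestException
    print(f"Scrape failed: {e}")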

3 changes: 2 additions & 1 deletion crivo_cli.py
@@ -13,12 +13,13 @@
 
 from crivo.main import main
 
+
 def run_cli():
     """
     Runs the Crivo command-line interface.
     """
     main()
 
+
 if __name__ == "__main__":
     run_cli()
-

1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -3,6 +3,7 @@
 # Testing framework
 pytest>=7.0
 pytest-mock>=3.0
+pytest-cov>=3.0
 
 # Code style and linting
 flake8>=5.0
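
`pytest-cov` provides the `--cov` option used by the Python Tests workflow. Once installed, the same run can be driven from Python as well as from the shell; a sketch:

import sys

import pytest

# Equivalent to running `pytest --cov=crivo` in the shell
sys.exit(pytest.main(["--cov=crivo"]))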