Skip to content
This repository was archived by the owner on Apr 3, 2024. It is now read-only.

Fingerprint update py module #74

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
31 changes: 14 additions & 17 deletions Wappalyzer/Wappalyzer.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@

from typing import Callable, Dict, Iterable, List, Any, Mapping, Set
from typing import Callable, Dict, Iterable, List, Any, Mapping, Set, Union
import json
import logging
import pkg_resources
import re
import os
import pathlib
import requests

from datetime import datetime, timedelta
from typing import Optional

from Wappalyzer.fingerprint import Fingerprint, Pattern, Technology, Category
from Wappalyzer.fingerprint import Fingerprint, Pattern, Technology, Category, get_latest_tech_data
from Wappalyzer.webpage import WebPage, IWebPage

logger = logging.getLogger(name="python-Wappalyzer")
Expand Down Expand Up @@ -88,8 +86,6 @@ def latest(cls, technologies_file:str=None, update:bool=False) -> 'Wappalyzer':
from `AliasIO/wappalyzer <https://github.com/AliasIO/wappalyzer>`_ repository.

"""
default=pkg_resources.resource_string(__name__, "data/technologies.json")
defaultobj = json.loads(default)

if technologies_file:
with open(technologies_file, 'r', encoding='utf-8') as fd:
Expand All @@ -107,31 +103,32 @@ def latest(cls, technologies_file:str=None, update:bool=False) -> 'Wappalyzer':
# Get the latest file
if should_update:
try:
lastest_technologies_file=requests.get('https://raw.githubusercontent.com/AliasIO/wappalyzer/master/src/technologies.json')
obj = lastest_technologies_file.json()
obj = get_latest_tech_data()
_technologies_file = pathlib.Path(cls._find_files(
['HOME', 'APPDATA',],
['.python-Wappalyzer/technologies.json'],
create = True
).pop())

if obj != defaultobj:
with _technologies_file.open('w', encoding='utf-8') as tfile:
tfile.write(lastest_technologies_file.text)
logger.info("python-Wappalyzer technologies.json file updated")
with _technologies_file.open('w', encoding='utf-8') as tfile:
tfile.write(json.dumps(obj))
logger.info("python-Wappalyzer technologies.json file updated")

except Exception as err: # Or loads default
logger.error("Could not download latest Wappalyzer technologies.json file because of error : '{}'. Using default. ".format(err))
obj = defaultobj
obj = None
else:
logger.debug("python-Wappalyzer technologies.json file not updated because already updated in the last 24h")
with _technologies_file.open('r', encoding='utf-8') as tfile:
obj = json.load(tfile)

logger.info("Using technologies.json file at {}".format(_technologies_file.as_posix()))
else:
obj = defaultobj
obj = None

if obj is None:
from Wappalyzer.technologies import TECHNOLOGIES_DATA
obj = TECHNOLOGIES_DATA

return cls(categories=obj['categories'], technologies=obj['technologies'])

Expand Down Expand Up @@ -194,11 +191,11 @@ def _has_technology(self, tech_fingerprint: Fingerprint, webpage: IWebPage) -> b
if pattern.regex.search(content):
self._set_detected_app(webpage.url, tech_fingerprint, 'headers', pattern, value=content, key=name)
has_tech = True
# analyze scripts patterns
for pattern in tech_fingerprint.scripts:
# analyze scripts src patterns
for pattern in tech_fingerprint.scriptSrc:
for script in webpage.scripts:
if pattern.regex.search(script):
self._set_detected_app(webpage.url, tech_fingerprint, 'scripts', pattern, value=script)
self._set_detected_app(webpage.url, tech_fingerprint, 'scriptSrc', pattern, value=script)
has_tech = True
# analyze meta patterns
for name, patterns in list(tech_fingerprint.meta.items()):
Expand Down
33 changes: 33 additions & 0 deletions Wappalyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,39 @@

:see: `Wappalyzer` and `WebPage`.
"""
from typing import Optional, TextIO
import logging
import sys

# Setup stdout logger
def _setup_logger(
    name: str,
    verbose: bool = False,
    quiet: bool = False,
    outstream: Optional[TextIO] = None,
) -> logging.Logger:
    """
    Configure the named logger with exactly one stream handler and return it.

    Handlers already attached to the logger are removed and closed first, so
    calling this function again reconfigures the logger instead of stacking
    handlers.

    :param name: Name of the logger to configure.
    :param verbose: Log at ``DEBUG`` level. Takes precedence over ``quiet``.
    :param quiet: Log at ``ERROR`` level (only when ``verbose`` is false).
    :param outstream: Stream the handler writes to. When omitted,
        ``sys.stdout`` is looked up at call time.
    :returns: The configured logger.
    """
    if outstream is None:
        # Resolve the stream now rather than in the signature: a default of
        # ``sys.stdout`` would be frozen when the module is imported and
        # would ignore any later redirection of stdout.
        outstream = sys.stdout

    if verbose:
        level = logging.DEBUG
    elif quiet:
        level = logging.ERROR
    else:
        level = logging.INFO

    log = logging.getLogger(name)
    log.setLevel(level)

    # Detach previous handlers through the logging API and release them.
    # Iterate over a copy because removeHandler() mutates ``log.handlers``.
    for stale in list(log.handlers):
        log.removeHandler(stale)
        stale.close()

    handler = logging.StreamHandler(outstream)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
    log.addHandler(handler)

    return log

# Configure the 'python-Wappalyzer' logger as soon as the package is imported,
# using the defaults of _setup_logger (INFO level, stdout stream).
_setup_logger('python-Wappalyzer')

from .Wappalyzer import Wappalyzer, analyze
from .webpage import WebPage
Expand Down
4 changes: 4 additions & 0 deletions Wappalyzer/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import argparse
import json
import sys

from . import _setup_logger
from .Wappalyzer import analyze

def get_parser() -> argparse.ArgumentParser:
Expand All @@ -20,4 +23,5 @@ def main(args) -> None:
print(json.dumps(result))

# Script entry point: reconfigure logging to ERROR level on stderr (quiet=True) before running main().
if __name__ == '__main__':
_setup_logger('python-Wappalyzer', verbose=False, quiet=True, outstream=sys.stderr)
main(get_parser().parse_args())
Loading