WIP: added regex scanning for http(s) urls and emails using ripgrep and apktool
eUgEntOptIc44 committed Oct 31, 2021
1 parent 3316427 commit 14261c9
Showing 5 changed files with 123 additions and 2 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/exodusscan.yml
@@ -38,8 +38,14 @@ jobs:
run: python -m pip install --upgrade pip

- name: install dexdump
- run: sudo apt-get install dexdump
+ run: sudo apt-get install -y dexdump

- name: install ripgrep
run: sudo apt-get install -y ripgrep

- name: install apktool
run: sudo apt-get install -y apktool

#- name: install brotli
# run: apt-get install brotli || true

@@ -63,6 +69,9 @@ jobs:

- name: run main.py
run: python TinyWeatherForecastGermanyScan/main.py

- name: run rg-pattern-search.py
run: python TinyWeatherForecastGermanyScan/rg-pattern-search.py || true

- name: run logtohtml.py
run: python TinyWeatherForecastGermanyScan/logtohtml.py || true
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
### TWFG ###
TinyWeatherForecastGermanyScan/
TinyWeatherForecastGermanyApk/

### Git ###
# Created by git for backups. To disable backups in Git:
2 changes: 1 addition & 1 deletion .gitpod.yml
@@ -1,5 +1,5 @@
tasks:
- - init: sudo apt install -y dexdump && python -m pip install --upgrade pip && pip install -r requirements.txt
+ - init: sudo apt install -y dexdump && sudo apt install -y ripgrep && python -m pip install --upgrade pip && pip install -r requirements.txt
vscode:
extensions:
- ms-python.python
1 change: 1 addition & 0 deletions requirements.txt
@@ -6,3 +6,4 @@ wheel==0.37.0
Markdown==3.3.4
htmlmin==0.1.12
pygments==2.10.0
ripgrepy==1.1.0
110 changes: 110 additions & 0 deletions rg-pattern-search.py
@@ -0,0 +1,110 @@
"""
ATTENTION: status -> WIP!
license: GPLv3
author: Jean-Luc Tibaux
DISCLAIMER:
use only at your own risk. your mileage might vary.
no warranty or guarantee of any kind provided.
"""

import json
import logging
import subprocess
import sys
from pathlib import Path
from pprint import pprint

import regex

from ripgrepy import Ripgrepy
# The Ripgrepy class takes two arguments. The regex to search for and the folder path to search in
# docs: https://ripgrepy.readthedocs.io/en/latest/
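# roughly: .H() maps to rg's --with-filename flag, .n() to --line-number and .json() to --json;
# .run() executes the search and .as_dict parses the resulting JSON event stream into a list of dicts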

workingDir = Path("TinyWeatherForecastGermanyScan")
workingDir.mkdir(parents=True, exist_ok=True)  # create the directory if it does not exist yet

try:
    logging.basicConfig(format=u'%(asctime)-s %(levelname)s [%(name)s]: %(message)s',
                        level=logging.DEBUG,
                        handlers=[
                            logging.FileHandler(str(Path(workingDir / "debug.log").absolute()), encoding="utf-8"),
                            logging.StreamHandler()
                        ])
except Exception as e:
    logging.error("error while initializing the logger -> " + str(e))

apkFiles = list(workingDir.glob('*.apk'))
pprint(apkFiles)

if len(apkFiles) == 0:
    logging.error("failed to find apk file -> aborting execution")
    sys.exit(1)

logging.debug("found apk file(s):")
apkFilePath = apkFiles[0]

logging.debug("reverse engineering '"+str(apkFilePath)+"' using apktool ... ")
# reverse engineering apk -> output: smali code
subprocess.run(["apktool","d",str(apkFilePath),"-o","TinyWeatherForecastGermanyApk","-f"])

logging.debug("saved extracted contents of '"+str(apkFilePath)+"' to 'TinyWeatherForecastGermanyApk/' ")

rg = Ripgrepy('(?im)http(s)*://', 'TinyWeatherForecastGermanyApk/smali')
http_matches_list = rg.H().n().json().run().as_dict

logging.debug("found "+str(len(http_matches_list))+" matches for '(?im)http(s)*://' in smali code ")

#with open("temp.json","w+",encoding="utf-8") as fh:
# fh.write(str(json.dumps(http_matches_list, indent=4)))

http_cleaned_matches = {}

for http_match_dict in http_matches_list:
    try:
        url_temp = str(regex.findall(r'(?im)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', str(http_match_dict["data"]["lines"]["text"]).strip())[0][0]).strip()
        if url_temp not in http_cleaned_matches:
            http_cleaned_matches[url_temp] = 1
        else:
            http_cleaned_matches[url_temp] += 1
    except Exception as e:
        logging.error("failed to parse http url match dict -> error: "+str(e))

pprint(http_cleaned_matches)

#with open("temp2.json","w+",encoding="utf-8") as fh:
# fh.write(str(json.dumps(http_cleaned_matches, indent=4)))
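# http_cleaned_matches maps every URL extracted from the matched smali lines to the
# number of lines it occurred on, e.g. (hypothetical values):
# {"https://example.org/api": 3, "https://example.org/privacy": 1}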

# --------------- email ----------------------

rg = Ripgrepy(r'(?im)^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$', 'TinyWeatherForecastGermanyApk/smali')
email_matches_list = rg.H().n().json().run().as_dict

logging.debug("found "+str(len(email_matches_list))+" matches for email pattern in smali code ")

#with open("temp.json","w+",encoding="utf-8") as fh:
# fh.write(str(json.dumps(email_matches_list, indent=4)))

email_cleaned_matches = {}

for email_match_dict in email_matches_list:
    try:
        email_temp = str(email_match_dict["data"]["lines"]["text"]).strip()
        if email_temp not in email_cleaned_matches:
            email_cleaned_matches[email_temp] = 1
        else:
            email_cleaned_matches[email_temp] += 1
    except Exception as e:
        logging.error("failed to parse email match dict -> error: "+str(e))

pprint(email_cleaned_matches)

#with open("temp2.json","w+",encoding="utf-8") as fh:
# fh.write(str(json.dumps(email_cleaned_matches, indent=4)))

with open("TinyWeatherForecastGermanyScan/rg-pattern-matches.json","w+",encoding="utf-8") as fh:
fh.write(str(json.dumps({"http":http_cleaned_matches,"emails":email_cleaned_matches}, indent=4)))

print("done")
