Skip to content

Commit

Permalink
pptx & xlsx rendered for --doc. test-samples and re-fuzzed
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Nov 4, 2024
1 parent 08fab8f commit 7c98bad
Show file tree
Hide file tree
Showing 87 changed files with 794 additions and 887 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
- name: Analysing the code with pylint for NEW missed docstrings of classes or functions
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
run: |
pylint --disable=E,R,W,C0114,C0103,C0412,C0413,C0415,C0200,C0201,C0325 --verbose credsweeper
pylint --disable=E,R,W,C0114,C0103,C0303,C0412,C0413,C0415,C0200,C0201,C0325 --verbose credsweeper
# # # Documentation check

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
'__version__'
]

__version__ = "1.9.1"
__version__ = "1.9.2"
6 changes: 6 additions & 0 deletions credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
from .lang_scanner import LangScanner
from .pdf_scanner import PdfScanner
from .pkcs12_scanner import Pkcs12Scanner
from .pptx_scanner import PptxScanner
from .tar_scanner import TarScanner
from .xlsx_scanner import XlsxScanner
from .xml_scanner import XmlScanner
from .zip_scanner import ZipScanner
from ..common.constants import DEFAULT_ENCODING
Expand All @@ -47,8 +49,10 @@ class DeepScanner(
LangScanner, #
PdfScanner, #
Pkcs12Scanner, #
PptxScanner, #
TarScanner, #
XmlScanner, #
XlsxScanner, #
ZipScanner
): # yapf: disable
"""Advanced scanner with recursive exploring of data"""
Expand Down Expand Up @@ -79,7 +83,9 @@ def get_deep_scanners(data: bytes, file_type: str) -> List[Any]:
deep_scanners.append(ZipScanner)
# probably, there might be a docx, xlsx and so on.
# It might be scanned with text representation in third-party libraries.
deep_scanners.append(XlsxScanner)
deep_scanners.append(DocxScanner)
deep_scanners.append(PptxScanner)
elif Util.is_bzip2(data):
deep_scanners.append(Bzip2Scanner)
elif Util.is_tar(data):
Expand Down
42 changes: 42 additions & 0 deletions credsweeper/deep_scanner/pptx_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import io
import logging
from abc import ABC
from typing import List

from pptx import Presentation

from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider

logger = logging.getLogger(__name__)


class PptxScanner(AbstractScanner, ABC):
    """Scanner for PowerPoint (pptx) payloads"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Collects paragraph text from every slide and scans it as plain lines.

        Only shapes that report a text frame contribute lines. ``depth`` and
        ``recursive_limit_size`` are accepted for the common scanner signature
        but are not used here.

        Args:
            data_provider: provider whose ``data`` holds the raw pptx bytes
            depth: unused in this scanner
            recursive_limit_size: unused in this scanner

        Return:
            list of candidates found in the extracted text; empty when the
            presentation cannot be processed (the error is logged)

        """
        try:
            deck = Presentation(io.BytesIO(data_provider.data))
            text_lines: List[str] = []
            for slide in deck.slides:
                for shape in slide.shapes:
                    # shapes without a text frame have no paragraphs to read
                    if not shape.has_text_frame:
                        continue
                    text_lines.extend(paragraph.text for paragraph in shape.text_frame.paragraphs)
            text_provider = StringContentProvider(lines=text_lines,
                                                  file_path=data_provider.file_path,
                                                  file_type=data_provider.file_type,
                                                  info=f"{data_provider.info}|pptx")
            # NOTE(review): self.scanner is presumably supplied by AbstractScanner - confirm
            return list(self.scanner.scan(text_provider))
        except Exception as pptx_exc:
            # best-effort: a broken or non-pptx payload must not abort the whole scan
            logger.error(f"{data_provider.file_path}:{pptx_exc}")
        return []
41 changes: 41 additions & 0 deletions credsweeper/deep_scanner/xlsx_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import io
import logging
from abc import ABC
from typing import List

from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider

import pandas as pd

logger = logging.getLogger(__name__)


class XlsxScanner(AbstractScanner, ABC):
    """Implements xlsx scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Tries to scan xlsx text elements for all sheets

        Every sheet is converted to tab-separated text lines (one line per row)
        and scanned on its own, so the ``info`` of a candidate names the sheet
        the text was read from. ``depth`` and ``recursive_limit_size`` are not
        used here.

        Args:
            data_provider: provider whose ``data`` holds the raw xlsx bytes
            depth: unused in this scanner
            recursive_limit_size: unused in this scanner

        Return:
            list of candidates from all sheets; candidates collected before a
            failure are kept, the error itself is logged

        """
        candidates = []
        try:
            # sheet_name=None loads all sheets as {name: DataFrame};
            # header=None keeps the first row as ordinary data instead of column names
            book = pd.read_excel(io.BytesIO(data_provider.data), sheet_name=None, header=None)
            for sheet_name, sheet_data in book.items():
                text = sheet_data.fillna('').astype(str)
                # lines are rebuilt for each sheet: a list shared across sheets would
                # feed rows of previous sheets into this sheet's scan under the wrong name
                sheet_lines = ['\t'.join(row) for row in text.values]
                string_data_provider = StringContentProvider(lines=sheet_lines,
                                                             file_path=data_provider.file_path,
                                                             file_type=data_provider.file_type,
                                                             info=f"{data_provider.info}|xlsx:{sheet_name}")
                sheet_candidates = self.scanner.scan(string_data_provider)
                candidates.extend(sheet_candidates)
        except Exception as xlsx_exc:
            # best-effort: a broken or non-xlsx payload must not abort the whole scan
            logger.error(f"{data_provider.file_path}:{xlsx_exc}")
        return candidates
4 changes: 2 additions & 2 deletions credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
".bz2",
".gz",
".tar",
".xlsx",
".zip"
],
"documents": [
".xlsx",
".docx",
".pptx",
".pdf"
],
"extension": [
Expand Down Expand Up @@ -43,7 +44,6 @@
".ogg",
".pak",
".png",
".pptx",
".psd",
".pyc",
".pyd",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
#

#
# skip MAX�SEARCH_MA4206074328-qdv6fi3eh31q6h7c35vsi4p89p1258g1.apps.googleusercontent.com","CEKPET":"GOCSPX-Fogleucontent.com","CEKPET":"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",
# skip MAX�SEARCH_MA4206074328-qdv6fi3eh31q6h7c35vsi4p89p1258g1.apps.googleusercontent.com","CEKPET":"OGCSPX-Fcontent,com","CEKPET":"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",
Binary file not shown.
Binary file added fuzz/corpus/133c106277c5052dfa163005cb1027ec2571063c
Binary file not shown.
1 change: 0 additions & 1 deletion fuzz/corpus/20bb3787c7f914def39aff2ed2b9f36ca5eeeb91

This file was deleted.

Binary file removed fuzz/corpus/24a5d4021dd6275163567ba983d68ab71489efc7
Binary file not shown.
2 changes: 0 additions & 2 deletions fuzz/corpus/2d41d950f43caddc85821d0d5a4f7ee5358fb1ff

This file was deleted.

2 changes: 2 additions & 0 deletions fuzz/corpus/2f158b179ca65b6a077d3c84d4dcfd5f8683cc22
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
glsa_ThisI5NtTheTok3nYou8Leor0k1ngF0r_0a2a3df7
glpl_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7
14 changes: 0 additions & 14 deletions fuzz/corpus/37a22693c8945b248f4387cc98ebcc669ccb4f77

This file was deleted.

Binary file added fuzz/corpus/396d225ffb868601030c32ec6730b91795c69505
Binary file not shown.
47 changes: 0 additions & 47 deletions fuzz/corpus/3997395e39d0628e5a630428c259eb79b07ed175

This file was deleted.

92 changes: 0 additions & 92 deletions fuzz/corpus/3dd6e45c6a0cccb29c3416762b2df85012a7b67b

This file was deleted.

14 changes: 14 additions & 0 deletions fuzz/corpus/4212be42c2885853256a8b5a70a4004783be4973
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"Prl23Db#@"

Passwd:Prl23Db#@ Prl23Db#@
PW:Prl23Db#@,password:Prl23Db#@
password:Prl23Db#@,비번:Prl23Db#@
passwd=Prl23Db#@
--pass Prl23Db#@
PIN:Prl23Db#@
paasword:Prl23Db#@
password:Prl23Db#@, paasword:Prlord: keep empty
암호 : @@@hl@@@비번@@@endhl@@@

FP# 10.0.0.1 8888 TLSv#;'
eo(s)
6 changes: 0 additions & 6 deletions fuzz/corpus/45db8bda86e1b35af588058e1e21192c96dd4683

This file was deleted.

1 change: 0 additions & 1 deletion fuzz/corpus/470054018f2aa757a1e10b6d64a54a97e57eb815

This file was deleted.

1 change: 0 additions & 1 deletion fuzz/corpus/488dc029b1fa81152d46ca7a90d4a151e22e008e

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ id:master pass:dipPr122Gg!
아이디:master 패스워드:dipPr123Gg!
user:master pw:dipPr124Gg!
Username:master/Password:dipPr125Gg!
userId:master,password:dipPr126Gg!
userId:master,pXssword:dipPr126Gg!
--user master --password dipPr127Gg!
dipPr128Gg! ID:master dipPr128Gg! Password:dipPr128Gg!
ANYid:master,pw:dipPr129Gg!
Expand Down
7 changes: 0 additions & 7 deletions fuzz/corpus/4ba45f243da9091d865bc7a0f449bfde576550b6

This file was deleted.

2 changes: 2 additions & 0 deletions fuzz/corpus/556041d17c7f7991e47f2041e4c12b80dbf4ef7a
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The items are AKIAGIREOGIAWSKEY123,AKIAGIREOGIAWSKEY45X
the coma is necessary there REAL12 --access-key <xcFsdeGddSAdI/KFRS2CB/3fGCsdCYEXAMPLEKEY>
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ passes = "cackle!"
</td>
2EC0JQLFdN3tqanQ_Bc1HA2yL9kK22WD*e2QLxA0RKuqrtable>

ithub_pat_31ADLV2EC0JQLFdN3tqanQ_Btxr 0000j
ithub_pJQLFdN3tqanQ_Btxr 0000j
r
Loading

0 comments on commit 7c98bad

Please sign in to comment.