Skip to content

Commit

Permalink
code commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ihandmine committed Mar 7, 2022
1 parent ddf28a6 commit 14adf3c
Show file tree
Hide file tree
Showing 13 changed files with 1,065 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# IDE
.vscode/
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
132 changes: 131 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,131 @@
# anti-header
# anti-header



> info: anti-header generates fake Chrome, Firefox, and Opera browser request headers
## Features

- more header params
- more request method

### Installation

```shell
pip install anti-header
```

### Usage

```python
import anti_header
from anti_header import Header
from pprint import pprint

hd = Header(platform='windows', min_version=90, max_version=100).base
hd = Header(platform='windows', min_version=90,max_version=100).random
print(anti_header.VERSION)

# must_header param usage
hd = Header(must_header={'aa': 'bb'}).random
hd = Header(must_header={'aa': 'bb'}).base

# rand_header param usage
hd = Header(rand_header={'cc': 'dd'}).random
hd = Header(rand_header={'cc': 'dd'}).base

# default_header param usage
for i in range(10):
hd = Header(default_header={'ee': 'ff'}).base
pprint(hd.to_unicode_dict())

"""
base example
{'cjito': 'azhbmf',
'ee': 'ff',
'referer': 'https://www.google.com/',
'user-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.7.3455.76 Safari/537.36'}
random example
{'accept-encoding': 'gzip, deflate',
'accept-type': 'utf-8',
'ee': 'ff',
'origin': 'https://www.google.com/',
'referer': 'https://www.google.com/',
'sec-ch-ua-mobile': '?0',
'sec-fetch-mode': 'navigate',
'te': 'Trailers',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (SM-G3609 Build/KTU84P; WIFI) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.5.6492.87 Safari/537.36',
'x-forwarded-for': '1',
'xorsv': 'pvmcue'}
"""
```


If you want to use it with the `requests` library:

```python
# test.py
import requests
from anti_header import Header

_url = 'https://www.google.com/'
hd = Header(url=_url, platform='windows')
requests.get(_url, headers=hd.random.to_unicode_dict())

```

If you want to use it as a Scrapy downloader middleware:

```python

# random_header.py
from anti_header import Header


class RandomHeaderMiddleware(object):
    """Downloader middleware that gives every outgoing request random headers."""

    def process_request(self, request, spider):
        # Build a fresh randomized header set for the URL being requested.
        # ``Header`` (imported above) is the factory; ``.random`` returns
        # the generated headers object.
        request.headers = Header(url=request.url).random

    def process_response(self, request, response, spider):
        # Responses pass through untouched.
        return response
```

If you want to specify additional parameters:

```python
from anti_header import Header
hd = Header(logger=True)

# the default install loguru
try:
from loguru import logger
except:
install("loguru")
from loguru import logger

# close default singleton
hd = Header(dry=True)

```



Make sure that you are using the latest version

```
pip install -U anti-header
```

Check version via python console:

```
import anti_header
print(anti_header.VERSION)
```

143 changes: 143 additions & 0 deletions anti_header/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# encoding: utf-8
import json
import string
import time
import hashlib

from random import sample, randint, choice
from urllib.parse import urlsplit

from anti_useragent import UserAgent
from anti_header.exceptions import UserAgentError, NotFoundParamError
from anti_header.headers import Headers
from anti_header.utils import logging


class UsageHeader(object):
    """Factory for randomized, browser-like HTTP request headers.

    Two header sets are exposed as attributes (or via subscripting):

    * ``base``   -- default headers + "must" headers + a User-Agent.
    * ``random`` -- default headers + a random sample of the optional
      headers + "must" headers + a User-Agent.

    Instances are shared (singleton) unless ``dry=True`` is passed, in which
    case a separate, independent instance is created.
    """

    _INSTANCE = None
    # Attribute / subscript names that trigger header generation.
    _RULES = ('random', 'base')

    def __new__(cls, *args, **kwargs):
        """
        Return the shared singleton instance, or a brand-new independent
        instance when the ``dry`` keyword is exactly ``True``.

        :param dry: True|False
        :return: an instance of ``cls``
        """
        if cls._INSTANCE is None:
            cls._INSTANCE = super().__new__(cls)
        if kwargs.get('dry', False) is True:
            return super().__new__(cls)
        return cls._INSTANCE

    def __init__(self, url: str = None,
                 rand_header: dict = None,
                 must_header: dict = None,
                 default_header: dict = None,
                 logger=False, **kwargs):
        """
        :param url: target URL used for referer/host/origin style headers;
            defaults to ``https://www.google.com/``
        :param rand_header: extra headers added to the optional pool that
            ``random`` samples from
        :param must_header: extra headers that are always included
        :param default_header: headers every generated set starts from
        :param logger: boolean flag, validated for backward compatibility
        :param kwargs: forwarded to ``UserAgent`` (``dry`` is stripped first)
        :raises TypeError: if ``logger`` is not a bool
        """
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if not isinstance(logger, bool):
            raise TypeError("logger param must bool type")
        # Always keep a real logger object: storing the bool flag itself
        # made ``self.logger.debug(...)`` fail whenever ``logger=True``.
        self.logger = logging.get_logger('anti_header')

        # Remember the defaults so each generated header set starts clean.
        self._default_header = dict(default_header or {})
        self._headers = self._fresh_headers()

        self.url: str = url or "https://www.google.com/"
        self.headers_must = self.set_headers_must(must_header or {})
        self.headers_param = self.set_header_rand(rand_header or {})

        # ``dry`` only controls instance creation; never forward it,
        # whatever its value.
        kwargs.pop('dry', None)
        self._ua = UserAgent(**kwargs)

    def __getitem__(self, rule: str):
        """Subscript access to a rule: ``hd['random']`` / ``hd['base']``.

        :raises KeyError: if ``rule`` is not a known rule name
        """
        if rule not in self._RULES:
            raise KeyError(rule)
        return self.__getattr__(rule)

    def __getattr__(self, rule: str):
        """Generate headers for the ``random`` / ``base`` pseudo-attributes.

        Only invoked for names normal attribute lookup cannot resolve.

        :raises AttributeError: for any other name, so ``hasattr`` and
            protocol probing (copy, pickle, ...) behave normally
        """
        if rule == 'random':
            # The User-Agent is passed as a per-call extra rather than being
            # appended to ``headers_must`` (which would grow on every access).
            return self.process([self.user_agent()])
        if rule == 'base':
            return self._assemble(self.headers_must + [self.user_agent()])
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {rule!r}")

    def _fresh_headers(self) -> "Headers":
        """Return a new ``Headers`` pre-populated with the default headers."""
        headers = Headers({})
        if self._default_header:
            headers.update(self._default_header)
        return headers

    def _assemble(self, params: list) -> "Headers":
        """Build a new header set from single-entry dicts, applied in order."""
        headers = self._fresh_headers()
        for param in params:
            headers.update(param)
        # Keep the most recently generated set on the instance.
        self._headers = headers
        return headers

    def set_headers_must(self, header: dict) -> list:
        """Return the always-included headers as a list of one-entry dicts.

        Contains one random 5-letter key with a random 6-letter value, the
        ``referer`` (set to ``self.url``) and every item of ``header``.
        """
        _header_must = [
            {''.join(sample(string.ascii_lowercase, 5)): ''.join(sample(string.ascii_lowercase, 6))},
            {'referer': self.url},
        ]
        _header_must.extend({key: value} for key, value in header.items())
        return _header_must

    def set_header_rand(self, header: dict) -> list:
        """Return the pool of optional headers as a list of one-entry dicts.

        ``random`` draws a random subset from this pool. Items of ``header``
        are appended to the built-in candidates.
        """
        parse_url = urlsplit(self.url)
        accept_string_all = '*/*,text/html,application/xhtml+xml,application/xml;q=0.9,' \
                            'image/avif,image/webp,image/apng,*/*;q=0.8,application/signed' \
                            '-exchange;v=b3;q=0.9'.split(',')
        # Pick at least one media range and re-join with commas so the
        # value is a well-formed, non-empty Accept list.
        accept_count = randint(1, len(accept_string_all))
        accept_value = ','.join(sample(accept_string_all, accept_count))
        # An origin is scheme + host (no path); fall back to the raw URL
        # when it cannot be split into both parts.
        if parse_url.scheme and parse_url.netloc:
            origin = f'{parse_url.scheme}://{parse_url.netloc}'
        else:
            origin = self.url
        _header_params = [
            {'accept': accept_value},
            {'accept-type': 'utf-8'},
            {'accept-encoding': 'gzip, deflate'},
            {'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8'},
            {'authority': parse_url.netloc},
            {'cache-control': choice(['max-age=0', 'no-cache'])},
            {'cache-type': 'any'},
            {'content-from': 'google'},
            {'connection': 'keep-alive'},
            {"content-type": "application/x-www-form-urlencoded"},

            {'cookie': self._md_hex},

            {'DNT': '1'},
            {'Host': parse_url.netloc},
            {'Origin': origin},
            {'Origin-content': 'false'},
            {'referer-rec': 'ture'},
            {'pragma': 'no-cache'},
            {'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="97", "Chromium";v="97"'},
            {'sec-ch-ua-mobile': '?0'},
            {'sec-ch-ua-platform': '"Windows"'},
            {'sec-fetch-site': 'same-origin'},
            {'sec-fetch-mode': 'navigate'},
            {'sec-fetch-user': '?1'},
            {'sec-fetch-dest': 'document'},
            {'TE': 'Trailers'},
            {'upgrade-type': 'none'},
            {'upgrade-insecure-requests': '1'},
            {'X-Forwarded-For': '1'},
        ]
        _header_params.extend({key: value} for key, value in header.items())
        return _header_params

    @property
    def _md_hex(self) -> str:
        """Fake cookie string: two names derived from the MD5 of five random
        lowercase letters, each valued with the current timestamp."""
        _m = hashlib.md5()
        _m.update(''.join(sample(string.ascii_lowercase, 5)).encode())
        _m_hex = _m.hexdigest()
        return f'Hd_ert_{_m_hex}={time.time()}; Hd_ervt_{_m_hex}={time.time()}'

    def user_agent(self) -> dict:
        """Return ``{"User-Agent": <value>}`` using a random user agent,
        falling back to a fixed Chrome string on ``UserAgentError``."""
        # NOTE(review): ``UserAgentError`` is this package's own exception
        # class; confirm ``anti_useragent`` raises a compatible type,
        # otherwise the fallback below is never reached.
        try:
            _ua = self._ua.random
        except UserAgentError:
            _ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' \
                  ' (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
        return {"User-Agent": _ua}

    def process(self, extra_params: list = None) -> "Headers":
        """Build a randomized header set.

        Starts from the default headers, adds a random-size sample of the
        optional pool, then the "must" headers, then ``extra_params``
        (later entries override earlier ones).

        :param extra_params: optional list of one-entry dicts applied last
        :return: the newly built headers object
        """
        sampled = sample(self.headers_param, randint(0, len(self.headers_param)))
        headers = self._assemble(sampled + self.headers_must + list(extra_params or []))
        self.logger.debug('[Headers]: ' + str(headers))
        return headers


# Short public alias for the header factory class.
Header = UsageHeader
__version__ = '0.0.1'

VERSION = __version__


# ``__all__`` must list attribute *names* (strings); listing the objects
# themselves makes ``from anti_header import *`` raise ``TypeError``.
__all__ = [
    'Header',
    'UsageHeader',
    'VERSION',
]
16 changes: 16 additions & 0 deletions anti_header/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import absolute_import, unicode_literals


class AntiUserAgentError(Exception):
    """Error type for user-agent failures; also exported as ``UserAgentError``."""


class HeaderDeprecationWarning(Warning):
    """Warning category intended for deprecation notices about headers."""


class NotFoundParamError(Exception):
    """Error type presumably meant for a missing/unknown parameter."""


# Shorter alias kept for callers that import ``UserAgentError``.
UserAgentError = AntiUserAgentError
89 changes: 89 additions & 0 deletions anti_header/headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from .utils import headers_dict_to_raw
from .utils import CaselessDict
from .utils import to_unicode


class Headers(CaselessDict):
    """HTTP headers mapping with case-insensitive keys.

    Keys are stored title-cased as bytes; every value is stored as a list
    of bytes so one header can carry several values.
    """

    def __init__(self, seq=None, encoding='utf-8'):
        # The encoding has to be in place before the base initialiser runs,
        # because normalising keys/values reads ``self.encoding``.
        self.encoding = encoding
        super().__init__(seq)

    def normkey(self, key):
        """Return ``key`` title-cased and converted to bytes."""
        titled = key.title()
        return self._tobytes(titled)

    def normvalue(self, value):
        """Return ``value`` as a list of bytes.

        ``None`` becomes an empty list; a str, bytes or non-iterable value
        becomes a one-element list; any other iterable is converted
        element by element.
        """
        if value is None:
            elements = []
        elif isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
            elements = [value]
        else:
            elements = value

        return [self._tobytes(element) for element in elements]

    def _tobytes(self, x):
        """Convert bytes/str/int to bytes using ``self.encoding``.

        :raises TypeError: for any other type
        """
        if isinstance(x, bytes):
            return x
        if isinstance(x, str):
            return x.encode(self.encoding)
        if isinstance(x, int):
            return str(x).encode(self.encoding)
        raise TypeError(f'Unsupported value type: {type(x)}')

    def __getitem__(self, key):
        """Return the last stored value for ``key`` (``None`` if the stored
        list is empty)."""
        try:
            stored = super().__getitem__(key)
            return stored[-1]
        except IndexError:
            return None

    def get(self, key, def_val=None):
        """Like ``__getitem__`` but going through the base ``get`` with a
        default; an empty result list yields ``None``."""
        try:
            found = super().get(key, def_val)
            return found[-1]
        except IndexError:
            return None

    def getlist(self, key, def_val=None):
        """Return the full value list for ``key``.

        When the key is missing, return the normalised ``def_val`` if one
        was given, otherwise an empty list.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            return [] if def_val is None else self.normvalue(def_val)

    def setlist(self, key, list_):
        """Store ``list_`` as the values of ``key``."""
        self[key] = list_

    def setlistdefault(self, key, default_list=()):
        """``setdefault`` counterpart working on value lists."""
        return self.setdefault(key, default_list)

    def appendlist(self, key, value):
        """Append ``value`` (normalised) to the values already held by ``key``."""
        current = self.getlist(key)
        current.extend(self.normvalue(value))
        self[key] = current

    def items(self):
        """Return a generator of ``(key, value_list)`` pairs."""
        return ((name, self.getlist(name)) for name in self.keys())

    def values(self):
        """Return a list holding the last value of every key."""
        return [self[name] for name in self.keys()]

    def to_string(self):
        """Return the headers as produced by ``headers_dict_to_raw``."""
        return headers_dict_to_raw(self)

    def to_unicode_dict(self):
        """ Return headers as a CaselessDict with unicode keys
        and unicode values. Multiple values are joined with ','.
        """
        decoded_pairs = (
            (to_unicode(name, encoding=self.encoding),
             to_unicode(b','.join(raw_values), encoding=self.encoding))
            for name, raw_values in self.items()
        )
        return CaselessDict(decoded_pairs)

    def __copy__(self):
        """Return a new instance of the same class built from this one."""
        return self.__class__(self)
    copy = __copy__
6 changes: 6 additions & 0 deletions anti_header/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from anti_header.utils.datatypes import CaselessDict
from anti_header.utils.log import LogFormatter
from anti_header.utils.python import *
from anti_header.utils.http import headers_dict_to_raw

# Package-wide LogFormatter instance. ``anti_header/__init__.py`` imports it
# under this name and calls ``logging.get_logger('anti_header')``.
# NOTE: the name shadows the standard-library ``logging`` module for any
# code that imports it from here.
logging = LogFormatter()
Loading

0 comments on commit 14adf3c

Please sign in to comment.