-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
1,065 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
# IDE | ||
.vscode/ | ||
.idea/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,131 @@ | ||
# anti-header | ||
# anti-header | ||
|
||
|
||
|
||
> info: fake chrome, firefox, opera browser header anti header | ||
## Features | ||
|
||
- more header parameters | ||
- more request methods | ||
|
||
### Installation | ||
|
||
```shell | ||
pip install anti-header | ||
``` | ||
|
||
### Usage | ||
|
||
```python | ||
import anti_header | ||
from anti_header import Header | ||
from pprint import pprint | ||
|
||
hd = Header(platform='windows', min_version=90, max_version=100).base | ||
hd = Header(platform='windows', min_version=90,max_version=100).random | ||
print(anti_header.VERSION) | ||
|
||
# must_header param usage | ||
hd = Header(must_header={'aa': 'bb'}).random | ||
hd = Header(must_header={'aa': 'bb'}).base | ||
|
||
# rand_header param usage | ||
hd = Header(rand_header={'cc': 'dd'}).random | ||
hd = Header(rand_header={'cc': 'dd'}).base | ||
|
||
# default_header param usage | ||
for i in range(10): | ||
hd = Header(default_header={'ee': 'ff'}).base | ||
pprint(hd.to_unicode_dict()) | ||
|
||
""" | ||
base example | ||
{'cjito': 'azhbmf', | ||
'ee': 'ff', | ||
'referer': 'https://www.google.com/', | ||
'user-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.7.3455.76 Safari/537.36'} | ||
random example | ||
{'accept-encoding': 'gzip, deflate', | ||
'accept-type': 'utf-8', | ||
'ee': 'ff', | ||
'origin': 'https://www.google.com/', | ||
'referer': 'https://www.google.com/', | ||
'sec-ch-ua-mobile': '?0', | ||
'sec-fetch-mode': 'navigate', | ||
'te': 'Trailers', | ||
'upgrade-insecure-requests': '1', | ||
'user-agent': 'Mozilla/5.0 (SM-G3609 Build/KTU84P; WIFI) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.5.6492.87 Safari/537.36', | ||
'x-forwarded-for': '1', | ||
'xorsv': 'pvmcue'} | ||
""" | ||
``` | ||
|
||
|
||
If you want to use it with the `requests` library: | ||
|
||
```python | ||
# test.py | ||
import requests | ||
from anti_header import Header | ||
|
||
_url = 'https://www.google.com/' | ||
hd = Header(url=_url, platform='windows') | ||
requests.get(_url, headers=hd.random.to_unicode_dict()) | ||
|
||
``` | ||
|
||
If you want to use it in a Scrapy downloader middleware: | ||
|
||
```python | ||
|
||
# random_header.py | ||
from anti_header import Header | ||
|
||
|
||
class RandomHeaderMiddleware(object): | ||
def __init__(self): | ||
pass | ||
|
||
def process_request(self, request, spider): | ||
request.headers = Header(url=request.url).random | ||
|
||
def process_response(self, request, response, spider): | ||
return response | ||
``` | ||
|
||
If you want to pass additional parameters: | ||
|
||
```python | ||
from anti_header import Header | ||
hd = Header(logger=True) | ||
|
||
# the default install loguru | ||
try: | ||
from loguru import logger | ||
except: | ||
install("loguru") | ||
from loguru import logger | ||
|
||
# close default singleton | ||
hd = Header(dry=True) | ||
|
||
``` | ||
|
||
|
||
|
||
Make sure that you are using the latest version: | ||
|
||
``` | ||
pip install -U anti-header | ||
``` | ||
|
||
Check version via python console: | ||
|
||
``` | ||
import anti_header | ||
print(anti_header.VERSION) | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# encoding: utf-8 | ||
import json | ||
import string | ||
import time | ||
import hashlib | ||
|
||
from random import sample, randint, choice | ||
from urllib.parse import urlsplit | ||
|
||
from anti_useragent import UserAgent | ||
from anti_header.exceptions import UserAgentError, NotFoundParamError | ||
from anti_header.headers import Headers | ||
from anti_header.utils import logging | ||
|
||
|
||
class UsageHeader(object):
    """Build randomized, browser-like HTTP request headers.

    Two rules are exposed through attribute access (``hd.random``,
    ``hd.base``) and item access (``hd['random']``, ``hd['base']``):

    * ``base``   -- the default headers, the "must" headers and a
      User-Agent.
    * ``random`` -- everything in ``base`` plus a random subset of the
      optional header pool.

    Instances are shared: the first instance is cached on the class and
    returned by every later construction unless ``dry=True`` is passed.
    ``__init__`` still runs on each construction, so the shared instance is
    re-initialised with the latest arguments.
    """

    _INSTANCE = None

    # Media ranges the generated ``accept`` header is sampled from.
    _ACCEPT_TYPES = (
        '*/*',
        'text/html',
        'application/xhtml+xml',
        'application/xml;q=0.9',
        'image/avif',
        'image/webp',
        'image/apng',
        '*/*;q=0.8',
        'application/signed-exchange;v=b3;q=0.9',
    )

    # The only names resolved dynamically by __getattr__ / __getitem__.
    _RULES = ('random', 'base')

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, or a fresh one when ``dry=True``.

        :param dry: True to bypass the cached instance for this call
        :return: an instance of ``cls``
        """
        if not cls._INSTANCE:
            cls._INSTANCE = super().__new__(cls)
        if kwargs.get('dry', False) is True:
            return super().__new__(cls)
        return cls._INSTANCE

    def __init__(self, url: str = None,
                 rand_header: dict = None,
                 must_header: dict = None,
                 default_header: dict = None,
                 logger=False, **kwargs):
        """Prepare the header pools and the user-agent generator.

        :param url: target URL, used for ``referer``/``Origin``/``Host``;
            defaults to ``https://www.google.com/``
        :param rand_header: extra headers added to the optional pool
        :param must_header: extra headers that are always included
        :param default_header: headers placed in the result up front
        :param logger: boolean flag, validated for backward compatibility.
            A logger object is always created, because storing the bare
            flag made ``self.logger.debug`` fail when ``True`` was passed.
        :param kwargs: forwarded to ``UserAgent`` (``dry`` is removed first)
        """
        assert isinstance(logger, bool), "logger param must bool type"
        self.logger = logging.get_logger('anti_header')
        self._headers: Headers = Headers({})

        if default_header:
            self._headers.update(default_header)
        self.url: str = url or "https://www.google.com/"
        self.headers_must = self.set_headers_must(must_header or {})
        self.headers_param = self.set_header_rand(rand_header or {})
        # ``dry`` only controls instance sharing; drop it whatever its value
        # so that ``dry=False`` is not passed on to UserAgent.
        kwargs.pop('dry', None)
        self._ua = UserAgent(**kwargs)

    def __getitem__(self, rule: str):
        """Item-style access to a rule (``'random'`` or ``'base'``).

        :raises KeyError: if ``rule`` is not a known rule
        """
        if rule not in self._RULES:
            raise KeyError(rule)
        return self.__getattr__(rule)

    def __getattr__(self, rule: str):
        """Resolve the dynamic ``random`` and ``base`` attributes.

        Python calls this only when normal attribute lookup fails, so any
        other name must raise ``AttributeError``; returning ``None`` would
        make ``hasattr`` report every attribute as present.

        :raises AttributeError: if ``rule`` is not a known rule
        """
        if rule == 'random':
            # Pass the User-Agent as a one-off extra instead of appending it
            # to self.headers_must, which would grow on every access.
            return self.process(extra=[self.user_agent()])
        if rule == 'base':
            for param in self.headers_must:
                self._headers.update(param)
            self._headers.update(self.user_agent())
            return self._headers
        raise AttributeError(
            f'{type(self).__name__!r} object has no attribute {rule!r}')

    def set_headers_must(self, header: dict) -> list:
        """Build the list of single-entry dicts that are always applied.

        The list holds one random lowercase key/value pair, the ``referer``
        (``self.url``) and then each entry of ``header``.
        """
        _header_must = [
            {''.join(sample(string.ascii_lowercase, 5)): ''.join(sample(string.ascii_lowercase, 6))},
            {'referer': self.url},
        ]
        _header_must.extend({key: value} for key, value in header.items())
        return _header_must

    def set_header_rand(self, header: dict) -> list:
        """Build the pool of optional headers that ``process`` samples from.

        :param header: caller-supplied headers appended to the pool
        :return: list of single-entry dicts
        """
        parse_url = urlsplit(self.url)
        # Pick at least one media range and join with commas so the value is
        # a well-formed, non-empty Accept list.
        accept_count = randint(1, len(self._ACCEPT_TYPES))
        _header_params = [
            {'accept': ','.join(sample(self._ACCEPT_TYPES, accept_count))},
            {'accept-type': 'utf-8'},
            {'accept-encoding': 'gzip, deflate'},
            {'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8'},
            {'authority': parse_url.netloc},
            {'cache-control': choice(['max-age=0', 'no-cache'])},
            {'cache-type': 'any'},
            {'content-from': 'google'},
            {'connection': 'keep-alive'},
            {"content-type": "application/x-www-form-urlencoded"},

            {'cookie': self._md_hex},

            {'DNT': '1'},
            {'Host': parse_url.netloc},
            {'Origin': self.url},
            {'Origin-content': 'false'},
            # NOTE(review): 'ture' looks like a typo for 'true'; kept as-is
            # because it is a runtime value.
            {'referer-rec': 'ture'},
            {'pragma': 'no-cache'},
            {'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="97", "Chromium";v="97"'},
            {'sec-ch-ua-mobile': '?0'},
            {'sec-ch-ua-platform': '"Windows"'},
            {'sec-fetch-site': 'same-origin'},
            {'sec-fetch-mode': 'navigate'},
            {'sec-fetch-user': '?1'},
            {'sec-fetch-dest': 'document'},
            {'TE': 'Trailers'},
            {'upgrade-type': 'none'},
            {'upgrade-insecure-requests': '1'},
            {'X-Forwarded-For': '1'},
        ]
        _header_params.extend({key: value} for key, value in header.items())
        return _header_params

    @property
    def _md_hex(self) -> str:
        """Return a fake cookie string keyed by the MD5 of 5 random letters."""
        _m = hashlib.md5()
        _m.update(''.join(sample(string.ascii_lowercase, 5)).encode())
        _m_hex = _m.hexdigest()
        return f'Hd_ert_{_m_hex}={time.time()}; Hd_ervt_{_m_hex}={time.time()}'

    def user_agent(self) -> dict:
        """Return ``{"User-Agent": ...}`` using ``self._ua.random``.

        Falls back to a fixed Chrome user-agent string when
        ``UserAgentError`` is raised.
        """
        try:
            _ua = self._ua.random
        except UserAgentError:
            _ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' \
                  ' (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
        return {"User-Agent": _ua}

    def process(self, extra: list = None) -> dict:
        """Apply a random subset of optional headers plus the must headers.

        :param extra: additional single-entry dicts applied last (so they
            win on key clashes); optional, defaults to none
        :return: the instance's header mapping, updated in place
        """
        new_header_params = sample(self.headers_param, randint(0, len(self.headers_param)))
        for param in new_header_params + self.headers_must + list(extra or []):
            self._headers.update(param)
        self.logger.debug('[Headers]: ' + str(self._headers))
        return self._headers
|
||
|
||
# Short public alias for the header builder class.
Header = UsageHeader
__version__ = '0.0.1'

VERSION = __version__

# __all__ must contain names as strings; listing the objects themselves makes
# ``from <module> import *`` raise TypeError.
__all__ = [
    'Header',
    'UsageHeader',
    'VERSION',
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from __future__ import absolute_import, unicode_literals | ||
|
||
|
||
class AntiUserAgentError(Exception):
    """User-agent related error; also exported under the name UserAgentError."""
|
||
|
||
class HeaderDeprecationWarning(Warning):
    """Warning category for deprecated header functionality."""
|
||
|
||
class NotFoundParamError(Exception):
    """Error type for a parameter that could not be found."""
|
||
|
||
# Alias: UserAgentError is the same class object as AntiUserAgentError.
UserAgentError = AntiUserAgentError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from .utils import headers_dict_to_raw | ||
from .utils import CaselessDict | ||
from .utils import to_unicode | ||
|
||
|
||
class Headers(CaselessDict):
    """HTTP headers mapping with case-insensitive keys.

    ``normkey`` title-cases a key and converts it to bytes; ``normvalue``
    converts a value into a list of bytes, so one header name can hold
    several values.
    """

    def __init__(self, seq=None, encoding='utf-8'):
        # The encoding is assigned before the parent constructor runs
        # (presumably the parent normalizes ``seq`` via normkey/normvalue,
        # which need it -- confirm against CaselessDict).
        self.encoding = encoding
        super().__init__(seq)

    def normkey(self, key):
        """Return ``key`` title-cased and converted to bytes."""
        titled = key.title()
        return self._tobytes(titled)

    def normvalue(self, value):
        """Return ``value`` as a list of bytes.

        ``None`` becomes an empty list; a single str/bytes or any
        non-iterable becomes a one-element list; other iterables are
        converted element by element.
        """
        if value is None:
            items = []
        elif isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
            items = [value]
        else:
            items = value

        return [self._tobytes(item) for item in items]

    def _tobytes(self, x):
        """Convert bytes/str/int to bytes using ``self.encoding``.

        Raises TypeError for any other type.
        """
        if isinstance(x, bytes):
            return x
        if isinstance(x, str):
            return x.encode(self.encoding)
        if isinstance(x, int):
            return str(x).encode(self.encoding)
        raise TypeError(f'Unsupported value type: {type(x)}')

    def __getitem__(self, key):
        """Return the last stored value for ``key``, or None if it has none."""
        try:
            stored = super().__getitem__(key)
            return stored[-1]
        except IndexError:
            return None

    def get(self, key, def_val=None):
        """Return the last element of the parent's ``get`` result.

        Returns None when that result is an empty sequence.
        """
        try:
            found = super().get(key, def_val)
            return found[-1]
        except IndexError:
            return None

    def getlist(self, key, def_val=None):
        """Return the full value list for ``key``.

        When the key is missing, return the normalized ``def_val`` if one
        was given, otherwise an empty list.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            return [] if def_val is None else self.normvalue(def_val)

    def setlist(self, key, list_):
        """Store ``list_`` under ``key``."""
        self[key] = list_

    def setlistdefault(self, key, default_list=()):
        """Delegate to ``setdefault`` with ``default_list`` as the default."""
        return self.setdefault(key, default_list)

    def appendlist(self, key, value):
        """Append the normalized ``value`` to the list stored under ``key``."""
        current = self.getlist(key)
        current.extend(self.normvalue(value))
        self[key] = current

    def items(self):
        """Return a generator of ``(key, value_list)`` pairs."""
        return ((name, self.getlist(name)) for name in self.keys())

    def values(self):
        """Return a list with the last value of every key."""
        last_values = []
        for name in self.keys():
            last_values.append(self[name])
        return last_values

    def to_string(self):
        """Return the headers rendered by ``headers_dict_to_raw``."""
        return headers_dict_to_raw(self)

    def to_unicode_dict(self):
        """Return the headers as a CaselessDict of unicode keys and values.

        Multiple values for one key are joined with ','.
        """
        enc = self.encoding
        pairs = (
            (to_unicode(name, encoding=enc),
             to_unicode(b','.join(value_list), encoding=enc))
            for name, value_list in self.items())
        return CaselessDict(pairs)

    def __copy__(self):
        """Return a new instance of the same class built from this one."""
        return self.__class__(self)
    copy = __copy__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from anti_header.utils.datatypes import CaselessDict | ||
from anti_header.utils.log import LogFormatter | ||
from anti_header.utils.python import * | ||
from anti_header.utils.http import headers_dict_to_raw | ||
|
||
# Package-level LogFormatter instance. Note the name: importing `logging` from
# this package yields this object, not the standard-library logging module.
logging = LogFormatter()
Oops, something went wrong.