Added manga-py source

2019-12-14 22:33:14 -05:00
parent 9a4dd4b09b
commit 45067caea6
420 changed files with 18054 additions and 0 deletions

View File

@@ -0,0 +1,121 @@
from sys import stderr
from time import sleep
import requests
from manga_py.fs import get_temp_path, make_dirs, remove_file_query_params, basename, path_join, dirname, file_size
from .multi_threads import MultiThreads
from .request import Request
from .url_normalizer import normalize_uri
class Http(Request):
count_retries = 20
has_error = False
mute = False
def __init__(
self,
allow_webp=True,
referer='',
user_agent=None,
proxies=None,
cookies=None,
kwargs=None
):
super().__init__()
self.__set_param('allow_webp', allow_webp)
self.__set_param('referer', referer)
self.__set_param('user_agent', user_agent)
self.__set_param('proxies', proxies)
self.__set_param('cookies', cookies)
self.__set_param('kwargs', kwargs)
def __set_param(self, name, value):
if value is not None:
self_val = getattr(self, name)
_type = type(self_val)
if self_val is not None and not isinstance(value, _type):
raise AttributeError('{} type not {}'.format(name, _type))
setattr(self, name, value)
    def _download(self, file_name, url, method):
        with open(file_name, 'wb') as out_file:
            response = self.requests(url, method=method, timeout=60, allow_redirects=True)
            if response.status_code >= 400:
                self.debug and print('\nERROR! Code {}\nUrl: {}\n'.format(
                    response.status_code,
                    url,
                ))
                sleep(2)
                if response.status_code == 403:
                    # fall back to a plain requests call without the session's
                    # headers and cookies, which some hosts reject with 403
                    response = requests.request(method=method, url=url, timeout=60, allow_redirects=True)
            if response.status_code < 400:
                out_file.write(response.content)
            response.close()
def _safe_downloader(self, url, file_name, method='get') -> bool:
try:
make_dirs(dirname(file_name))
url = self.normalize_uri(url)
self._download(file_name, url, method)
except OSError as ex:
self.debug and print(ex)
return False
return True
    def _download_one_file_helper(self, url, dst, callback: callable = None, success_callback: callable = None,
                                  callback_args=()):
        # returns True on success, None when the saved file is suspiciously small
        # (< 64 bytes), and False once count_retries attempts are exhausted
        r = 0
while r < self.count_retries:
if self._safe_downloader(url, dst):
if file_size(dst) < 64:
return None
callable(success_callback) and success_callback(dst, *callback_args)
return True
r += 1
mode = 'Retry'
if r >= self.count_retries:
mode = 'Skip image'
callable(callback) and callback(text=mode)
return False
def download_file(self, url: str,
dst: str = None,
idx=-1,
callback: callable = None,
success_callback: callable = None,
callback_args=()) -> bool:
if not dst:
name = basename(remove_file_query_params(url))
dst = path_join(get_temp_path(), name)
result = self._download_one_file_helper(url, dst, callback, success_callback, callback_args)
if result is None and not self.mute:
self.has_error = True # issue 161
self.debug and print('\nWarning: 0 bit image downloaded, please check for redirection or broken content', file=stderr)
            if ~idx:  # idx defaults to -1 and ~(-1) == 0, so this fires only when a real page index was passed
self.debug and print('Broken url: %s\nPage idx: %d' % (url, (1 + idx)), file=stderr)
return result
def normalize_uri(self, uri, referer=None):
if not referer:
referer = self.referer
if isinstance(uri, str):
return normalize_uri(uri.strip(), referer)
return uri
def multi_download_get(self, urls, dst: str = None, callback: callable = None):
threading = MultiThreads()
for idx, url in enumerate(urls):
threading.add(self.download_file, (url, dst, idx))
threading.start(callback)
def get_redirect_url(self, url, **kwargs):
location = self.requests(url=url, method='head', **kwargs)
url = location.headers.get('Location', url)
return self.normalize_uri(url)
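
A minimal usage sketch for the Http helper above; the manga_py.http import path, URLs and file paths are placeholders assumed for illustration, not part of this commit:

# usage sketch; import path, URLs and destinations are assumptions
from manga_py.http import Http

http = Http(referer='https://example.org/manga/')
# single image: True on success, None for a <64-byte file, False after retries
ok = http.download_file('https://example.org/img/page-001.png', dst='/tmp/page-001.png', idx=0)
# several pages via the thread pool; each file name is derived from its URL
http.multi_download_get(['https://example.org/img/page-%03d.png' % i for i in range(1, 4)])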

View File

@@ -0,0 +1,53 @@
import requests
from lxml.html import document_fromstring
class AutoProxy:
checked_url = 'https://httpbin.org/ip'
@staticmethod
def __strip(text):
return text.text_content().strip(' \n\t\r\0')
def _s(self, item):
td = item.cssselect('td')
proxy = self.__strip(td[4]) # proxy type
https = self.__strip(td[6]) # https (yes|no)
if (
proxy == 'anonymous'
or proxy == 'elite proxy'
) and https == 'yes':
return self.__strip(td[0]) + ':' + self.__strip(td[1])
return None
def _test_proxy(self, url):
proxies = {
'http': url,
'https': url,
}
try:
requests.head(url=self.checked_url, proxies=proxies, timeout=6)
except Exception:
return False
return proxies
def _change_checked_url(self, checked_url):
if checked_url:
self.checked_url = checked_url
def auto_proxy(self, checked_url=None):
self._change_checked_url(checked_url)
url = 'https://www.us-proxy.org'
items = document_fromstring(requests.get(url).text)
items = items.cssselect('#proxylisttable tbody tr')
        for item in items:
            proxy = self._s(item)
            if proxy:
                # return the first candidate that answers the test request
                test = self._test_proxy(proxy)
                if test:
                    return test
        return None
auto_proxy = AutoProxy().auto_proxy
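
A sketch of how the scraped proxy might be plugged into the Request/Http class from the first file; the Http import is assumed:

# sketch: pick the first working anonymous proxy from us-proxy.org
proxies = auto_proxy()   # {'http': 'ip:port', 'https': 'ip:port'} or None
if proxies:
    http = Http()        # Http / Request defined above; import path assumed
    http.set_proxy(proxies)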

View File

@@ -0,0 +1,33 @@
import random
import time
from manga_py.crypt.base_lib import BaseLib
from .request import Request
class GoogleDCP:
host = 'proxy.googlezip.net'
authkey = 'ac4500dd3b7579186c1b0620614fdb1f7d61f944'
http = None
def __init__(self, http: Request):
self.http = http
def randint(self):
return random.randint(0, 999999999)
    def _build_header(self):
        timestamp = int(time.time())
        # sid is md5("<timestamp><authkey><timestamp>"); the same timestamp must be
        # reused in the ps= field of the Chrome-Proxy header
        md5 = BaseLib.md5('{}{}{}'.format(timestamp, self.authkey, timestamp))
        return 'ps={}-{}-{}-{}, sid={}, c=win, b=3029, p=110'.format(
            timestamp,
            self.randint(),
            self.randint(),
            self.randint(),
            BaseLib.str2hex(md5.hexdigest())
        )

    def set_proxy(self):
        self.http.proxies['http'] = self.host
        # Request merges _headers into every outgoing request, so the Chrome-Proxy
        # value is passed as a header dict rather than a raw "Name: value" string
        self.http._headers = {'Chrome-Proxy': self._build_header()}
        return self.http
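
Illustrative only: a sketch of wrapping an Http instance (from the first file; import assumed) with the compression proxy. Whether the googlezip.net endpoint still honours such requests is not something this commit establishes.

# sketch: route an Http instance through the compression proxy
http = GoogleDCP(Http()).set_proxy()     # set_proxy() returns the wrapped Http
html = http.get('http://example.org/')   # only plain http traffic uses the proxy entry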

View File

@@ -0,0 +1,33 @@
from threading import Thread
class MultiThreads:
threads = None
max_threads = 2
to_run = None
def __init__(self):
self.threads = []
self.to_run = []
try:
import multiprocessing
self.max_threads = multiprocessing.cpu_count()
except Exception:
pass
def add(self, target: callable, args: tuple):
self.threads.append(Thread(target=target, args=args))
    def _run_processes(self, callback: callable = None, n: int = None):
        # n is the started-thread count modulo max_threads; a falsy n marks a batch
        # boundary (or the final call), at which point the queued threads are joined
for t in self.to_run:
if not n:
t.join()
callback is not None and callback()
def start(self, callback: callable = None):
for n, t in enumerate(self.threads): # starting all threads
t.start()
self.to_run.append(t)
self._run_processes(callback, (n + 1) % self.max_threads)
        self._run_processes(callback)  # join whatever remains in the final, partial batch
self.threads = []
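
A usage sketch for the thread pool; the worker signature mirrors the (url, dst, idx) tuple that multi_download_get passes, and the URLs are placeholders:

# usage sketch with a trivial worker
def worker(url, dst, idx):
    print('fetch', idx, url, '->', dst)

pool = MultiThreads()
for i, u in enumerate(['https://example.org/a.png', 'https://example.org/b.png']):
    pool.add(worker, (u, '/tmp', i))
pool.start()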

View File

@@ -0,0 +1,171 @@
import requests
from .url_normalizer import normalize_uri
class Request:
__redirect_base_url = ''
_headers = None
referer = ''
proxies = None
allow_webp = True
user_agent = '{} {} {} {}'.format(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
'AppleWebKit/537.36 (KHTML, like Gecko)',
'Chrome/60.0.3112.101',
'Safari/537.36'
)
default_lang = 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3'
cookies = None
kwargs = None
debug = False
response = None
_history = None
allow_send_referer = True
def __init__(self):
self.proxies = {}
self.cookies = {}
self._history = []
def __patch_headers(self, headers):
if isinstance(self._headers, dict):
for i in self._headers:
headers[i] = self._headers[i]
return headers
def _get_cookies(self, cookies=None):
return cookies if cookies else self.cookies
def _prepare_redirect_base_url(self, url):
if not self.__redirect_base_url:
self.__redirect_base_url = url
def _get_kwargs(self):
kwargs = {}
if self.kwargs:
kwargs = self.kwargs
return kwargs
def __update_cookies(self, r):
_ = r.cookies.get_dict()
for c in _:
self.cookies[c] = _[c]
    def __redirect_helper(self, r, url, method):
        proxy = None
        location = url
        if r.status_code == 303:  # See Other: repeat the request as GET
            method = 'get'
        elif r.status_code == 305:  # Use Proxy: the Location header names the proxy to use
            proxy = {
                'http': r.headers['location'],
                'https': r.headers['location'],
            }
        else:
            location = normalize_uri(r.headers['location'], self.__redirect_base_url)
        return proxy, location, method
def _requests_helper(
self, method, url, headers=None, data=None,
max_redirects=10, **kwargs
) -> requests.Response:
self._prepare_redirect_base_url(url)
headers = self.__patch_headers(headers)
args = {
'url': url,
'headers': headers,
'data': data,
}
self.__set_defaults(args, kwargs)
self.__set_defaults(args, self._get_kwargs())
args.setdefault('allow_redirects', False)
r = getattr(requests, method)(**args)
self.__update_cookies(r)
if r.is_redirect and method != 'head':
if max_redirects < 1:
self.debug and print(self._history)
raise AttributeError('Too many redirects')
self._history.append(url)
proxy, location, method = self.__redirect_helper(r, url, method)
if proxy:
kwargs['proxies'] = proxy
return self._requests_helper(
method=method, url=location, headers=headers,
data=data, max_redirects=(max_redirects - 1),
**kwargs
)
return r
@staticmethod
def __set_defaults(args_orig: dict, args_vars: dict):
for idx in args_vars:
args_orig.setdefault(idx, args_vars[idx])
def requests(
self, url: str, headers: dict = None, cookies: dict = None,
data=None, method='get', files=None, timeout=None, **kwargs
) -> requests.Response:
if not isinstance(headers, dict):
headers = {}
self._history = []
cookies = self._get_cookies(cookies)
headers.setdefault('User-Agent', self.user_agent)
if self.allow_send_referer and self.referer:
headers.setdefault('Referer', self.referer)
headers.setdefault('Accept-Language', self.default_lang)
if self.allow_webp:
headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=1.0,image/webp,image/apng,*/*;q=1.0'
kwargs.setdefault('proxies', self.proxies)
self.response = self._requests_helper(
method=method, url=url, headers=headers, cookies=cookies,
data=data, files=files, timeout=timeout,
**kwargs
)
return self.response
def get(self, url: str, headers: dict = None, cookies: dict = None, **kwargs) -> str:
response = self.requests(
url=url,
headers=headers,
cookies=cookies,
method='get',
**kwargs
)
text = response.text
response.close()
return text
    def post(self, url: str, headers: dict = None, cookies: dict = None, data: dict = None, files=None, **kwargs) -> str:
response = self.requests(
url=url,
headers=headers,
cookies=cookies,
method='post',
data=data,
files=files,
**kwargs
)
text = response.text
response.close()
return text
def reset_proxy(self):
self.proxies = {}
def set_proxy(self, proxy):
self.reset_proxy()
if isinstance(proxy, dict):
self.proxies['http'] = proxy.get('http', None)
self.proxies['https'] = proxy.get('https', None)
elif isinstance(proxy, str):
self.proxies['http'] = proxy
    def get_base_cookies(self, url: str):
        """Send a HEAD request to *url* and return the cookies the server sets."""
        response = self.requests(url=url, method='head')
        response.close()
        return response.cookies
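
A short usage sketch of the Request wrapper; the URL, proxy address and cookies shown are placeholders:

# usage sketch
req = Request()
req.referer = 'https://example.org/'
html = req.get('https://example.org/title/1')        # GET, returns response text
req.set_proxy({'http': '127.0.0.1:8118', 'https': '127.0.0.1:8118'})
jar = req.get_base_cookies('https://example.org/')   # HEAD request, returns the cookie jar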

View File

@@ -0,0 +1,25 @@
import json
import webbrowser
from packaging import version
from requests import get
from manga_py.meta import __version__, __repo_name__
def check_version():
    api_url = 'https://api.github.com/repos/' + __repo_name__ + '/releases/latest'
    api_content = json.loads(get(api_url).text)
    tag_name = api_content.get('tag_name', None)
    if tag_name and version.parse(tag_name) > version.parse(__version__):
        assets = api_content.get('assets') or []
        if assets:  # a release without uploaded assets would otherwise raise IndexError
            return tag_name, assets[0]['browser_download_url']
    return ()
def download_update():
pass
def open_browser(url):
webbrowser.open(url)
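
A usage sketch of the update check; the printed message is illustrative:

# usage sketch: notify about a newer GitHub release
update = check_version()        # () when up to date, (tag, download_url) otherwise
if update:
    tag, url = update
    print('manga-py {} is available'.format(tag))
    open_browser(url)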

View File

@@ -0,0 +1,73 @@
from urllib.parse import urlparse
class UrlNormalizer:
    @staticmethod
    def _parse_scheme(parse, base_parse):
        # fall back to the base URL's scheme when the URL itself has none
        if not parse.scheme:
            scheme = base_parse.scheme
        else:
            scheme = parse.scheme
        return scheme + '://'
@staticmethod
def _parse_netloc(parse, base_parse):
if not parse.netloc:
uri = base_parse.netloc
else:
uri = parse.netloc
return uri
@staticmethod
def _test_path_netloc(parse):
if parse.path.find('://') == 0:
return urlparse('http' + parse.path).path
return parse.path
    @staticmethod
    def __parse_rel_path(parse, base_parse):
        # resolve a relative path against the directory part of the base URL's path
        path = ''
        if base_parse.path.rfind('/') > 0:
            path = base_parse.path[0:base_parse.path.rfind('/')]
        return path.rstrip('/') + '/' + parse.path.lstrip('/')
@staticmethod
def _parse_path(parse, base_parse):
if parse.netloc:
return parse.path
_path = UrlNormalizer._test_path_netloc(parse)
if _path:
if _path.find('/') == 0:
return _path
else:
return UrlNormalizer.__parse_rel_path(parse, base_parse)
else:
return base_parse.path
@staticmethod
def _parse_query(parse):
if parse.query:
return '?' + parse.query
return ''
@staticmethod
def _parse_fragment(parse):
if parse.fragment:
return '#' + parse.fragment
return ''
@staticmethod
def url_helper(url: str, base_url: str) -> str:
parse = urlparse(url)
base_parse = urlparse(base_url)
un = UrlNormalizer
        scheme = un._parse_scheme(parse, base_parse)
        netloc = un._parse_netloc(parse, base_parse)
        path = un._parse_path(parse, base_parse)
        query = un._parse_query(parse)
        fragment = un._parse_fragment(parse)
        return scheme + netloc + path + query + fragment
normalize_uri = UrlNormalizer.url_helper
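
A usage sketch of normalize_uri; the expected results in the comments follow from tracing url_helper above, and the URLs are placeholders:

# usage sketch
print(normalize_uri('/chapter/2', 'https://example.org/manga/title'))
# -> https://example.org/chapter/2
print(normalize_uri('page-2.html', 'https://example.org/manga/title.html'))
# -> https://example.org/manga/page-2.html
print(normalize_uri('?page=3', 'https://example.org/manga/title'))
# -> https://example.org/manga/title?page=3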

View File

@@ -0,0 +1,2 @@
class WebSocket:
pass