Added manga-py source
This commit is contained in:
72
manga-py-stable_1.x/manga_py/providers/helpers/_http2.py
Normal file
72
manga-py-stable_1.x/manga_py/providers/helpers/_http2.py
Normal file
@@ -0,0 +1,72 @@
|
||||
class Http2:
    """Downloads every chapter of a provider as its own archive file.

    Wraps a provider instance, iterates its chapter list, builds one
    archive path per chapter and delegates the transfer to the
    provider's http client.  Subclasses may override the
    ``before_download``/``after_download`` hooks.
    """

    provider = None        # owning provider instance
    path_join = None       # manga_py.fs.path_join, bound in __init__
    is_file = None         # manga_py.fs.is_file, bound in __init__
    chapters = None        # chapter urls currently being processed
    chapters_count = 0     # chapters remaining after skip/max filtering

    def __init__(self, provider):
        # NOTE(review): local import — presumably avoids a circular
        # import with manga_py.fs; confirm before moving to module level.
        from manga_py.fs import path_join, is_file
        self.provider = provider
        self.path_join = path_join
        self.is_file = is_file

    def _get_name(self, idx):
        """Destination archive path for chapter *idx* (zero-padded index)."""
        destination = self.provider._params.get('destination')
        manga_name = self.provider._storage['manga_name']
        archive = '{:0>3}-{}.{}'.format(
            idx,
            self.provider.get_archive_name(),
            self.provider._archive_type(),
        )
        return self.path_join(destination, manga_name, archive)

    def __download(self, idx, name, url):
        """Download one chapter unless it is filtered out or already saved."""
        _min, _max = self._min_max_calculate()
        self.provider._info.add_volume(
            self.provider.chapter,
            self.provider.get_archive_path()
        )

        self.provider.progress(self.chapters_count, idx)

        # Skip: below the skip_volumes bound, at/above the max bound
        # (when one is set), or the archive already exists on disk.
        filtered = idx < _min or (idx >= _max > 0)
        if filtered or self.is_file(name):
            return False

        if self.provider._simulate:
            return
        try:
            self.provider.http().download_file(url, name, idx)
        except Exception as e:
            # Record the failure instead of aborting the whole loop.
            self.provider._info.set_last_volume_error(e)

    def _min_max_calculate(self):
        """Translate skip_volumes/max_volumes params into index bounds.

        Also rewrites ``chapters_count`` to the number of chapters left
        after filtering (used for progress display).

        NOTE(review): with max_volumes=m and m < len(chapters) the upper
        bound becomes len(chapters) - m, i.e. the *last* m chapters are
        skipped — verify this matches the CLI's documented semantics.
        """
        low = self.provider._params.get('skip_volumes', 0)
        high = self.provider._params.get('max_volumes', 0)
        total = len(self.chapters)
        self.chapters_count = total
        if low > 0 or high > 0:
            high = total - high if high < total else 0
            self.chapters_count = total - low - high
            if high > 0 and low > 0:
                high += low - 1
        return low, high

    def download_archives(self, chapters=None):
        """Download every chapter, invoking the before/after hooks."""
        if chapters is None:
            chapters = self.provider._storage['chapters']
        self.chapters = chapters
        for index, chapter_url in enumerate(chapters):
            self.provider.before_download_chapter()
            self.provider._storage['current_chapter'] = index
            target = self._get_name(index)
            # Hooks may rewrite the index, url and target path.
            index, chapter_url, target = self.before_download(index, chapter_url, target)
            self.__download(index, target, chapter_url)
            self.after_download(index, target)

    def before_download(self, idx, url, _path):
        """Pre-download hook; override to rewrite (idx, url, path)."""
        return idx, url, _path

    def after_download(self, idx, _path):
        """Post-download hook; default does nothing."""
        pass
|
||||
@@ -0,0 +1,81 @@
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class AnimeXtremistCom:
    """Helper for animextremist.com: chapter discovery and sorting.

    The site serves chapters either as one sub-directory per chapter or
    as a flat list of ``.html`` pages; ``get_chapters`` detects which
    layout is in use (see the example urls on the two ``_chapters_*``
    methods).
    """

    provider = None  # owning provider instance
    path = None      # base manga url, captured at construction time

    def __init__(self, provider: Provider):
        self.provider = provider
        self.path = provider.get_url()

    @staticmethod
    def build_path(item):
        """Concatenate a (base, tail) url pair into one string."""
        return item[0] + item[1]

    @staticmethod
    def __sort(item, selector):
        """Numeric sort key: the number matched by *selector*, else 0."""
        _re = selector.search(item)
        if _re:
            return int(_re.group(1))
        return 0

    def sort_items(self, items):
        """Sort (href, pages) tuples by the number embedded in the href."""
        r = self.provider.re.compile(r'.+?-(\d+)')
        return sorted(items, key=lambda i: self.__sort(i[0], r))

    def sort_images(self, items):
        """Sort page urls by the number before the .html extension."""
        r = self.provider.re.compile(r'.+/.+-(\d+)[^/]*\.html')
        return sorted(items, key=lambda i: self.__sort(i, r))

    def _chapters(self, url=None):
        """Chapter anchors from *url*, or from the cached manga page."""
        a = 'li + li > a'
        if url:
            items = self.provider.html_fromstring(url, a)
        else:
            items = self.provider.document_fromstring(self.provider.content, a)
        return items

    # Directory-per-chapter layout, e.g.
    # http://animextremist.com/mangas-online/99love/
    def _chapters_with_dirs(self, items):
        """Expand each chapter directory into a (href, [page urls]) tuple."""
        result = []
        for i in items:
            href = i.get('href')
            url = '{}{}'.format(self.path, href)
            result += [(href, ['{}{}'.format(
                url,
                a.get('href')
            ) for a in self._chapters(url)])]
        return result

    @staticmethod
    def _rebuild_dict_to_tuple(_dict):
        """Convert {key: pages} into the [(key, pages)] shape used above."""
        result = []
        for i in _dict:
            result += [(i, [a for a in _dict[i]])]
        return result

    # Flat layout (all pages in one list), e.g.
    # http://animextremist.com/mangas-online/onepiece-manga/
    def _chapters_without_dirs(self, items):
        """Group flat page links by their chapter prefix."""
        result = {}
        # TODO: original author flagged this grouping regex as provisional.
        r = self.provider.re.compile(r'(.+?-\d+)')  # todo
        for i in items:
            href = i.get('href')
            key = self.provider.re.search(r, href).group(1)
            if result.get(key) is None:
                result[key] = []
            result[key].append('{}{}'.format(self.path, href))
        return self._rebuild_dict_to_tuple(result)

    def get_chapters(self):
        """Detect the site layout and return sorted chapter tuples."""
        items = self._chapters()
        # Directory layout is recognized by the first link having no
        # .html extension.
        if len(items) and items[0].get('href').find('.html') < 0:
            items = self._chapters_with_dirs(items)
        else:
            items = self._chapters_without_dirs(items)
        return self.sort_items(items)

    def get_page_image(self, src, selector, attr='src') -> str:
        """Image url from page *src*; None when the selector matches nothing."""
        image = self.provider.html_fromstring(src, selector)
        if image and len(image):
            return image[0].get(attr)
|
||||
@@ -0,0 +1,30 @@
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class EHentaiOrg:
    """Helper for e-hentai.org: pagination, image resolution, url cleanup."""

    provider = None

    def __init__(self, provider: Provider):
        self.provider = provider

    def get_pages_count(self, parser):
        """Highest page index found in the gallery paginator (0 if none)."""
        links = parser.cssselect('.gtb table.ptt td[onclick] > a')
        indexes = [
            int(self.provider.re.search(r'\?p=(\d+)', link.get('href')).group(1))
            for link in links
        ]
        return max(indexes, default=0)

    def get_image(self, i):
        """Resolve the full-size image url behind a thumbnail anchor."""
        page_url = i.get('href')
        image_node = self.provider.html_fromstring(page_url, 'img#img', 0)
        return image_node.get('src')

    def get_url(self):
        """Provider url with any query string and fragment stripped."""
        url = self.provider.get_url()
        for stop in ('?', '#'):
            position = url.find(stop)
            if position != -1:
                url = url[:position]
        return url
|
||||
@@ -0,0 +1,26 @@
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class EightMusesCom:
    """Helper for 8muses.com: walks nested albums down to image pages."""

    provider = None

    def __init__(self, provider: Provider):
        self.provider = provider
        # Shortcut to the http client's uri normalizer.
        self._n = provider.http().normalize_uri

    def is_images_page(self, parser) -> bool:
        """True when the first matched link ends in a numeric path segment."""
        if not parser:
            return False
        first_href = parser[0].get('href')
        match = self.provider.re.search(r'/\d+$', first_href)
        return match is not None

    def parser(self, url, selector):
        """Fetch *url* (normalized) and select *selector* nodes from it."""
        normalized = self._n(url)
        return self.provider.html_fromstring(normalized, selector)

    def chapters(self, parser) -> list:
        """Recursively expand album links until image pages are reached."""
        if self.is_images_page(parser):
            return [parser]
        selector = self.provider.chapter_selector
        collected = []
        for anchor in parser:
            sub_parser = self.parser(anchor.get('href'), selector)
            collected += self.chapters(sub_parser)
        return collected
|
||||
@@ -0,0 +1,50 @@
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from manga_py.http.url_normalizer import normalize_uri
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class JavZipOrg:
    """Helper for jav-zip.org: pages through the site's ajax endpoint."""

    parser = None   # owning provider ("parser" is historical naming here)
    url = None      # chapter url; stays None for non-jav-zip.org urls
    domain = None   # scheme://netloc of the chapter url

    def __init__(self, parser: Provider):
        self.parser = parser
        url = parser.chapter
        if parser.re.search(r'jav-zip\.org', url):
            self.url = url
            _ = urlparse(url)
            self.domain = _.scheme + '://' + _.netloc

    def _parse_id(self):
        """Extract the numeric post id from the chapter url (``?p=NNN``)."""
        # Raw string: '\d' inside a plain literal is an invalid escape
        # sequence (DeprecationWarning since Python 3.6).
        return self.parser.re.search(r'/.p=(\d+)', self.url).group(1)

    def parse_images(self, content):
        """Absolute image urls from a parsed html fragment."""
        images = []
        for i in content.cssselect('img'):
            src = normalize_uri(i.get('src'), self.url)
            images.append(src)
        return images

    def get(self, step):
        """Fetch one ajax page of content starting at offset *step*.

        Returns ``(allow_more, content)`` where *allow_more* reports
        whether a "view more" link (i.e. a further page) exists.
        """
        url = '{}/wp-admin/admin-ajax.php?post={}&action=get_content&step={}'
        # BUG FIX: _parse_id was referenced without calling it, so the
        # bound-method repr ended up in the url instead of the post id.
        url = url.format(self.domain, self._parse_id(), step)
        content = self.parser.json.loads(self.parser.http_get(url))
        content = self.parser.document_fromstring(content['mes'])
        allow_more = len(content.cssselect('a.view-more')) >= 1
        return allow_more, content

    def get_images(self):
        """Page through the ajax endpoint, accumulating image urls."""
        if not self.url:
            return []
        images = []
        step = 0
        allow_more = True
        while allow_more:
            allow_more, content = self.get(step)
            step += 50  # page size used by the site's endpoint
            images += self.parse_images(content)
        return images
|
||||
53
manga-py-stable_1.x/manga_py/providers/helpers/nine_manga.py
Normal file
53
manga-py-stable_1.x/manga_py/providers/helpers/nine_manga.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from abc import ABCMeta
|
||||
from time import sleep
|
||||
from urllib.parse import unquote
|
||||
|
||||
from requests import get
|
||||
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class NineHelper(Provider, metaclass=ABCMeta):
    """Shared logic for the ninemanga-family providers."""

    # Fallback image host, used when the page declares no img_url.
    img_server = 'https://ta1.taadd.com'

    def re_name(self, url):
        """Match the manga slug out of a ``/manga/<name>.html`` url."""
        return self.re.search(r'/manga/(.+)\.html', url)

    @staticmethod
    def normalize_name(name, normalize):
        """Optionally url-decode *name*."""
        return unquote(name) if normalize else name

    def parse_img_uri(self, url):
        """Strip the scheme and host from *url*, keeping only the path."""
        return self.re.search('://[^/]+/(.+)', url).group(1)

    def get_img_server(self, content):
        """Image host declared in the page's ``img_url`` js variable,
        falling back to the class default."""
        declared = self.re.search(r'img_url\s?=\s?"([^"]+)', content)
        return declared.group(1) if declared else self.img_server

    def get_files_on_page(self, content):
        """Absolute image urls for every download link on the page."""
        anchors = self.document_fromstring(content, 'em a.pic_download')
        if not anchors:
            return []
        host = self.get_img_server(content)
        return [
            '{}/{}'.format(host, self.parse_img_uri(a.get('href')))
            for a in anchors
        ]

    def _get_page_content(self, url):
        """Fetch *url* with a throttle and custom headers.

        NOTE(review): the empty Referer presumably bypasses the site's
        hotlink guard (original note: "fix guard") — confirm.
        """
        sleep(.6)  # throttle so the site does not rate-limit us
        response = get(
            url,
            headers={
                'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
                'Referer': '',
            }  # fix guard
        )
        return response.text

    def prepare_cookies(self):
        """Seed a placeholder cloudflare cookie if none is present."""
        self._storage['cookies'].setdefault('__cfduid', '1a2b3c4d5e')
|
||||
150
manga-py-stable_1.x/manga_py/providers/helpers/std.py
Normal file
150
manga-py-stable_1.x/manga_py/providers/helpers/std.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from requests import get
|
||||
from time import sleep
|
||||
|
||||
|
||||
class Std:
    """Mixin with shared scraping and archive-naming utilities.

    Mixed into Provider subclasses; relies on host-class attributes and
    methods it references but does not define: ``content``, ``re``,
    ``http``, ``http_get``, ``domain``, ``_storage``, ``_params``,
    ``_zero_fill``, ``_with_manga_name``, ``document_fromstring``,
    ``get_url``, ``get_chapter_index``, ``get_manga_name``, ``log``.
    """

    # Set once an archive name has been generated; makes a subsequent
    # single-part volume name receive a '-0' suffix (see
    # __normal_name_list).
    _vol_fill = False

    def get_archive_name(self) -> str:
        """Archive file name built from the current chapter index."""
        idx = self.get_chapter_index()
        self._vol_fill = True
        return self.normal_arc_name({'vol': idx.split('-')})

    def _elements(self, selector, content=None) -> list:
        """CSS-select nodes from *content* (default: the cached page)."""
        if not content:
            content = self.content
        return self.document_fromstring(content, selector)

    def _cover_from_content(self, selector, attr='src') -> str:
        """Normalized cover image url, or None when nothing matches."""
        image = self._elements(selector)
        if image is not None and len(image):
            return self.http().normalize_uri(image[0].get(attr))

    @staticmethod
    def _first_select_options(parser, selector, skip_first=True) -> list:
        """Options of the first <select> matched by *selector*.

        With *skip_first* the leading (placeholder) option is dropped via
        the 'option + option' sibling selector.
        """
        options = 'option'
        if skip_first:
            options = 'option + option'
        select = parser.cssselect(selector)
        if select:
            return select[0].cssselect(options)
        return []

    @classmethod
    def _images_helper(cls, parser, selector, attr='src') -> list:
        """Attribute values of matched image nodes, whitespace-trimmed."""
        image = parser.cssselect(selector)
        return [i.get(attr).strip(' \r\n\t\0') for i in image]

    @classmethod
    def _idx_to_x2(cls, idx, default=0) -> list:
        """Normalize an index sequence to exactly two string parts,
        substituting *default* for a missing/falsy second part."""
        return [
            str(idx[0]),
            str(default if len(idx) < 2 or not idx[1] else idx[1])
        ]

    @staticmethod
    def _join_groups(idx, glue='-') -> str:
        """Join the truthy elements of *idx* with *glue*."""
        result = []
        for i in idx:
            if i:
                result.append(i)
        return glue.join(result)

    def _get_name(self, selector, url=None) -> str:
        """First regex group of *selector* applied to *url*
        (default: the provider's own url)."""
        if url is None:
            url = self.get_url()
        return self.re.search(selector, url).group(1)

    def _get_content(self, tpl) -> str:
        """GET *tpl* formatted with (domain, manga_name)."""
        return self.http_get(tpl.format(self.domain, self.manga_name))

    def _base_cookies(self, url=None):
        """Store the site's base cookies in provider storage."""
        if url is None:
            url = self.get_url()
        cookies = self.http().get_base_cookies(url)
        self._storage['cookies'] = cookies.get_dict()

    def parse_background(self, image) -> str:
        """Url extracted from a node's CSS background-image style."""
        selector = r'background.+?url\([\'"]?([^\s]+?)[\'"]?\)'
        url = self.re.search(selector, image.get('style'))
        return self.http().normalize_uri(url.group(1))

    @property
    def manga_name(self) -> str:
        """Manga name from storage, computed on first access."""
        name = self._storage.get('manga_name', None)
        if name is None:
            name = self.get_manga_name()
        return name

    def normal_arc_name(self, idx):
        """Build a normalized archive name from a str, list or dict index."""
        if isinstance(idx, str):
            idx = [idx]
        if isinstance(idx, list):
            self._vol_fill = True
            return self.__normal_name_list(idx)
        if isinstance(idx, dict):
            return self.__normal_name_dict(idx)
        # NOTE(review): raising a *warning* class as an exception is odd,
        # but callers may rely on catching exactly this — kept as-is.
        raise DeprecationWarning('Wrong arc name type: %s' % type(idx))

    @staticmethod
    def __fill(var, fmt: str = '-{}'):
        """Render every element of *var* through *fmt*, then strip the
        leading '-' that the repeated format produces."""
        if isinstance(var, str):
            var = [var]
        return (fmt * len(var)).format(*var).lstrip('-')

    def __normal_name_list(self, idx: list):
        """'vol_NNN[-extra...]' name; pads a lone part with '-0' when
        both _vol_fill and the host's _zero_fill flag are set."""
        fmt = 'vol_{:0>3}'
        if len(idx) > 1:
            fmt += '-{}' * (len(idx) - 1)
        elif self._vol_fill and self._zero_fill:
            idx.append('0')
            fmt += '-{}'
        return fmt.format(*idx)

    def __normal_name_dict(self, idx: dict):
        """Name from {'vol': ..., 'ch': ...}, optionally prefixed with
        the manga name when the host's _with_manga_name flag is set."""
        vol = idx.get('vol', None)
        ch = idx.get('ch', None)
        result = ''
        if vol:
            if isinstance(vol, str):
                vol = [vol]
            result = self.__normal_name_list(vol)
        if ch:
            result += '-ch_' + self.__fill(ch)

        if self._with_manga_name:
            name = self._params.get('name', '')
            if not len(name):
                name = self.manga_name

            result = '%s-%s' % (name, result)

        return result

    def text_content(self, content, selector, idx: int = 0, strip: bool = True):
        """Text of the idx-th node matched by *selector*, or None when
        nothing matches."""
        doc = self.document_fromstring(content, selector)
        if not doc:
            return None
        text = doc[idx].text_content()
        if strip:
            text = text.strip()
        return text

    def _download(self, file_name, url, method):
        """Download *url* into *file_name*, retrying up to 5 times on
        http errors (>= 400).

        *method* is accepted for interface compatibility but unused here.
        """
        # clean file downloader
        now_try_count = 0
        while now_try_count < 5:
            # NOTE(review): 'wb' re-truncates the file on every retry, so
            # a final failure leaves an empty file behind.
            with open(file_name, 'wb') as out_file:
                now_try_count += 1
                response = get(url, timeout=60, allow_redirects=True)
                if response.status_code >= 400:
                    self.http().debug and self.log('ERROR! Code {}\nUrl: {}'.format(
                        response.status_code,
                        url,
                    ))
                    sleep(2)
                    continue
                out_file.write(response.content)
                response.close()
                # Redundant: the with-block closes the file anyway.
                out_file.close()
                break
|
||||
41
manga-py-stable_1.x/manga_py/providers/helpers/tapas_io.py
Normal file
41
manga-py-stable_1.x/manga_py/providers/helpers/tapas_io.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from manga_py.meta import __downloader_uri__
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class TapasIo:
    """Helper for tapas.io: fetches episode content through the json api."""

    provider = None

    def __init__(self, provider: Provider):
        self.provider = provider

    def _content(self, content):
        """Dispatch on the payload type; returns None for unknown types."""
        content_type = content.get('type', None)
        if content_type == 'DEFAULT':
            return self._type_default(content)

    def _error(self, content):
        """Log an api failure, asking the user to report the url upstream."""
        message = '\r\nERROR!\r\nCode: {}\r\nType: {}\r\nPlease, send url to developer ({})'
        self.provider.log(message.format(
            content['code'],
            content['type'],
            __downloader_uri__
        ))

    def _type_default(self, content):
        """Image urls from a DEFAULT-type payload's embedded html."""
        html = content.get('data', {}).get('html', '<html></html>')
        items = self.provider.document_fromstring(html, '.art-image')
        return [i.get('src') for i in items]

    def chapter_url(self):
        """Api endpoint for the current chapter."""
        return '{}/episode/view/{}'.format(
            self.provider.domain,
            self.provider.chapter['id']
        )

    def parse_chapter_content(self):
        """Fetch and parse the chapter; logs and degrades on api errors."""
        raw = self.provider.http_get(self.chapter_url())
        content = self.provider.json.loads(raw)
        if content['code'] != 200:
            self._error(content)
            return []
        images = self._content(content)
        if images is None:
            self._error(content)
        return images
|
||||
@@ -0,0 +1,53 @@
|
||||
from manga_py.crypt import Puzzle
|
||||
from manga_py.fs import get_temp_path, rename
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class TonariNoYjJp:
    """Helper for tonarinoyj.jp: chapter discovery and image de-scrambling."""

    provider = None
    # The site slices each image into div_num x div_num tiles.
    div_num = 4
    multiply = 8
    matrix = None      # tile index -> source tile index (built in __init__)
    temp_path = None   # template for the intermediate de-scrambled file

    def __init__(self, provider: Provider):
        self.provider = provider
        self.temp_path = get_temp_path('__image_matrix{}.png')
        matrix = {}
        # Transpose mapping: destination tile i draws from
        # (i % n) * n + i // n, i.e. rows and columns swapped.
        for i in range(self.div_num * self.div_num):
            matrix[i] = (i % self.div_num) * self.div_num + i // self.div_num
        self.matrix = matrix

    def _chapter_api_content(self, idx) -> dict:
        """Fetch the readable-products api page for *idx*; {} on non-json."""
        api = '{}/api/viewer/readable_products?current_readable_product_id={}&' \
              'number_since=99&number_until=-1&read_more_num=100&type=episode'
        content = self.provider.http_get(api.format(self.provider.domain, idx))
        # Cheap json sniff: error pages come back as html, not json.
        # startswith also handles an empty response safely (content[0]
        # would raise IndexError).
        if content.startswith('{'):
            return self.provider.json.loads(content)
        return {}

    def _check_need_next_chapter(self, next_url):
        """True when *next_url* points at a page that still has chapters."""
        if next_url:
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence (DeprecationWarning since Python 3.6).
            test = self.provider.re.search(r'number_since=(\d+)', next_url).group(1)
            if int(test) > 1:
                return True
        return False

    def get_chapters(self, idx) -> list:
        """Collect chapter ids, following api pagination recursively."""
        content = self._chapter_api_content(idx)
        items = self.provider.document_fromstring(content.get('html', '<html></html>'), '.series-episode-list-thumb')
        need_more = self._check_need_next_chapter(content.get('nextUrl', None))
        if need_more:
            # NOTE(review): nextUrl (a full url) is fed back in as the
            # product-id parameter — looks suspicious; confirm against
            # the live api before changing.
            items += self.get_chapters(content.get('nextUrl'))
        re = self.provider.re.compile(r'/episode-thumbnail/(\d+)')
        return [re.search(i.get('src')).group(1) for i in items]

    def solve_image(self, path, idx):
        """De-scramble the tile puzzle in *path*, replacing it in place.

        Best-effort: any failure leaves the original scrambled file.
        """
        try:
            solver = Puzzle(self.div_num, self.div_num, self.matrix, self.multiply)
            solver.need_copy_orig = True
            _ = self.temp_path.format(idx)
            solver.de_scramble(path, _)
            rename(_, path)
        except Exception:
            pass  # deliberate: keep the scrambled image rather than crash
|
||||
@@ -0,0 +1,29 @@
|
||||
from requests import Session
|
||||
|
||||
from manga_py.base_classes import WebDriver
|
||||
from manga_py.provider import Provider
|
||||
|
||||
|
||||
class TsuminoCom:
    """Helper for tsumino.com: passes the reCAPTCHA gate to obtain cookies."""

    provider = None  # owning provider instance

    def __init__(self, provider: Provider):
        self.provider = provider

    def get_cookies(self, url):
        """Solve the site's captcha gate and return the resulting cookies.

        Drives a real browser to load *url*, extracts the reCAPTCHA token
        from the captcha iframe, then replays it against the site's
        ``/Read/AuthProcess`` endpoint and returns that response's cookies.
        """
        web_driver = WebDriver()
        driver = web_driver.get_driver()
        driver.get(url)
        iframe = driver.find_element_by_css_selector(".g-recaptcha iframe")
        src = self.provider.http_get(iframe.get_attribute('src'))
        driver.close()

        # The iframe markup carries the token in a hidden input field.
        g_token = self.provider.html_fromstring(src).cssselect('#recaptcha-token')
        session = Session()
        # NOTE(review): Id/Page are hard-coded to 1 — presumably any valid
        # pair satisfies the auth endpoint; confirm against the site.
        h = session.post('{}/Read/AuthProcess'.format(self.provider.domain), data={
            'g-recaptcha-response': g_token[0].get('value'),
            'Id': 1,
            'Page': 1,
        })
        session.close()
        return h.cookies
|
||||
Reference in New Issue
Block a user