Added manga-py source
This commit is contained in:
8
manga-py-stable_1.x/manga_py/base_classes/__init__.py
Normal file
8
manga-py-stable_1.x/manga_py/base_classes/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from .abstract import Abstract
|
||||
from .archive import Archive
|
||||
from .base import Base
|
||||
from .callbacks import Callbacks
|
||||
from .cf_protect import CloudFlareProtect
|
||||
from .chapter_helper import ChapterHelper
|
||||
from .static import Static
|
||||
from .web_driver import WebDriver
|
||||
51
manga-py-stable_1.x/manga_py/base_classes/abstract.py
Normal file
51
manga-py-stable_1.x/manga_py/base_classes/abstract.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from abc import abstractmethod
|
||||
|
||||
|
||||
class Abstract:
    """Interface every site provider implements.

    Methods marked ``@abstractmethod`` must be overridden; the rest are
    optional hooks with safe default behavior.
    """

    @abstractmethod
    def get_main_content(self):  # call once
        """Fetch the manga's landing-page content."""
        pass

    @abstractmethod
    def get_manga_name(self) -> str:  # call once
        """Return the manga title (used for naming the destination)."""
        return ''

    @abstractmethod
    def get_chapters(self) -> list:  # call once
        """Return the full list of chapter references."""
        return []

    def prepare_cookies(self):  # if site with cookie protect
        """Optional hook: obtain cookies for cookie-protected sites."""
        pass

    @abstractmethod
    def get_files(self) -> list:  # call ever volume loop
        """Return the image URLs of the current chapter."""
        return []

    # for chapters selected by manual (cli)
    @abstractmethod
    def get_chapter_index(self) -> str:
        """Return a printable index for the current chapter."""
        pass

    def book_meta(self) -> dict:
        """Optional hook: metadata for e-book output (None by default)."""
        pass

    def before_download_chapter(self):
        """Optional hook: runs just before a chapter download starts."""
        pass

    def get_cover(self):
        """Optional hook: return the cover image URL, if any."""
        pass

    def before_file_save(self, url, idx) -> str:  # return url !
        """Optional hook: rewrite a file URL right before saving it."""
        return url

    def after_file_save(self, _path: str, idx: int):
        """Optional hook: runs after a file has been written to disk."""
        pass

    @abstractmethod
    def chapter_for_json(self) -> str:
        """Return the chapter representation stored in info.json."""
        pass
||||
82
manga-py-stable_1.x/manga_py/base_classes/archive.py
Normal file
82
manga-py-stable_1.x/manga_py/base_classes/archive.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from os import path
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
from manga_py.fs import is_file, make_dirs, basename, dirname, unlink, get_temp_path
|
||||
# from PIL import Image as PilImage
|
||||
from manga_py.image import Image
|
||||
|
||||
|
||||
class Archive:
    """Builds the output zip/cbz archive for a downloaded chapter.

    Files are collected either as paths on disk (``add_file``) or as
    in-memory payloads (``write_file``), then written out by ``make``.
    """

    _archive = None                      # ZipFile handle while make() runs
    _writes = None                       # dict: in-archive name -> raw data
    files = None                         # list of (disk_path, in_archive_name)
    not_change_files_extension = False   # keep original extensions as-is
    no_webp = False                      # convert .webp pages to .jpeg
    has_error = False

    def __init__(self):
        self.files = []
        self._writes = {}

    def write_file(self, data, in_arc_name):
        """Queue raw *data* to be stored in the archive as *in_arc_name*."""
        self._writes[in_arc_name] = data

    def add_file(self, file, in_arc_name=None):
        """Queue a file on disk; archive name defaults to its basename."""
        if in_arc_name is None:
            in_arc_name = basename(file)
        self.files.append((file, in_arc_name))

    def set_files_list(self, files):
        """Replace the queued disk-file list wholesale."""
        self.files = files

    def add_book_info(self, data):
        """Queue ComicInfo XML payload.

        BUG FIX: arguments were swapped — ``write_file`` takes
        ``(data, in_arc_name)``, so the original stored an entry *named*
        after the XML payload with the content ``'comicbook.xml'``.
        """
        self.write_file(data, 'comicbook.xml')

    def add_info(self, data):
        """Queue the plain-text info payload as ``info.txt``."""
        self.write_file(data, 'info.txt')

    def __add_files(self):
        # Write queued disk files, fixing extensions and (optionally)
        # converting webp pages to jpeg first.
        for file in self.files:
            if is_file(file[0]):
                ext = self.__update_image_extension(file[0])
                if self.no_webp and ext[ext.rfind('.'):] == '.webp':
                    jpeg = ext[:ext.rfind('.')] + '.jpeg'
                    jpeg_path = path.join(dirname(file[0]), jpeg)
                    Image(file[0]).convert(jpeg_path)
                    file = jpeg_path, jpeg
                elif ext:
                    file = file[0], ext
                self._archive.write(*file)

    def __add_writes(self):
        # Write queued in-memory payloads.
        for file in self._writes:
            self._archive.writestr(file, self._writes[file])

    def make(self, dst):
        """Create the archive at *dst*; no-op when nothing is queued.

        On success the source files are removed (``_maked``); on failure
        the partially written archive is closed and the error propagates.
        """
        if not self.files and not self._writes:
            return

        make_dirs(dirname(dst))

        self._archive = ZipFile(dst, 'w', ZIP_DEFLATED)
        # Original closed the handle up to three times; close exactly once.
        try:
            self.__add_files()
            self.__add_writes()
        finally:
            self._archive.close()
        self._maked()

    def _maked(self):
        """Remove the source files that were packed into the archive."""
        for file in self.files:
            unlink(file[0])

    def __update_image_extension(self, filename) -> str:
        """Return the archive name for *filename*, with the extension
        corrected to the image's real format unless disabled."""
        fn, extension = path.splitext(filename)
        if not self.not_change_files_extension:
            ext = Image.real_extension(get_temp_path(filename))
            if ext:
                extension = ext
        return basename(fn + extension)
||||
157
manga-py-stable_1.x/manga_py/base_classes/base.py
Normal file
157
manga-py-stable_1.x/manga_py/base_classes/base.py
Normal file
@@ -0,0 +1,157 @@
|
||||
import re
|
||||
from os import path
|
||||
from sys import stderr
|
||||
|
||||
from loguru import logger
|
||||
from lxml.html import HtmlElement
|
||||
|
||||
from manga_py.http import Http
|
||||
from manga_py.image import Image
|
||||
|
||||
|
||||
class Base:
    """Shared state and helpers for every provider: storage dicts,
    HTTP client access, chapter bookkeeping and image post-processing."""

    _storage = None       # per-download mutable state (cookies, chapters, ...)
    _params = None        # CLI / caller parameters
    _image_params = None  # crop settings for downloaded pages
    _http_kwargs = None   # extra kwargs forwarded to the Http client
    __http = None         # cached Http instance (lazy)

    def __init__(self):
        self._storage = {
            'cookies': {},
            'main_content': None,
            'chapters': [],
            'current_chapter': 0,
            'current_file': 0,
            'proxies': {},
            'domain_uri': None,
        }
        self._params = {
            'destination': 'Manga',
            'cf-protect': False,
        }
        self._image_params = {
            'crop': (0, 0, 0, 0),
            # 'crop': (left, upper, right, lower)
            'auto_crop': False,
            # 'auto_crop': True,
        }
        self._http_kwargs = {}

    def _archive_type(self):
        """Return 'cbz' when requested via params, else 'zip'."""
        arc_type = 'zip'
        if self._params['cbz']:
            arc_type = 'cbz'
        return arc_type

    def get_url(self):
        """Return the manga URL passed by the caller."""
        return self._params['url']

    @property
    def domain(self) -> str:
        """Scheme+host of the manga URL, cached in storage.

        Exits the process when the URL cannot be parsed (CLI behavior
        kept from the original).
        """
        try:
            if not self._storage.get('domain_uri', None):
                self._storage['domain_uri'] = re.search('(https?://[^/]+)', self._params['url']).group(1)
            return self._storage.get('domain_uri', '')
        except Exception:
            print('url is broken!', file=stderr)
            exit()

    @staticmethod
    def image_auto_crop(src_path, dest_path=None):
        """Auto-crop borders of the image at *src_path*."""
        image = Image(src_path=src_path)
        image.crop_auto(dest_path=dest_path)
        image.close()

    def image_manual_crop(self, src_path, dest_path=None):  # sizes: (left, top, right, bottom)
        """Crop the image using the configured manual offsets.

        BUG FIX: the original compared ``isinstance(...)`` (a bool)
        against ``(0, 0, 0, 0)``, which is always True — the "no crop
        configured" guard never worked.
        """
        offsets = self._image_params['crop']
        if isinstance(offsets, tuple) and offsets != (0, 0, 0, 0):
            image = Image(src_path=src_path)
            image.crop_manual_with_offsets(offsets=offsets, dest_path=dest_path)
            image.close()

    def _build_http_params(self, params):
        """Fill default HTTP client parameters without clobbering
        caller-supplied ones."""
        if params is None:
            params = {}
        params.setdefault('allow_webp', not self._params.get('disallow_webp', None))
        params.setdefault('referer', self._storage.get('referer', self.domain))
        params.setdefault('user_agent', self._get_user_agent())
        params.setdefault('proxies', self._storage.get('proxies', None))
        params.setdefault('cookies', self._storage.get('cookies', None))
        params.setdefault('kwargs', self._http_kwargs)
        return params

    def http(self, new=False, params=None) -> Http:
        """Return the shared Http client, or a fresh one when *new*.

        Always returns a client: the original could fall through and
        return None when the cached instance already existed.
        """
        http_params = self._build_http_params(params)
        if new:
            return Http(**http_params)
        if not self.__http:
            self.__http = Http(**http_params)
        return self.__http

    def http_get(self, url: str, headers: dict = None, cookies: dict = None):
        """GET *url* via the shared client."""
        return self.http().get(url=url, headers=headers, cookies=cookies)

    def http_post(self, url: str, headers: dict = None, cookies: dict = None, data=()):
        """POST *data* to *url* via the shared client."""
        return self.http().post(url=url, headers=headers, cookies=cookies, data=data)

    def _get_user_agent(self):
        # Under CF protection the UA must match the one that solved the
        # challenge (kept in storage); otherwise use the caller's UA.
        ua_storage = self._storage.get('user_agent', None)
        ua_params = self._params.get('user_agent', None)
        if self._params.get('cf_protect', False):
            return ua_storage
        return ua_params

    @property
    def chapter_id(self):
        """Index of the chapter currently being processed."""
        return self._storage.get('current_chapter', 0)

    @chapter_id.setter
    def chapter_id(self, idx):
        self._storage['current_chapter'] = idx

    @classmethod
    def __normalize_chapters(cls, n, element):
        # Accepts lxml elements (use href), plain strings, or anything
        # else (passed through untouched).
        if isinstance(element, HtmlElement):
            return n(element.get('href'))
        if isinstance(element, str):
            return n(element)
        return element

    def _prepare_chapters(self, chapters):
        """Normalize raw chapter references into absolute URLs."""
        n = self.http().normalize_uri
        items = []
        if chapters and len(chapters):
            for i in chapters:
                url = self.__normalize_chapters(n, i)
                items.append(url)
        else:
            logger.warning('Chapters list empty. Check %s' % self.get_url())
        return items

    @property
    def chapter(self):
        """The chapter entry currently selected by ``chapter_id``."""
        return self._storage['chapters'][self.chapter_id]

    def get_current_file(self):
        """The file entry currently selected inside the chapter."""
        return self._storage['files'][self._storage['current_file']]

    def book_meta(self) -> dict:
        """Optional e-book metadata; providers may override."""
        return {}

    def _image_name(self, idx, filename):
        """Build the on-disk page name, zero-padded to 3 digits."""
        if idx is None:
            idx = self._storage['current_file']
        fn, extension = path.splitext(filename)
        _path = '{:0>3}_{}'.format(idx, fn)
        if self._params['rename_pages']:
            _path = '{:0>3}'.format(idx)
        return _path + extension

    def chapter_for_json(self) -> str:
        """Chapter representation written to info.json."""
        return self.chapter

    def put_info_json(self, meta):
        # manga_name, url, directory
        pass
||||
35
manga-py-stable_1.x/manga_py/base_classes/callbacks.py
Normal file
35
manga-py-stable_1.x/manga_py/base_classes/callbacks.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from typing import Callable
|
||||
|
||||
|
||||
class Callbacks:
    """Mix-in holding the UI callbacks (quest/progress/log).

    Initiators (CLI, GUI) register real callables via the ``set_*``
    methods; the defaults below are safe no-ops.
    """

    def _call_files_progress_callback(self):
        """Report per-file progress for the current chapter."""
        if callable(self.progress):
            total = len(self._storage['files'])
            current = self._storage['current_file']
            # third argument signals "first file of the chapter"
            self.progress(total, current, current < 1)

    def set_quest_callback(self, callback: Callable):  # Required call from initiator (CLI, GUI)
        """Register the interactive-question callback."""
        setattr(self, 'quest', callback)

    def set_progress_callback(self, callback: Callable):  # Required call from initiator (CLI, GUI)
        """Register the progress-reporting callback."""
        setattr(self, 'progress', callback)

    def set_log_callback(self, callback: Callable):  # Required call from initiator (CLI, GUI)
        """Register the logging callback."""
        setattr(self, 'log', callback)

    def set_quest_password_callback(self, callback: Callable):  # Required call from iterator (CLI, GUI)
        """Register the password-prompt callback."""
        setattr(self, 'quest_password', callback)

    # --- no-op defaults, overridden per-instance by the setters above ---

    def quest(self, *args, **kwargs):
        pass

    def quest_password(self, *args, **kwargs):
        pass

    def progress(self, *args, **kwargs):
        pass

    def log(self, *args, **kwargs):
        pass

    def book_meta(self) -> dict:
        """Default (empty) e-book metadata."""
        return {}
||||
18
manga-py-stable_1.x/manga_py/base_classes/cf_protect.py
Normal file
18
manga-py-stable_1.x/manga_py/base_classes/cf_protect.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from sys import stderr
|
||||
|
||||
import cloudscraper
|
||||
|
||||
|
||||
class CloudFlareProtect:
    """Solves the CloudFlare challenge once and caches the tokens.

    NOTE(review): ``protector`` is a class-level attribute, so the token
    cache is shared by every instance — presumably intentional (solve
    once per process), but worth confirming.
    """

    protector = []

    def run(self, url):  # pragma: no cover
        """Return cached CF tokens, solving the challenge on first use."""
        if not self.protector:
            scraper = cloudscraper.create_scraper()
            try:
                self.protector = scraper.get_tokens(url)
            except Exception as error:
                # best-effort: report and fall through with an empty cache
                print('CF error! %s' % error, file=stderr)
        return self.protector
||||
15
manga-py-stable_1.x/manga_py/base_classes/chapter_helper.py
Normal file
15
manga-py-stable_1.x/manga_py/base_classes/chapter_helper.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# cli chapters parser
|
||||
# cli chapters parser
class ChapterHelper:
    """Filters a chapter URL list down to the indices selected on the CLI.

    The CLI passes selections as a space-separated string, e.g. ``"0 2 5"``.
    """

    chapters = ''

    def __init__(self, chapters: str):
        self.chapters = chapters
        if isinstance(self.chapters, str):
            # "0 2 5" -> ['0', '2', '5']
            self.chapters = self.chapters.split(' ')

    def get_chapters(self, urls):
        """Return only the urls whose position was selected.

        BUG FIX: the original compared the int index against the list of
        *strings* produced by split() (never matched) and appended the
        whole ``urls`` list instead of the single ``url``.
        """
        selected = []
        for i, url in enumerate(urls):
            # accept both int and string selections for robustness
            if i in self.chapters or str(i) in self.chapters:
                selected.append(url)
        return selected
||||
41
manga-py-stable_1.x/manga_py/base_classes/static.py
Normal file
41
manga-py-stable_1.x/manga_py/base_classes/static.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from lxml.html import document_fromstring
|
||||
from purifier.purifier import HTMLPurifier
|
||||
|
||||
|
||||
class Static:
|
||||
|
||||
@staticmethod
|
||||
def _clear_html(body):
|
||||
purifier = HTMLPurifier({
|
||||
'div': ['*'], 'span': ['*'],
|
||||
'img': ['*'], 'a': ['*'],
|
||||
'h1': ['*'], 'h2': ['*'],
|
||||
'h3': ['*'], 'h4': ['*'],
|
||||
'h5': ['*'], 'h6': ['*'],
|
||||
})
|
||||
return purifier.feed(body)
|
||||
|
||||
@staticmethod
|
||||
def document_fromstring(body, selector: str = None, idx: int = None): # pragma: no cover
|
||||
result = document_fromstring(body) # todo
|
||||
if isinstance(selector, str):
|
||||
result = result.cssselect(selector)
|
||||
if isinstance(idx, int):
|
||||
result = result[idx]
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _set_if_not_none(var, key, value): # pragma: no cover
|
||||
if value is not None:
|
||||
var[key] = value
|
||||
|
||||
@staticmethod
|
||||
def __test_ascii(i):
|
||||
o = ord(i)
|
||||
_ = 39 < o < 127
|
||||
_ = _ and o not in [42, 47, 92, 94]
|
||||
return _ or o > 161
|
||||
|
||||
@staticmethod
|
||||
def remove_not_ascii(value):
|
||||
return "".join(i for i in value if i == '_' or Static.__test_ascii(i))
|
||||
48
manga-py-stable_1.x/manga_py/base_classes/web_driver.py
Normal file
48
manga-py-stable_1.x/manga_py/base_classes/web_driver.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from os import chmod
|
||||
from sys import platform
|
||||
from zipfile import ZipFile
|
||||
|
||||
from requests import get
|
||||
|
||||
from manga_py.fs import is_file, dirname, path_join, get_util_home_path
|
||||
|
||||
|
||||
class WebDriver:
    """Downloads and launches a pinned chromedriver for captcha pages."""

    driver_version = '2.40'

    @staticmethod
    def is_win():
        """Truthy on Windows.

        Replaces the opaque ``~platform.find('win32')`` trick with an
        equivalent-truthiness boolean (sys.platform is 'win32' there).
        """
        return platform.startswith('win')

    def download_drivder(self):  # NOTE: typo kept — the name is part of the public interface
        """Fetch the chromedriver zip for this OS and unpack it."""
        url_prefix = 'https://chromedriver.storage.googleapis.com/'
        url = '/chromedriver_linux64.zip'
        if platform.startswith('darwin'):
            url = '/chromedriver_mac64.zip'
        if self.is_win():
            url = '/chromedriver_win32.zip'

        archive_path = path_join(get_util_home_path(), 'driver.zip')

        # the with-statement closes the file; the original also called
        # driver.close() redundantly inside the block
        with open(archive_path, 'wb') as driver:
            driver.write(get(url_prefix + self.driver_version + url).content)
        with ZipFile(archive_path) as archive:
            archive.extractall(dirname(self._driver_path()))

    def _driver_path(self):
        """Expected on-disk location of the chromedriver binary."""
        if self.is_win():
            driver = 'chromedriver.exe'
        else:
            driver = 'chromedriver'
        return path_join(get_util_home_path(), driver)

    def get_driver(self):
        """Return a ready selenium Chrome driver, downloading it if needed."""
        from selenium import webdriver  # need, if captcha detected
        driver_path = self._driver_path()
        if not is_file(driver_path):
            self.download_drivder()
        if not self.is_win():
            # downloaded binary is not executable by default
            chmod(driver_path, 0o755)
        driver = webdriver.Chrome(executable_path=driver_path)
        driver.set_window_size(500, 600)
        return driver
||||
Reference in New Issue
Block a user