Added manga-py source

2019-12-14 22:33:14 -05:00
parent 9a4dd4b09b
commit 45067caea6
420 changed files with 18054 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
from .abstract import Abstract
from .archive import Archive
from .base import Base
from .callbacks import Callbacks
from .cf_protect import CloudFlareProtect
from .chapter_helper import ChapterHelper
from .static import Static
from .web_driver import WebDriver

View File

@@ -0,0 +1,51 @@
from abc import abstractmethod
class Abstract:
@abstractmethod
def get_main_content(self): # call once
pass
@abstractmethod
def get_manga_name(self) -> str: # call once
return ''
@abstractmethod
def get_chapters(self) -> list: # call once
return []
    def prepare_cookies(self):  # override if the site uses cookie protection
pass
@abstractmethod
    def get_files(self) -> list:  # called on every volume loop
return []
# @abstractmethod
# def get_archive_name(self) -> str:
# pass
    # for chapters selected manually (CLI)
@abstractmethod
def get_chapter_index(self) -> str:
pass
def book_meta(self) -> dict:
pass
def before_download_chapter(self):
pass
def get_cover(self):
pass
    def before_file_save(self, url, idx) -> str:  # must return a url
return url
def after_file_save(self, _path: str, idx: int):
pass
@abstractmethod
def chapter_for_json(self) -> str:
pass
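
A minimal sketch of how a site provider might satisfy these hooks. ExampleProvider, its urls, and the parsing shortcuts are purely illustrative; a real provider also mixes in Base (below) for http_get(), get_url() and chapter, and import lines are omitted because the package path is not shown in this diff.

class ExampleProvider(Abstract):  # hypothetical provider; assumes Base is mixed in as well
    def get_main_content(self):
        # called once: fetch and cache the series page
        return self.http_get(self.get_url())
    def get_manga_name(self) -> str:
        # called once: e.g. derived from the url slug
        return 'example-manga'
    def get_chapters(self) -> list:
        # called once: every chapter url of the series
        return ['https://example.org/manga/example-manga/chapter-1']
    def get_files(self) -> list:
        # called on every chapter loop: the image urls of the current chapter
        return ['https://example.org/manga/example-manga/chapter-1/001.png']
    def get_chapter_index(self) -> str:
        return '1'
    def chapter_for_json(self) -> str:
        return self.chapter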

View File

@@ -0,0 +1,82 @@
from os import path
from zipfile import ZipFile, ZIP_DEFLATED
from manga_py.fs import is_file, make_dirs, basename, dirname, unlink, get_temp_path
# from PIL import Image as PilImage
from manga_py.image import Image
class Archive:
_archive = None
_writes = None
files = None
not_change_files_extension = False
no_webp = False
has_error = False
def __init__(self):
self.files = []
self._writes = {}
def write_file(self, data, in_arc_name):
self._writes[in_arc_name] = data
def add_file(self, file, in_arc_name=None):
if in_arc_name is None:
in_arc_name = basename(file)
self.files.append((file, in_arc_name))
def set_files_list(self, files):
self.files = files
def add_book_info(self, data):
        self.write_file(data, 'comicbook.xml')  # write_file expects (data, in_arc_name)
def __add_files(self):
for file in self.files:
if is_file(file[0]):
ext = self.__update_image_extension(file[0])
if self.no_webp and ext[ext.rfind('.'):] == '.webp':
jpeg = ext[:ext.rfind('.')] + '.jpeg'
jpeg_path = path.join(dirname(file[0]), jpeg)
Image(file[0]).convert(jpeg_path)
file = jpeg_path, jpeg
elif ext:
file = file[0], ext
self._archive.write(*file)
def __add_writes(self):
for file in self._writes:
self._archive.writestr(file, self._writes[file])
def add_info(self, data):
self.write_file(data, 'info.txt')
    def make(self, dst):
        if not self.files and not self._writes:
            return
        make_dirs(dirname(dst))
        self._archive = ZipFile(dst, 'w', ZIP_DEFLATED)
        try:
            self.__add_files()
            self.__add_writes()
        finally:
            # always close the archive, even if adding a file fails
            self._archive.close()
        self._maked()
def _maked(self):
for file in self.files:
unlink(file[0])
def __update_image_extension(self, filename) -> str:
fn, extension = path.splitext(filename)
if not self.not_change_files_extension:
ext = Image.real_extension(get_temp_path(filename))
if ext:
extension = ext
return basename(fn + extension)
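
A rough usage sketch for Archive; the page paths and destination are illustrative, and it assumes the pages were saved where get_temp_path() can find them, since __update_image_extension() inspects the temp copy.

arc = Archive()  # hypothetical usage: pack downloaded pages into a .cbz
arc.no_webp = True  # convert any .webp page to .jpeg while archiving
for page in ('001.png', '002.png'):
    arc.add_file(page)  # stored under its basename
arc.add_info('downloaded with manga-py')  # becomes info.txt inside the archive
arc.make('Manga/example-manga/vol_001.cbz')  # writes the zip, then unlinks the source files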

View File

@@ -0,0 +1,157 @@
import re
from os import path
from sys import stderr
from loguru import logger
from lxml.html import HtmlElement
from manga_py.http import Http
from manga_py.image import Image
class Base:
_storage = None
_params = None
_image_params = None
_http_kwargs = None
__http = None
def __init__(self):
self._storage = {
'cookies': {},
'main_content': None,
'chapters': [],
'current_chapter': 0,
'current_file': 0,
'proxies': {},
'domain_uri': None,
}
self._params = {
'destination': 'Manga',
'cf-protect': False,
}
self._image_params = {
'crop': (0, 0, 0, 0),
# 'crop': (left, upper, right, lower)
'auto_crop': False,
# 'auto_crop': True,
}
self._http_kwargs = {}
def _archive_type(self):
arc_type = 'zip'
if self._params['cbz']:
arc_type = 'cbz'
return arc_type
def get_url(self):
return self._params['url']
@property
def domain(self) -> str:
try:
if not self._storage.get('domain_uri', None):
self._storage['domain_uri'] = re.search('(https?://[^/]+)', self._params['url']).group(1)
return self._storage.get('domain_uri', '')
except Exception:
            print('url is broken!', file=stderr)
            exit(1)  # malformed url: bail out with a non-zero status
@staticmethod
def image_auto_crop(src_path, dest_path=None):
image = Image(src_path=src_path)
image.crop_auto(dest_path=dest_path)
image.close()
    def image_manual_crop(self, src_path, dest_path=None):  # sizes: (left, top, right, bottom)
        crop = self._image_params['crop']
        if isinstance(crop, tuple) and crop != (0, 0, 0, 0):
            image = Image(src_path=src_path)
            image.crop_manual_with_offsets(offsets=crop, dest_path=dest_path)
            image.close()
def _build_http_params(self, params):
if params is None:
params = {}
params.setdefault('allow_webp', not self._params.get('disallow_webp', None))
params.setdefault('referer', self._storage.get('referer', self.domain))
params.setdefault('user_agent', self._get_user_agent())
params.setdefault('proxies', self._storage.get('proxies', None))
params.setdefault('cookies', self._storage.get('cookies', None))
params.setdefault('kwargs', self._http_kwargs)
return params
def http(self, new=False, params=None) -> Http:
http_params = self._build_http_params(params)
if new:
http = Http(**http_params)
return http
elif not self.__http:
self.__http = Http(**http_params)
return self.__http
def http_get(self, url: str, headers: dict = None, cookies: dict = None):
return self.http().get(url=url, headers=headers, cookies=cookies)
def http_post(self, url: str, headers: dict = None, cookies: dict = None, data=()):
return self.http().post(url=url, headers=headers, cookies=cookies, data=data)
def _get_user_agent(self):
ua_storage = self._storage.get('user_agent', None)
ua_params = self._params.get('user_agent', None)
if self._params.get('cf_protect', False):
return ua_storage
return ua_params
@property
def chapter_id(self):
return self._storage.get('current_chapter', 0)
@chapter_id.setter
def chapter_id(self, idx):
self._storage['current_chapter'] = idx
@classmethod
def __normalize_chapters(cls, n, element):
if isinstance(element, HtmlElement):
return n(element.get('href'))
if isinstance(element, str):
return n(element)
return element
def _prepare_chapters(self, chapters):
n = self.http().normalize_uri
items = []
        if chapters:
for i in chapters:
url = self.__normalize_chapters(n, i)
items.append(url)
else:
            logger.warning('Chapters list is empty. Check %s' % self.get_url())
return items
@property
def chapter(self):
return self._storage['chapters'][self.chapter_id]
def get_current_file(self):
return self._storage['files'][self._storage['current_file']]
def book_meta(self) -> dict:
return {}
def _image_name(self, idx, filename):
if idx is None:
idx = self._storage['current_file']
fn, extension = path.splitext(filename)
_path = '{:0>3}_{}'.format(idx, fn)
if self._params['rename_pages']:
_path = '{:0>3}'.format(idx)
return _path + extension
def chapter_for_json(self) -> str:
return self.chapter
def put_info_json(self, meta):
# manga_name, url, directory
pass
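
A short sketch of the Base plumbing in isolation; normally the CLI fills _params, and the exact return type of Http.get() is not shown in this diff, so treat http_get() here as opaque.

provider = Base()  # hypothetical: driving the helpers by hand
provider._params['url'] = 'https://example.org/manga/example-manga'
print(provider.domain)  # -> https://example.org (parsed lazily from the url)
content = provider.http_get(provider.get_url())  # every call reuses one lazily built Http client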

View File

@@ -0,0 +1,35 @@
from typing import Callable
class Callbacks:
def _call_files_progress_callback(self):
if callable(self.progress):
_max, _current = len(self._storage['files']), self._storage['current_file']
self.progress(_max, _current, _current < 1)
def set_quest_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'quest', callback)
def set_progress_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'progress', callback)
def set_log_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'log', callback)
    def set_quest_password_callback(self, callback: Callable):  # Required call from initiator (CLI, GUI)
setattr(self, 'quest_password', callback)
def quest(self, *args, **kwargs):
pass
def quest_password(self, *args, **kwargs):
pass
def progress(self, *args, **kwargs):
pass
def log(self, *args, **kwargs):
pass
def book_meta(self) -> dict:
return {}
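
A brief sketch of how an initiator wires its own progress handler in; print_progress and its formatting are made up, but the callback signature matches _call_files_progress_callback() above.

def attach_progress(provider):  # hypothetical; provider is any class mixing in Callbacks
    def print_progress(files_count, current_file, is_first_file):
        suffix = ' (new chapter)' if is_first_file else ''
        print('\r[{}/{}]{}'.format(current_file, files_count, suffix), end='')
    provider.set_progress_callback(print_progress)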

View File

@@ -0,0 +1,18 @@
from sys import stderr
import cloudscraper
class CloudFlareProtect:
protector = []
def run(self, url): # pragma: no cover
if not self.protector:
scraper = cloudscraper.create_scraper()
try:
self.protector = scraper.get_tokens(url)
except Exception as e:
print('CF error! %s' % e, file=stderr)
return self.protector
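
cloudscraper's get_tokens() returns a (cookies, user_agent) pair, so the cached protector value can be handed back to a provider; the helper below is a hypothetical sketch of that handoff, using the _storage keys initialised in Base.

def apply_cf_tokens(provider, url):  # hypothetical glue code
    tokens = CloudFlareProtect().run(url)  # (cookies_dict, user_agent) on success, [] on failure
    if tokens:
        provider._storage['cookies'].update(tokens[0])
        provider._storage['user_agent'] = tokens[1]  # _get_user_agent() prefers this when cf_protect is set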

View File

@@ -0,0 +1,15 @@
# cli chapters parser
class ChapterHelper:
chapters = ''
def __init__(self, chapters: str):
self.chapters = chapters
        if isinstance(self.chapters, str):
            # the cli passes chapter indexes as a space-separated string
            self.chapters = [int(idx) for idx in self.chapters.split(' ')]
    def get_chapters(self, urls):
        chapters = []
        for i, url in enumerate(urls):
            if i in self.chapters:
                chapters.append(url)  # append the single matching url, not the whole list
return chapters
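
A quick sketch of the intended CLI flow, assuming the indexes are parsed to ints as in __init__ above; the urls are illustrative.

helper = ChapterHelper('0 2')  # hypothetical: the user asked for the 1st and 3rd chapters
urls = [
    'https://example.org/manga/example/chapter-1',
    'https://example.org/manga/example/chapter-2',
    'https://example.org/manga/example/chapter-3',
]
selected = helper.get_chapters(urls)  # -> the urls at indexes 0 and 2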

View File

@@ -0,0 +1,41 @@
from lxml.html import document_fromstring
from purifier.purifier import HTMLPurifier
class Static:
@staticmethod
def _clear_html(body):
purifier = HTMLPurifier({
'div': ['*'], 'span': ['*'],
'img': ['*'], 'a': ['*'],
'h1': ['*'], 'h2': ['*'],
'h3': ['*'], 'h4': ['*'],
'h5': ['*'], 'h6': ['*'],
})
return purifier.feed(body)
@staticmethod
def document_fromstring(body, selector: str = None, idx: int = None): # pragma: no cover
result = document_fromstring(body) # todo
if isinstance(selector, str):
result = result.cssselect(selector)
if isinstance(idx, int):
result = result[idx]
return result
@staticmethod
def _set_if_not_none(var, key, value): # pragma: no cover
if value is not None:
var[key] = value
@staticmethod
    def __test_ascii(i):
        o = ord(i)
        # allow ascii 40-126, excluding * / \ ^ ; codepoints above 161 pass through unchanged
        allowed = 39 < o < 127 and o not in (42, 47, 92, 94)
        return allowed or o > 161
@staticmethod
def remove_not_ascii(value):
return "".join(i for i in value if i == '_' or Static.__test_ascii(i))

View File

@@ -0,0 +1,48 @@
from os import chmod
from sys import platform
from zipfile import ZipFile
from requests import get
from manga_py.fs import is_file, dirname, path_join, get_util_home_path
class WebDriver:
driver_version = '2.40'
@staticmethod
    def is_win():
        # sys.platform is 'win32' on Windows
        return platform.startswith('win32')
    def download_driver(self):
        url_prefix = 'https://chromedriver.storage.googleapis.com/'
        url = '/chromedriver_linux64.zip'
        if platform.startswith('darwin'):
            url = '/chromedriver_mac64.zip'
        if self.is_win():
            url = '/chromedriver_win32.zip'
path = path_join(get_util_home_path(), 'driver.zip')
        with open(path, 'wb') as driver:
            # the with-block closes the file; no explicit close() needed
            driver.write(get(url_prefix + self.driver_version + url).content)
with ZipFile(path) as file:
file.extractall(dirname(self._driver_path()))
def _driver_path(self):
if self.is_win():
driver = 'chromedriver.exe'
else:
driver = 'chromedriver'
return path_join(get_util_home_path(), driver)
def get_driver(self):
        from selenium import webdriver  # imported lazily; only needed when a captcha is detected
        driver_path = self._driver_path()
        if not is_file(driver_path):
            self.download_driver()
        if not self.is_win():
            chmod(driver_path, 0o755)
driver = webdriver.Chrome(executable_path=driver_path)
driver.set_window_size(500, 600)
return driver
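
A short sketch of the expected flow when a provider hits a captcha: get_driver() fetches a matching chromedriver into the util home directory on first use and opens a small Chrome window; the url is illustrative and a local Chrome/Chromium install is assumed.

wd = WebDriver()  # hypothetical captcha workaround
driver = wd.get_driver()  # downloads chromedriver 2.40 on the first run
driver.get('https://example.org/manga/protected-page')  # let the user solve the captcha here
cookies = driver.get_cookies()  # hand these back to the http client afterwards
driver.quit()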