Added manga-py source

2019-12-14 22:33:14 -05:00
parent 9a4dd4b09b
commit 45067caea6
420 changed files with 18054 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
from .abstract import Abstract
from .archive import Archive
from .base import Base
from .callbacks import Callbacks
from .cf_protect import CloudFlareProtect
from .chapter_helper import ChapterHelper
from .static import Static
from .web_driver import WebDriver

View File

@@ -0,0 +1,51 @@
from abc import abstractmethod
class Abstract:
@abstractmethod
def get_main_content(self): # call once
pass
@abstractmethod
def get_manga_name(self) -> str: # call once
return ''
@abstractmethod
def get_chapters(self) -> list: # call once
return []
    def prepare_cookies(self):  # override if the site uses cookie protection
pass
@abstractmethod
    def get_files(self) -> list:  # called on every volume loop
return []
# @abstractmethod
# def get_archive_name(self) -> str:
# pass
    # for chapters selected manually (CLI)
@abstractmethod
def get_chapter_index(self) -> str:
pass
def book_meta(self) -> dict:
pass
def before_download_chapter(self):
pass
def get_cover(self):
pass
    def before_file_save(self, url, idx) -> str:  # must return a url
return url
def after_file_save(self, _path: str, idx: int):
pass
@abstractmethod
def chapter_for_json(self) -> str:
pass
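
A minimal sketch of how a site provider might satisfy these hooks. ExampleProvider, its urls, and the parsing shortcuts are purely illustrative; a real provider also mixes in Base (below) for http_get(), get_url() and chapter, and import lines are omitted because the package path is not shown in this diff.

class ExampleProvider(Abstract):  # hypothetical provider; assumes Base is mixed in as well
    def get_main_content(self):
        # called once: fetch and cache the series page
        return self.http_get(self.get_url())
    def get_manga_name(self) -> str:
        # called once: e.g. derived from the url slug
        return 'example-manga'
    def get_chapters(self) -> list:
        # called once: every chapter url of the series
        return ['https://example.org/manga/example-manga/chapter-1']
    def get_files(self) -> list:
        # called on every chapter loop: the image urls of the current chapter
        return ['https://example.org/manga/example-manga/chapter-1/001.png']
    def get_chapter_index(self) -> str:
        return '1'
    def chapter_for_json(self) -> str:
        return self.chapter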

View File

@@ -0,0 +1,82 @@
from os import path
from zipfile import ZipFile, ZIP_DEFLATED
from manga_py.fs import is_file, make_dirs, basename, dirname, unlink, get_temp_path
# from PIL import Image as PilImage
from manga_py.image import Image
class Archive:
_archive = None
_writes = None
files = None
not_change_files_extension = False
no_webp = False
has_error = False
def __init__(self):
self.files = []
self._writes = {}
def write_file(self, data, in_arc_name):
self._writes[in_arc_name] = data
def add_file(self, file, in_arc_name=None):
if in_arc_name is None:
in_arc_name = basename(file)
self.files.append((file, in_arc_name))
def set_files_list(self, files):
self.files = files
def add_book_info(self, data):
        self.write_file(data, 'comicbook.xml')  # write_file expects (data, in_arc_name)
def __add_files(self):
for file in self.files:
if is_file(file[0]):
ext = self.__update_image_extension(file[0])
if self.no_webp and ext[ext.rfind('.'):] == '.webp':
jpeg = ext[:ext.rfind('.')] + '.jpeg'
jpeg_path = path.join(dirname(file[0]), jpeg)
Image(file[0]).convert(jpeg_path)
file = jpeg_path, jpeg
elif ext:
file = file[0], ext
self._archive.write(*file)
def __add_writes(self):
for file in self._writes:
self._archive.writestr(file, self._writes[file])
def add_info(self, data):
self.write_file(data, 'info.txt')
    def make(self, dst):
        if not self.files and not self._writes:
            return
        make_dirs(dirname(dst))
        self._archive = ZipFile(dst, 'w', ZIP_DEFLATED)
        try:
            self.__add_files()
            self.__add_writes()
        finally:
            # always close the archive, even if adding a file fails
            self._archive.close()
        self._maked()
def _maked(self):
for file in self.files:
unlink(file[0])
def __update_image_extension(self, filename) -> str:
fn, extension = path.splitext(filename)
if not self.not_change_files_extension:
ext = Image.real_extension(get_temp_path(filename))
if ext:
extension = ext
return basename(fn + extension)
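
A rough usage sketch for Archive; the page paths and destination are illustrative, and it assumes the pages were saved where get_temp_path() can find them, since __update_image_extension() inspects the temp copy.

arc = Archive()  # hypothetical usage: pack downloaded pages into a .cbz
arc.no_webp = True  # convert any .webp page to .jpeg while archiving
for page in ('001.png', '002.png'):
    arc.add_file(page)  # stored under its basename
arc.add_info('downloaded with manga-py')  # becomes info.txt inside the archive
arc.make('Manga/example-manga/vol_001.cbz')  # writes the zip, then unlinks the source files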

View File

@@ -0,0 +1,157 @@
import re
from os import path
from sys import stderr
from loguru import logger
from lxml.html import HtmlElement
from manga_py.http import Http
from manga_py.image import Image
class Base:
_storage = None
_params = None
_image_params = None
_http_kwargs = None
__http = None
def __init__(self):
self._storage = {
'cookies': {},
'main_content': None,
'chapters': [],
'current_chapter': 0,
'current_file': 0,
'proxies': {},
'domain_uri': None,
}
self._params = {
'destination': 'Manga',
'cf-protect': False,
}
self._image_params = {
'crop': (0, 0, 0, 0),
# 'crop': (left, upper, right, lower)
'auto_crop': False,
# 'auto_crop': True,
}
self._http_kwargs = {}
def _archive_type(self):
arc_type = 'zip'
if self._params['cbz']:
arc_type = 'cbz'
return arc_type
def get_url(self):
return self._params['url']
@property
def domain(self) -> str:
try:
if not self._storage.get('domain_uri', None):
self._storage['domain_uri'] = re.search('(https?://[^/]+)', self._params['url']).group(1)
return self._storage.get('domain_uri', '')
except Exception:
            print('url is broken!', file=stderr)
            exit(1)  # malformed url: bail out with a non-zero status
@staticmethod
def image_auto_crop(src_path, dest_path=None):
image = Image(src_path=src_path)
image.crop_auto(dest_path=dest_path)
image.close()
    def image_manual_crop(self, src_path, dest_path=None):  # sizes: (left, top, right, bottom)
        crop = self._image_params['crop']
        if isinstance(crop, tuple) and crop != (0, 0, 0, 0):
            image = Image(src_path=src_path)
            image.crop_manual_with_offsets(offsets=crop, dest_path=dest_path)
            image.close()
def _build_http_params(self, params):
if params is None:
params = {}
params.setdefault('allow_webp', not self._params.get('disallow_webp', None))
params.setdefault('referer', self._storage.get('referer', self.domain))
params.setdefault('user_agent', self._get_user_agent())
params.setdefault('proxies', self._storage.get('proxies', None))
params.setdefault('cookies', self._storage.get('cookies', None))
params.setdefault('kwargs', self._http_kwargs)
return params
def http(self, new=False, params=None) -> Http:
http_params = self._build_http_params(params)
if new:
http = Http(**http_params)
return http
elif not self.__http:
self.__http = Http(**http_params)
return self.__http
def http_get(self, url: str, headers: dict = None, cookies: dict = None):
return self.http().get(url=url, headers=headers, cookies=cookies)
def http_post(self, url: str, headers: dict = None, cookies: dict = None, data=()):
return self.http().post(url=url, headers=headers, cookies=cookies, data=data)
def _get_user_agent(self):
ua_storage = self._storage.get('user_agent', None)
ua_params = self._params.get('user_agent', None)
if self._params.get('cf_protect', False):
return ua_storage
return ua_params
@property
def chapter_id(self):
return self._storage.get('current_chapter', 0)
@chapter_id.setter
def chapter_id(self, idx):
self._storage['current_chapter'] = idx
@classmethod
def __normalize_chapters(cls, n, element):
if isinstance(element, HtmlElement):
return n(element.get('href'))
if isinstance(element, str):
return n(element)
return element
def _prepare_chapters(self, chapters):
n = self.http().normalize_uri
items = []
        if chapters:
for i in chapters:
url = self.__normalize_chapters(n, i)
items.append(url)
else:
            logger.warning('Chapters list is empty. Check %s' % self.get_url())
return items
@property
def chapter(self):
return self._storage['chapters'][self.chapter_id]
def get_current_file(self):
return self._storage['files'][self._storage['current_file']]
def book_meta(self) -> dict:
return {}
def _image_name(self, idx, filename):
if idx is None:
idx = self._storage['current_file']
fn, extension = path.splitext(filename)
_path = '{:0>3}_{}'.format(idx, fn)
if self._params['rename_pages']:
_path = '{:0>3}'.format(idx)
return _path + extension
def chapter_for_json(self) -> str:
return self.chapter
def put_info_json(self, meta):
# manga_name, url, directory
pass
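
A short sketch of the Base plumbing in isolation; normally the CLI fills _params, and the exact return type of Http.get() is not shown in this diff, so treat http_get() here as opaque.

provider = Base()  # hypothetical: driving the helpers by hand
provider._params['url'] = 'https://example.org/manga/example-manga'
print(provider.domain)  # -> https://example.org (parsed lazily from the url)
content = provider.http_get(provider.get_url())  # every call reuses one lazily built Http client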

View File

@@ -0,0 +1,35 @@
from typing import Callable
class Callbacks:
def _call_files_progress_callback(self):
if callable(self.progress):
_max, _current = len(self._storage['files']), self._storage['current_file']
self.progress(_max, _current, _current < 1)
def set_quest_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'quest', callback)
def set_progress_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'progress', callback)
def set_log_callback(self, callback: Callable): # Required call from initiator (CLI, GUI)
setattr(self, 'log', callback)
    def set_quest_password_callback(self, callback: Callable):  # Required call from initiator (CLI, GUI)
setattr(self, 'quest_password', callback)
def quest(self, *args, **kwargs):
pass
def quest_password(self, *args, **kwargs):
pass
def progress(self, *args, **kwargs):
pass
def log(self, *args, **kwargs):
pass
def book_meta(self) -> dict:
return {}
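
A brief sketch of how an initiator wires its own progress handler in; print_progress and its formatting are made up, but the callback signature matches _call_files_progress_callback() above.

def attach_progress(provider):  # hypothetical; provider is any class mixing in Callbacks
    def print_progress(files_count, current_file, is_first_file):
        suffix = ' (new chapter)' if is_first_file else ''
        print('\r[{}/{}]{}'.format(current_file, files_count, suffix), end='')
    provider.set_progress_callback(print_progress)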

View File

@@ -0,0 +1,18 @@
from sys import stderr
import cloudscraper
class CloudFlareProtect:
protector = []
def run(self, url): # pragma: no cover
if not self.protector:
scraper = cloudscraper.create_scraper()
try:
self.protector = scraper.get_tokens(url)
except Exception as e:
print('CF error! %s' % e, file=stderr)
return self.protector
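
cloudscraper's get_tokens() returns a (cookies, user_agent) pair, so the cached protector value can be handed back to a provider; the helper below is a hypothetical sketch of that handoff, using the _storage keys initialised in Base.

def apply_cf_tokens(provider, url):  # hypothetical glue code
    tokens = CloudFlareProtect().run(url)  # (cookies_dict, user_agent) on success, [] on failure
    if tokens:
        provider._storage['cookies'].update(tokens[0])
        provider._storage['user_agent'] = tokens[1]  # _get_user_agent() prefers this when cf_protect is set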

View File

@@ -0,0 +1,15 @@
# cli chapters parser
class ChapterHelper:
chapters = ''
def __init__(self, chapters: str):
self.chapters = chapters
        if isinstance(self.chapters, str):
            # the cli passes chapter indexes as a space-separated string
            self.chapters = [int(idx) for idx in self.chapters.split(' ')]
    def get_chapters(self, urls):
        chapters = []
        for i, url in enumerate(urls):
            if i in self.chapters:
                chapters.append(url)  # append the single matching url, not the whole list
return chapters
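
A quick sketch of the intended CLI flow, assuming the indexes are parsed to ints as in __init__ above; the urls are illustrative.

helper = ChapterHelper('0 2')  # hypothetical: the user asked for the 1st and 3rd chapters
urls = [
    'https://example.org/manga/example/chapter-1',
    'https://example.org/manga/example/chapter-2',
    'https://example.org/manga/example/chapter-3',
]
selected = helper.get_chapters(urls)  # -> the urls at indexes 0 and 2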

View File

@@ -0,0 +1,41 @@
from lxml.html import document_fromstring
from purifier.purifier import HTMLPurifier
class Static:
@staticmethod
def _clear_html(body):
purifier = HTMLPurifier({
'div': ['*'], 'span': ['*'],
'img': ['*'], 'a': ['*'],
'h1': ['*'], 'h2': ['*'],
'h3': ['*'], 'h4': ['*'],
'h5': ['*'], 'h6': ['*'],
})
return purifier.feed(body)
@staticmethod
def document_fromstring(body, selector: str = None, idx: int = None): # pragma: no cover
result = document_fromstring(body) # todo
if isinstance(selector, str):
result = result.cssselect(selector)
if isinstance(idx, int):
result = result[idx]
return result
@staticmethod
def _set_if_not_none(var, key, value): # pragma: no cover
if value is not None:
var[key] = value
@staticmethod
    def __test_ascii(i):
        o = ord(i)
        # allow ascii 40-126, excluding * / \ ^ ; codepoints above 161 pass through unchanged
        allowed = 39 < o < 127 and o not in (42, 47, 92, 94)
        return allowed or o > 161
@staticmethod
def remove_not_ascii(value):
return "".join(i for i in value if i == '_' or Static.__test_ascii(i))

View File

@@ -0,0 +1,48 @@
from os import chmod
from sys import platform
from zipfile import ZipFile
from requests import get
from manga_py.fs import is_file, dirname, path_join, get_util_home_path
class WebDriver:
driver_version = '2.40'
@staticmethod
    def is_win():
        # sys.platform is 'win32' on Windows
        return platform.startswith('win32')
    def download_driver(self):
        url_prefix = 'https://chromedriver.storage.googleapis.com/'
        url = '/chromedriver_linux64.zip'
        if platform.startswith('darwin'):
            url = '/chromedriver_mac64.zip'
        if self.is_win():
            url = '/chromedriver_win32.zip'
path = path_join(get_util_home_path(), 'driver.zip')
        with open(path, 'wb') as driver:
            # the with-block closes the file; no explicit close() needed
            driver.write(get(url_prefix + self.driver_version + url).content)
with ZipFile(path) as file:
file.extractall(dirname(self._driver_path()))
def _driver_path(self):
if self.is_win():
driver = 'chromedriver.exe'
else:
driver = 'chromedriver'
return path_join(get_util_home_path(), driver)
def get_driver(self):
        from selenium import webdriver  # imported lazily; only needed when a captcha is detected
        driver_path = self._driver_path()
        if not is_file(driver_path):
            self.download_driver()
        if not self.is_win():
            chmod(driver_path, 0o755)
driver = webdriver.Chrome(executable_path=driver_path)
driver.set_window_size(500, 600)
return driver
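
A short sketch of the expected flow when a provider hits a captcha: get_driver() fetches a matching chromedriver into the util home directory on first use and opens a small Chrome window; the url is illustrative and a local Chrome/Chromium install is assumed.

wd = WebDriver()  # hypothetical captcha workaround
driver = wd.get_driver()  # downloads chromedriver 2.40 on the first run
driver.get('https://example.org/manga/protected-page')  # let the user solve the captcha here
cookies = driver.get_cookies()  # hand these back to the http client afterwards
driver.quit()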