Added manga-py source

2019-12-14 22:33:14 -05:00
parent 9a4dd4b09b
commit 45067caea6
420 changed files with 18054 additions and 0 deletions
--- a/manga-py-stable_1.x/manga_py/provider.py
+++ b/manga-py-stable_1.x/manga_py/provider.py
@@ -0,0 +1,274 @@
+import json
+import re
+from abc import ABC
+from sys import stderr
+
+from .base_classes import (
+    Abstract,
+    Archive,
+    Base,
+    Callbacks,
+    # TODO
+    CloudFlareProtect,
+    Static
+)
+from .fs import (
+    get_temp_path,
+    is_file,
+    basename,
+    remove_file_query_params,
+    path_join,
+    unlink,
+    file_size,
+)
+from .http import MultiThreads
+from .info import Info
+from .meta import __downloader_uri__
+from .meta import __version__
+
+
+class Provider(Base, Abstract, Static, Callbacks, ABC):
+    # Class-level defaults. ``_params_parser`` overwrites the option flags
+    # per instance; ``__init__`` sets ``_info``; ``loop_files`` sets ``_archive``.
+    _volumes_count = 0  # not referenced by the methods of this class
+    _archive = None  # Archive instance of the chapter currently being saved
+    _zero_fill = False  # 'zero_fill' option
+    _with_manga_name = False  # 'with_manga_name' option
+    _info = None  # optional Info collector, may stay None
+    _simulate = False  # 'simulate' option: when truthy, chapters are not downloaded
+    _volume = None  # not assigned by the methods of this class
+    _show_chapter_info = False  # 'show_current_chapter_info' option
+    __debug = False  # 'debug' option; name-mangled to _Provider__debug
+    _override_name = ''  # 'override_archive_name' option
+
+    def __init__(self, info: Info = None):
+        super().__init__()
+        self.re = re
+        self.json = json
+        self._params['temp_directory'] = get_temp_path()
+        self._info = info
+
+    def _params_parser(self, params):
+        # image params
+        self._set_if_not_none(self._image_params, 'crop_blank', params.get('crop_blank', False))
+        self._set_if_not_none(
+            self._image_params, 'crop',
+            (params.get('xt', 0),
+             params.get('xr', 0),
+             params.get('xb', 0),
+             params.get('xl', 0)),
+        )
+        self._image_params['no_webp'] = params.get('no_webp', False)
+        # downloading params
+        self._set_if_not_none(self._params, 'destination', params.get('destination', None))
+        self._zero_fill = params.get('zero_fill')
+        self._with_manga_name = params.get('with_manga_name')
+        self._simulate = params.get('simulate')
+        self._show_chapter_info = params.get('show_current_chapter_info', False)
+        self.__debug = params.get('debug', False)
+        self._override_name = self._params.get('override_archive_name')
+        if self._with_manga_name and self._override_name:
+            raise RuntimeError('Conflict of parameters. Please use only --with-manga-name, or --override-archive-name')
+
+    def process(self, url, params=None):  # Main method
+        self._params['url'] = url
+        params = params if isinstance(params, dict) else {}
+        self._params_parser(params)
+        for i in params:
+            self._params.setdefault(i, params[i])
+
+        proxy = params.get('proxy', None)
+        if proxy is not None:
+            self._storage['proxies'] = {
+                'http': proxy,
+                'https': proxy,
+            }
+
+        self.prepare_cookies()
+        self._storage['manga_name'] = self.get_manga_name()
+        self._storage['main_content'] = self.content
+        self._storage['chapters'] = self._prepare_chapters(self.get_chapters())
+
+        if not self._params.get('reverse_downloading', False):
+            self._storage['chapters'] = self._storage['chapters'][::-1]
+
+        self._storage['init_cookies'] = self._storage['cookies']
+        self._info and self._info.set_ua(self.http().user_agent)
+
+        self.loop_chapters()
+
+    def _check_archive(self):
+        # check
+        _path = self.get_archive_path()
+        not_allow_archive = not self._params.get('rewrite_exists_archives', False)
+
+        return not_allow_archive and is_file(_path)
+
+    def _download_chapter(self):
+        if not self._simulate:
+            try:
+                self.before_download_chapter()
+                self._storage['files'] = self.get_files()
+                self.loop_files()
+            except Exception as e:
+                # Main debug here
+                if self.__debug:
+                    raise e
+                self.log([e], file=stderr)
+                self._info.set_last_volume_error(e)
+
+    def loop_chapters(self):
+        volumes = self._storage['chapters']
+        _min = self._params.get('skip_volumes', 0)
+        _max = self._params.get('max_volumes', 0)
+        count = 0  # count downloaded chapters
+        for idx, __url in enumerate(volumes):
+            self.chapter_id = idx
+            if idx < _min or (count >= _max > 0) or self._check_archive():
+                continue
+            count += 1
+            self._info.add_volume(self.chapter_for_json(), self.get_archive_path())
+            self._download_chapter()
+
+    def loop_files(self):
+        if isinstance(self._storage['files'], list):
+            if self._show_chapter_info:
+                self.log('\n\nCurrent chapter url: %s\n' % (self.chapter,))
+            if len(self._storage['files']) == 0:
+                # see Std
+                self.log('Error processing file: %s' % self.get_archive_name(), file=stderr)
+                return
+            self._archive = Archive()
+            self._archive.not_change_files_extension = self._params.get('not_change_files_extension', False)
+            self._archive.no_webp = self._image_params.get('no_webp', False)
+            self._call_files_progress_callback()
+
+            self._multi_thread_save(self._storage['files'])
+
+            self.make_archive()
+
+    def _save_file_params_helper(self, url, idx):
+        if url is None:
+            _url = self.http().normalize_uri(self.get_current_file())
+        else:
+            _url = url
+        _url = self.before_file_save(_url, idx)
+        filename = remove_file_query_params(basename(_url))
+        _path = self.remove_not_ascii(self._image_name(idx, filename))
+        _path = get_temp_path(_path)
+        return _path, idx, _url
+
+    def save_file(self, idx=None, callback=None, url=None, in_arc_name=None):
+        _path, idx, _url = self._save_file_params_helper(url, idx)
+
+        if not is_file(_path) or file_size(_path) < 32:
+            self.http().download_file(_url, _path, idx)
+        self.after_file_save(_path, idx)
+        self._archive.add_file(_path)
+
+        callable(callback) and callback()
+
+        return _path
+
+    def get_archive_path(self):
+        if self._override_name:
+            _path = "{}_{}".format(self._override_name, str(self.normal_arc_name(self.get_chapter_index().split('-'))))
+        else:
+            # see Std
+            _path = remove_file_query_params(self.get_archive_name())
+        _path = self.remove_not_ascii(_path)
+
+        if not _path:
+            _path = str(self.chapter_id)
+
+        name = self._params.get('name', '')
+        if not len(name):
+            name = self._storage['manga_name']
+
+        additional_data_name = ''
+        if self.http().has_error:
+            additional_data_name = 'ERROR.'
+            self.http().has_error = False
+
+        return path_join(
+            self._params.get('destination', 'Manga'),
+            name,
+            _path + '.%s%s' % (additional_data_name, self._archive_type())
+        ) \
+            .replace('?', '_') \
+            .replace('"', '_') \
+            .replace('>', '_') \
+            .replace('<', '_') \
+            .replace('|', '_')  # Windows...
+
+    def make_archive(self):
+        _path = self.get_archive_path()
+
+        info = 'Site: {}\nDownloader: {}\nVersion: {}'.format(self.get_url(), __downloader_uri__, __version__)
+
+        # """
+        # make book info
+        # """
+        # if self._params['cbz']:
+        #     self._archive.add_book_info(self._arc_meta_info())
+
+        self._archive.add_info(info)
+        try:
+            self._archive.make(_path)
+        except OSError as e:
+            self.log('')
+            self.log(e)
+            self.log(e, file=stderr)
+            self._info.set_last_volume_error(str(e))
+            unlink(_path)
+            raise e
+
+    def html_fromstring(self, url, selector: str = None, idx: int = None):
+        params = {}
+        if isinstance(url, dict):
+            params = url['params']
+            url = url['url']
+        return self.document_fromstring(self.http_get(url, **params), selector, idx)
+
+    def _multi_thread_callback(self):
+        self._call_files_progress_callback()
+        self._storage['current_file'] += 1
+
+    def _multi_thread_save(self, files):
+        threading = MultiThreads()
+        # hack
+        self._storage['current_file'] = 0
+        if self._params.get('max_threads', None) is not None:
+            threading.max_threads = int(self._params.get('max_threads'))
+        for idx, url in enumerate(files):
+            threading.add(self.save_file, (idx, self._multi_thread_callback, url, None))
+
+        threading.start()
+
+    def cf_protect(self, url):
+        """
+        WARNING! Thins function replace cookies!
+        :param url: str
+        :return:
+        """
+        cf = CloudFlareProtect()
+        params = cf.run(url)
+        if len(params):
+            self.update_cookies(params[0])
+            self.update_ua(params[1])
+            self._params['cf-protect'] = True
+
+    def update_ua(self, ua):
+        self._storage['user_agent'] = ua
+        self.http().user_agent = ua
+        self._info and self._info.set_ua(ua)
+
+    def update_cookies(self, cookies):
+        for k in cookies:
+            self._storage['cookies'][k] = cookies[k]
+            self.http().cookies[k] = cookies[k]
+
+    @property
+    def content(self):
+        content = self._storage.get('main_content', None)
+        if content is None:
+            content = self.get_main_content()
+        return content