import json
import re
from abc import ABC
from sys import stderr

from .base_classes import (
    Abstract,
    Archive,
    Base,
    Callbacks,  # TODO
    CloudFlareProtect,
    Static,
)
from .fs import (
    get_temp_path,
    is_file,
    basename,
    remove_file_query_params,
    path_join,
    unlink,
    file_size,
)
from .http import MultiThreads
from .info import Info
from .meta import __downloader_uri__
from .meta import __version__


class Provider(Base, Abstract, Static, Callbacks, ABC):
    """
    Base class for site-specific manga providers.

    Drives the whole download pipeline: resolve manga name and chapter
    list, iterate chapters, fetch every image file and pack each chapter
    into an archive.  Concrete providers implement the abstract getters
    (``get_manga_name``, ``get_chapters``, ``get_files``, ...).
    """

    _volumes_count = 0
    _archive = None            # Archive being built for the current chapter
    _zero_fill = False
    _with_manga_name = False
    _info = None               # optional Info progress/metadata collector; may stay None
    _simulate = False
    _volume = None
    _show_chapter_info = False
    __debug = False            # when True, exceptions in chapter download are re-raised
    _override_name = ''

    def __init__(self, info: Info = None):
        super().__init__()
        # expose the modules on the instance for subclasses' convenience
        self.re = re
        self.json = json
        self._params['temp_directory'] = get_temp_path()
        self._info = info

    def _params_parser(self, params: dict) -> None:
        """
        Copy user-supplied options from *params* into internal state.

        :param params: dict of CLI/user options
        :raises RuntimeError: when both --with-manga-name and
            --override-archive-name are requested at once
        """
        # image params
        self._set_if_not_none(self._image_params, 'crop_blank', params.get('crop_blank', False))
        self._set_if_not_none(
            self._image_params, 'crop',
            (params.get('xt', 0), params.get('xr', 0), params.get('xb', 0), params.get('xl', 0)),
        )
        self._image_params['no_webp'] = params.get('no_webp', False)

        # downloading params
        self._set_if_not_none(self._params, 'destination', params.get('destination', None))
        self._zero_fill = params.get('zero_fill')
        self._with_manga_name = params.get('with_manga_name')
        self._simulate = params.get('simulate')
        self._show_chapter_info = params.get('show_current_chapter_info', False)
        self.__debug = params.get('debug', False)
        # NOTE(review): unlike the options above this reads self._params, but
        # process() merges *params* into self._params only AFTER this method
        # runs — fall back to *params* so the option is not silently lost.
        self._override_name = self._params.get('override_archive_name') \
            or params.get('override_archive_name', '')

        if self._with_manga_name and self._override_name:
            raise RuntimeError(
                'Conflict of parameters. '
                'Please use only --with-manga-name, or --override-archive-name'
            )

    def process(self, url, params=None):  # Main method
        """
        Entry point: download every chapter of the manga at *url*.

        :param url: manga main page url
        :param params: optional dict of user options (see _params_parser)
        """
        self._params['url'] = url
        params = params if isinstance(params, dict) else {}
        self._params_parser(params)
        # keep any extra user params, without overwriting existing ones
        for key in params:
            self._params.setdefault(key, params[key])

        proxy = params.get('proxy', None)
        if proxy is not None:
            self._storage['proxies'] = {
                'http': proxy,
                'https': proxy,
            }

        self.prepare_cookies()
        self._storage['manga_name'] = self.get_manga_name()
        self._storage['main_content'] = self.content
        self._storage['chapters'] = self._prepare_chapters(self.get_chapters())
        # sites normally list chapters newest-first; reverse to download
        # oldest-first unless the user asked for reverse order
        if not self._params.get('reverse_downloading', False):
            self._storage['chapters'] = self._storage['chapters'][::-1]
        self._storage['init_cookies'] = self._storage['cookies']

        if self._info:
            self._info.set_ua(self.http().user_agent)

        self.loop_chapters()

    def _check_archive(self) -> bool:
        """Return True when the chapter archive already exists and must be skipped."""
        _path = self.get_archive_path()
        not_allow_archive = not self._params.get('rewrite_exists_archives', False)
        return not_allow_archive and is_file(_path)

    def _download_chapter(self) -> None:
        """Download all files of the current chapter (no-op in simulate mode)."""
        if self._simulate:
            return
        try:
            self.before_download_chapter()
            self._storage['files'] = self.get_files()
            self.loop_files()
        except Exception as e:  # Main debug here
            if self.__debug:
                raise  # bare raise preserves the original traceback
            self.log([e], file=stderr)
            if self._info:
                self._info.set_last_volume_error(e)

    def loop_chapters(self) -> None:
        """Iterate chapters, honouring skip_volumes / max_volumes limits."""
        volumes = self._storage['chapters']
        _min = self._params.get('skip_volumes', 0)
        _max = self._params.get('max_volumes', 0)
        count = 0  # count downloaded chapters
        for idx, __url in enumerate(volumes):
            self.chapter_id = idx
            # skip: below the requested start, above the cap (_max == 0 means
            # unlimited), or archive already on disk
            if idx < _min or (count >= _max > 0) or self._check_archive():
                continue
            count += 1
            if self._info:
                self._info.add_volume(self.chapter_for_json(), self.get_archive_path())
            self._download_chapter()

    def loop_files(self) -> None:
        """Download every file of the current chapter and pack the archive."""
        if not isinstance(self._storage['files'], list):
            return
        if self._show_chapter_info:
            self.log('\n\nCurrent chapter url: %s\n' % (self.chapter,))
        if len(self._storage['files']) == 0:
            # see Std
            self.log('Error processing file: %s' % self.get_archive_name(), file=stderr)
            return
        self._archive = Archive()
        self._archive.not_change_files_extension = self._params.get('not_change_files_extension', False)
        self._archive.no_webp = self._image_params.get('no_webp', False)
        self._call_files_progress_callback()
        self._multi_thread_save(self._storage['files'])
        self.make_archive()

    def _save_file_params_helper(self, url, idx):
        """Resolve the final url and local temp path for one file download."""
        if url is None:
            _url = self.http().normalize_uri(self.get_current_file())
        else:
            _url = url
        _url = self.before_file_save(_url, idx)
        filename = remove_file_query_params(basename(_url))
        _path = self.remove_not_ascii(self._image_name(idx, filename))
        _path = get_temp_path(_path)
        return _path, idx, _url

    def save_file(self, idx=None, callback=None, url=None, in_arc_name=None):
        """
        Download one file (unless a plausible copy already exists) and add it
        to the current archive.

        :return: local path of the saved file
        """
        _path, idx, _url = self._save_file_params_helper(url, idx)
        # re-download when missing or suspiciously small (< 32 bytes)
        if not is_file(_path) or file_size(_path) < 32:
            self.http().download_file(_url, _path, idx)
            self.after_file_save(_path, idx)
        self._archive.add_file(_path)
        if callable(callback):
            callback()
        return _path

    def get_archive_path(self) -> str:
        """Build the destination path of the current chapter's archive."""
        if self._override_name:
            _path = "{}_{}".format(
                self._override_name,
                str(self.normal_arc_name(self.get_chapter_index().split('-')))
            )
        else:
            # see Std
            _path = remove_file_query_params(self.get_archive_name())

        _path = self.remove_not_ascii(_path)
        if not _path:
            _path = str(self.chapter_id)

        name = self._params.get('name', '')
        if not len(name):
            name = self._storage['manga_name']

        # mark archives that had download errors in their file name
        additional_data_name = ''
        if self.http().has_error:
            additional_data_name = 'ERROR.'
            self.http().has_error = False

        return path_join(
            self._params.get('destination', 'Manga'),
            name,
            _path + '.%s%s' % (additional_data_name, self._archive_type())
        ) \
            .replace('?', '_') \
            .replace('"', '_') \
            .replace('>', '_') \
            .replace('<', '_') \
            .replace('|', '_')  # Windows...
def make_archive(self): _path = self.get_archive_path() info = 'Site: {}\nDownloader: {}\nVersion: {}'.format(self.get_url(), __downloader_uri__, __version__) # """ # make book info # """ # if self._params['cbz']: # self._archive.add_book_info(self._arc_meta_info()) self._archive.add_info(info) try: self._archive.make(_path) except OSError as e: self.log('') self.log(e) self.log(e, file=stderr) self._info.set_last_volume_error(str(e)) unlink(_path) raise e def html_fromstring(self, url, selector: str = None, idx: int = None): params = {} if isinstance(url, dict): params = url['params'] url = url['url'] return self.document_fromstring(self.http_get(url, **params), selector, idx) def _multi_thread_callback(self): self._call_files_progress_callback() self._storage['current_file'] += 1 def _multi_thread_save(self, files): threading = MultiThreads() # hack self._storage['current_file'] = 0 if self._params.get('max_threads', None) is not None: threading.max_threads = int(self._params.get('max_threads')) for idx, url in enumerate(files): threading.add(self.save_file, (idx, self._multi_thread_callback, url, None)) threading.start() def cf_protect(self, url): """ WARNING! Thins function replace cookies! :param url: str :return: """ cf = CloudFlareProtect() params = cf.run(url) if len(params): self.update_cookies(params[0]) self.update_ua(params[1]) self._params['cf-protect'] = True def update_ua(self, ua): self._storage['user_agent'] = ua self.http().user_agent = ua self._info and self._info.set_ua(ua) def update_cookies(self, cookies): for k in cookies: self._storage['cookies'][k] = cookies[k] self.http().cookies[k] = cookies[k] @property def content(self): content = self._storage.get('main_content', None) if content is None: content = self.get_main_content() return content