158 lines
4.8 KiB
Python
158 lines
4.8 KiB
Python
import re
|
|
from os import path
|
|
from sys import stderr
|
|
|
|
from loguru import logger
|
|
from lxml.html import HtmlElement
|
|
|
|
from manga_py.http import Http
|
|
from manga_py.image import Image
|
|
|
|
|
|
class Base:
    """Base class holding download state, user parameters and HTTP/image helpers.

    State lives in four per-instance dicts created by ``__init__``:

    * ``_storage``      -- runtime state (cookies, chapters, cursors, proxies, ...)
    * ``_params``       -- user-supplied options (destination, url, ...)
    * ``_image_params`` -- image post-processing options (crop offsets, auto crop)
    * ``_http_kwargs``  -- extra keyword arguments forwarded to ``Http``
    """

    # Class-level placeholders; every instance gets its own dicts in __init__.
    _storage = None
    _params = None
    _image_params = None
    _http_kwargs = None
    # Lazily created Http client shared by all calls to http(new=False).
    __http = None

    def __init__(self):
        self._storage = {
            'cookies': {},
            'main_content': None,
            'chapters': [],
            'current_chapter': 0,
            'current_file': 0,
            'proxies': {},
            'domain_uri': None,
        }
        self._params = {
            'destination': 'Manga',
            # NOTE(review): _get_user_agent() reads 'cf_protect' (underscore),
            # not 'cf-protect' -- confirm which key callers actually populate.
            'cf-protect': False,
        }
        self._image_params = {
            # Offsets as (left, upper, right, lower); all zeros means "no crop".
            'crop': (0, 0, 0, 0),
            # Set to True to enable automatic cropping.
            'auto_crop': False,
        }
        self._http_kwargs = {}

    def _archive_type(self):
        """Return the archive extension: ``'cbz'`` if the ``cbz`` param is truthy, else ``'zip'``.

        Raises:
            KeyError: if ``'cbz'`` was never put into ``_params`` (it is not
                one of the defaults set by ``__init__``).
        """
        return 'cbz' if self._params['cbz'] else 'zip'

    def get_url(self):
        """Return the ``url`` param; raises ``KeyError`` if it was never set."""
        return self._params['url']

    @property
    def domain(self) -> str:
        """Return ``scheme://host`` extracted from the ``url`` param.

        The value is cached in ``_storage['domain_uri']`` after the first
        successful extraction.

        Raises:
            SystemExit: if the url is missing or does not start with
                ``http://`` / ``https://``; a message is printed to stderr
                first.
        """
        try:
            if not self._storage.get('domain_uri', None):
                self._storage['domain_uri'] = re.search('(https?://[^/]+)', self._params['url']).group(1)
            return self._storage.get('domain_uri', '')
        except Exception:
            print('url is broken!', file=stderr)
            # Raise SystemExit directly: the builtin exit() is injected by the
            # ``site`` module for interactive use and is not guaranteed to
            # exist (e.g. ``python -S`` or frozen builds).
            raise SystemExit

    @staticmethod
    def image_auto_crop(src_path, dest_path=None):
        """Auto-crop the image at ``src_path``, forwarding ``dest_path`` to ``Image.crop_auto``."""
        image = Image(src_path=src_path)
        try:
            image.crop_auto(dest_path=dest_path)
        finally:
            # Always release the image, even if cropping fails.
            image.close()

    def image_manual_crop(self, src_path, dest_path=None):
        """Crop the image at ``src_path`` by the configured ``crop`` offsets.

        Offsets are read from ``_image_params['crop']``; the original note
        describes them as ``(left, top, right, bottom)``. Nothing is done when
        the offsets are not a tuple or are all zero.
        """
        offsets = self._image_params.get('crop')
        # The previous check compared ``isinstance(...)`` (a bool) with a tuple,
        # which is always unequal, so the crop ran unconditionally.
        if not isinstance(offsets, tuple) or offsets == (0, 0, 0, 0):
            return
        image = Image(src_path=src_path)
        try:
            image.crop_manual_with_offsets(offsets=offsets, dest_path=dest_path)
        finally:
            # Always release the image, even if cropping fails.
            image.close()

    def _build_http_params(self, params):
        """Fill ``params`` with defaults for constructing an ``Http`` client.

        Keys already present in ``params`` are kept. When a dict is passed in
        it is updated in place and returned; ``None`` yields a new dict.
        """
        if params is None:
            params = {}
        params.setdefault('allow_webp', not self._params.get('disallow_webp', None))
        # self.domain is evaluated even when a referer is stored, so a broken
        # url terminates here as well.
        params.setdefault('referer', self._storage.get('referer', self.domain))
        params.setdefault('user_agent', self._get_user_agent())
        params.setdefault('proxies', self._storage.get('proxies', None))
        params.setdefault('cookies', self._storage.get('cookies', None))
        params.setdefault('kwargs', self._http_kwargs)
        return params

    def http(self, new=False, params=None) -> 'Http':
        """Return an ``Http`` client.

        Args:
            new: when truthy, build and return a fresh client that is not
                cached; otherwise the shared client is created on first use
                and reused afterwards.
            params: optional overrides passed through ``_build_http_params``.
        """
        http_params = self._build_http_params(params)
        if new:
            return Http(**http_params)
        if not self.__http:
            self.__http = Http(**http_params)
        return self.__http

    def http_get(self, url: str, headers: dict = None, cookies: dict = None):
        """Issue a GET through the shared client and return its result."""
        return self.http().get(url=url, headers=headers, cookies=cookies)

    def http_post(self, url: str, headers: dict = None, cookies: dict = None, data=()):
        """Issue a POST through the shared client and return its result."""
        return self.http().post(url=url, headers=headers, cookies=cookies, data=data)

    def _get_user_agent(self):
        """Return the user agent to send.

        Uses ``_storage['user_agent']`` when the ``cf_protect`` param is
        truthy, otherwise ``_params['user_agent']``; either may be ``None``.
        """
        ua_storage = self._storage.get('user_agent', None)
        ua_params = self._params.get('user_agent', None)
        if self._params.get('cf_protect', False):
            return ua_storage
        return ua_params

    @property
    def chapter_id(self):
        """Index of the current chapter (``_storage['current_chapter']``, default 0)."""
        return self._storage.get('current_chapter', 0)

    @chapter_id.setter
    def chapter_id(self, idx):
        self._storage['current_chapter'] = idx

    @classmethod
    def __normalize_chapters(cls, n, element):
        """Normalize one chapter entry with the callable ``n``.

        ``HtmlElement`` entries are replaced by ``n(href)``, strings by
        ``n(string)``; anything else is returned unchanged.
        """
        if isinstance(element, HtmlElement):
            return n(element.get('href'))
        if isinstance(element, str):
            return n(element)
        return element

    def _prepare_chapters(self, chapters):
        """Return ``chapters`` as a list with element/str entries normalized.

        Logs a warning and returns an empty list when ``chapters`` is empty
        or ``None``.
        """
        n = self.http().normalize_uri
        if not chapters:
            logger.warning('Chapters list empty. Check %s' % self.get_url())
            return []
        return [self.__normalize_chapters(n, i) for i in chapters]

    @property
    def chapter(self):
        """The entry of ``_storage['chapters']`` at ``chapter_id``."""
        return self._storage['chapters'][self.chapter_id]

    def get_current_file(self):
        """Return ``_storage['files'][_storage['current_file']]``.

        ``'files'`` is not created by ``__init__``; it must be set beforehand.
        """
        return self._storage['files'][self._storage['current_file']]

    def book_meta(self) -> dict:
        """Return book metadata; the base implementation returns an empty dict."""
        return {}

    def _image_name(self, idx, filename):
        """Build the stored file name for a page.

        Args:
            idx: page index; ``None`` means ``_storage['current_file']``.
            filename: original file name, whose extension is preserved.

        Returns:
            ``'<idx>_<stem><ext>'`` with ``idx`` zero-padded to three
            characters, or just ``'<idx><ext>'`` when the ``rename_pages``
            param is truthy.
        """
        if idx is None:
            idx = self._storage['current_file']
        fn, extension = path.splitext(filename)
        if self._params['rename_pages']:
            _path = '{:0>3}'.format(idx)
        else:
            _path = '{:0>3}_{}'.format(idx, fn)
        return _path + extension

    def chapter_for_json(self) -> str:
        """Return the current chapter as it should appear in the info JSON."""
        return self.chapter

    def put_info_json(self, meta):
        """Hook for recording ``meta``; the base implementation does nothing.

        The original note lists the expected fields as: manga_name, url,
        directory.
        """
        pass
|