diff --git a/README.md b/README.md index faf13a0f..9e5ea2c6 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ $ jmcomic 422866 - **支持Plugin插件,可以方便地扩展功能,以及使用别人的插件** - 目前内置支持的插件有:`登录插件` `硬件占用监控插件` `只下载新章插件` `压缩文件插件` `下载特定后缀图片插件` `发送QQ邮件插件` `日志主题过滤插件` `自动使用浏览器cookies插件` - 支持自定义本子/章节/图片下载前后的回调函数 - - 支持自定义debug/logging + - 支持自定义日志 - 支持自定义类:`Downloader(负责调度)` `Option(负责配置)` `Client(负责请求)` `实体类`等 ## 进阶使用 diff --git a/assets/docs/sources/TODO.md b/assets/docs/sources/TODO.md index 8e73ae20..45ea7f7c 100644 --- a/assets/docs/sources/TODO.md +++ b/assets/docs/sources/TODO.md @@ -2,7 +2,8 @@ | 版本范围 | 更新内容 | |:--------:|:--------------------------------------:| -| v2.3.* | 实现移动端API的基础功能,统一HTML和API的实现 | +| v2.4.* | 项目实现基本稳定,进入维护期,按需增加功能。 | +| v2.3.* | 实现移动端API的基础功能,统一HTML和API的实现。 | | v2.2.* | 新的插件体系,新的命令行调用,完善搜索功能。 | | v2.1.* | 拆分Downloader抽象调度,优化可扩展性、代码复用性、模块级别自定义。 | | v2.0.* | 重新设计合理的抽象层次,实现请求重试切换域名机制,新的option配置设计。 | diff --git a/assets/docs/sources/index.md b/assets/docs/sources/index.md index 021f1a4c..2083c90c 100644 --- a/assets/docs/sources/index.md +++ b/assets/docs/sources/index.md @@ -29,9 +29,9 @@ Python API for JMComic(禁漫天堂) - Supports Plugin for easy functionality extension and use of other plugins. - Currently built-in - plugins: `login plugin`, `hardware usage monitoring plugin`, `only download new chapters plugin`, `zip compression plugin`, `image suffix filter plugin` `send qq email plugin` `debug logging topic filter plugin` `auto set browser cookies plugin`. + plugins: `login plugin`, `hardware usage monitoring plugin`, `only download new chapters plugin`, `zip compression plugin`, `image suffix filter plugin` `send qq email plugin` `logging topic filter plugin` `auto set browser cookies plugin`. - Supports custom callback functions before and after downloading album/chapter/images. - - Supports custom debug logging. + - Supports custom logging. - Supports custom core classes: `Downloader (responsible for scheduling)`, `Option (responsible for configuration)`, `Client (responsible for requests)`, `entity classes`, and more. diff --git a/assets/docs/sources/tutorial/4_module_custom.md b/assets/docs/sources/tutorial/4_module_custom.md index 60a66a18..1f9fafae 100644 --- a/assets/docs/sources/tutorial/4_module_custom.md +++ b/assets/docs/sources/tutorial/4_module_custom.md @@ -140,27 +140,27 @@ def custom_album_photo_image_detail_class(): -## 自定义debug +## 自定义log ```python -def custom_jm_debug(): +def custom_jm_log(): """ - 该函数演示自定义debug + 该函数演示自定义log """ - # jmcomic模块在运行过程中会使用 jm_debug() 这个函数进行打印信息 - # jm_debug() 这个函数 最后会调用 JmModuleConfig.debug_executor 函数 - # 你可以写一个自己的函数,替换 JmModuleConfig.debug_executor,实现自定义debug + # jmcomic模块在运行过程中会使用 jm_log() 这个函数进行打印信息 + # jm_log() 这个函数 最后会调用 JmModuleConfig.log_executor 函数 + # 你可以写一个自己的函数,替换 JmModuleConfig.log_executor,实现自定义log - # 1. 自定义debug函数 - def my_debug(topic: str, msg: str): + # 1. 自定义log函数 + def my_log(topic: str, msg: str): """ - 这个debug函数的参数列表必须包含两个参数,topic和msg - @param topic: debug主题,例如 'album.before', 'req.error', 'plugin.error' - @param msg: 具体debug的信息 + 这个log函数的参数列表必须包含两个参数,topic和msg + @param topic: log主题,例如 'album.before', 'req.error', 'plugin.error' + @param msg: 具体log的信息 """ pass - # 2. 让my_debug生效 - JmModuleConfig.debug_executor = my_debug + # 2. 让my_log生效 + JmModuleConfig.log_executor = my_log ``` \ No newline at end of file diff --git a/assets/docs/sources/tutorial/8_pick_domain.md b/assets/docs/sources/tutorial/8_pick_domain.md index 3e521b70..c94a13ec 100644 --- a/assets/docs/sources/tutorial/8_pick_domain.md +++ b/assets/docs/sources/tutorial/8_pick_domain.md @@ -13,7 +13,7 @@ meta_data = { # 'proxies': ProxyBuilder.clash_proxy() } -disable_jm_debug() +disable_jm_log() def get_domain_ls(): diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index 558a3213..42a4708a 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,7 +2,7 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.3.17' +__version__ = '2.4.1' from .api import * from .jm_plugin import * diff --git a/src/jmcomic/cl.py b/src/jmcomic/cl.py index 909630f9..0d9eb057 100644 --- a/src/jmcomic/cl.py +++ b/src/jmcomic/cl.py @@ -77,8 +77,8 @@ def parse(text): def main(self): self.parse_arg() - from .api import jm_debug - jm_debug('command_line', + from .api import jm_log + jm_log('command_line', f'start downloading...\n' f'- using option: [{self.option_path or "default"}]\n' f'to be downloaded: \n' diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index 5e1f4219..87507974 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -1,7 +1,7 @@ from .jm_client_interface import * -# 抽象基类,实现了域名管理,发请求,重试机制,debug,缓存等功能 +# 抽象基类,实现了域名管理,发请求,重试机制,log,缓存等功能 class AbstractJmClient( JmcomicClient, PostmanProxy, @@ -79,20 +79,20 @@ def request_with_retry(self, api_path=url, domain=self.domain_list[domain_index], ) - jm_debug(self.debug_topic_request(), self.decode(url)) + jm_log(self.log_topic(), self.decode(url)) else: # 图片url pass if domain_index != 0 or retry_count != 0: - jm_debug(f'req.retry', + jm_log(f'req.retry', ', '.join([ f'次数: [{retry_count}/{self.retry_times}]', f'域名: [{domain_index} of {self.domain_list}]', f'路径: [{url}]', f'参数: [{kwargs if "login" not in url else "#login_form#"}]' ]) - ) + ) try: resp = request(url, **kwargs) @@ -106,12 +106,12 @@ def request_with_retry(self, return self.request_with_retry(request, url, domain_index + 1, 0, judge, **kwargs) # noinspection PyMethodMayBeStatic - def debug_topic_request(self): + def log_topic(self): return self.client_key # noinspection PyMethodMayBeStatic, PyUnusedLocal def before_retry(self, e, kwargs, retry_count, url): - jm_debug('req.error', str(e)) + jm_log('req.error', str(e)) def enable_cache(self): # noinspection PyDefaultArgument,PyShadowingBuiltins @@ -176,7 +176,7 @@ def set_domain_list(self, domain_list: List[str]): # noinspection PyUnusedLocal def fallback(self, request, url, domain_index, retry_count, **kwargs): msg = f"请求重试全部失败: [{url}], {self.domain_list}" - jm_debug('req.fallback', msg) + jm_log('req.fallback', msg) ExceptionTool.raises(msg) # noinspection PyMethodMayBeStatic @@ -190,7 +190,7 @@ def append_params_to_url(self, url, params): # noinspection PyMethodMayBeStatic def decode(self, url: str): - if not JmModuleConfig.decode_url_when_debug or '/search/' not in url: + if not JmModuleConfig.decode_url_when_logging or '/search/' not in url: return url from urllib.parse import unquote @@ -268,10 +268,12 @@ def search(self, def login(self, username, password, - refresh_client_cookies=True, id_remember='on', login_remember='on', ): + """ + 返回response响应对象 + """ data = { 'username': username, @@ -289,11 +291,45 @@ def login(self, if resp.status_code != 301: ExceptionTool.raises_resp(f'登录失败,状态码为{resp.status_code}', resp) - if refresh_client_cookies is True: - self['cookies'] = resp.cookies + orig_cookies = self.get_meta_data('cookies') or {} + new_cookies = dict(resp.cookies) + # 重复登录下存在bug,AVS会丢失 + if 'AVS' in orig_cookies and 'AVS' not in new_cookies: + return resp + + self['cookies'] = new_cookies return resp + def favorite_folder(self, + page=1, + order_by=JmMagicConstants.ORDER_BY_LATEST, + folder_id='0', + username='', + ) -> JmFavoritePage: + if username == '': + username = self.get_username_or_raise() + + resp = self.get_jm_html( + f'/user/{username}/favorite/albums', + params={ + 'page': page, + 'o': order_by, + 'folder_id': folder_id, + } + ) + + return JmPageTool.parse_html_to_favorite_page(resp.text) + + # noinspection PyTypeChecker + def get_username_or_raise(self) -> str: + cookies = self.get_meta_data('cookies', None) + if not cookies: + ExceptionTool.raises('未登录,无法获取到对应的用户名,需要传username参数') + + # 解析cookies,可能需要用到 phpserialize,比较麻烦,暂不实现 + ExceptionTool.raises('需要传username参数') + def get_jm_html(self, url, require_200=True, **kwargs): """ 请求禁漫网页的入口 @@ -349,10 +385,10 @@ def album_comment(self, data['is_reply'] = 1 data['forum_subject'] = 1 - jm_debug('album.comment', + jm_log('album.comment', f'{video_id}: [{comment}]' + - (f' to ({comment_id})' if comment_id is not None else '') - ) + (f' to ({comment_id})' if comment_id is not None else '') + ) resp = self.post('/ajax/album_comment', headers=JmModuleConfig.album_comment_headers, @@ -360,7 +396,7 @@ def album_comment(self, ) ret = JmAcResp(resp) - jm_debug('album.comment', f'{video_id}: [{comment}] ← ({ret.model().cid})') + jm_log('album.comment', f'{video_id}: [{comment}] ← ({ret.model().cid})') return ret @@ -370,14 +406,31 @@ def require_resp_success_else_raise(cls, resp, orig_req_url: str): :param resp: 响应对象 :param orig_req_url: /photo/12412312 """ - # 1. 检查是否 album_missing - error_album_missing = '/error/album_missing' - if resp.url.endswith(error_album_missing) and not orig_req_url.endswith(error_album_missing): - ExceptionTool.raise_missing(resp, orig_req_url) + resp_url: str = resp.url - # 2. 是否是特殊的内容 + # 1. 是否是特殊的内容 cls.check_special_text(resp) + # 2. 检查响应发送重定向,重定向url是否表示错误网页,即 /error/xxx + if resp.redirect_count == 0 or '/error/' not in resp_url: + return + + # 3. 检查错误类型 + def match_case(error_path): + return resp_url.endswith(error_path) and not orig_req_url.endswith(error_path) + + # 3.1 album_missing + if match_case('/error/album_missing'): + ExceptionTool.raise_missing(resp, orig_req_url) + + # 3.2 user_missing + if match_case('/error/user_missing'): + ExceptionTool.raises_resp('此用戶名稱不存在,或者你没有登录,請再次確認使用名稱', resp) + + # 3.3 invalid_module + if match_case('/error/invalid_module'): + ExceptionTool.raises_resp('發生了無法預期的錯誤。若問題持續發生,請聯繫客服支援', resp) + @classmethod def check_special_text(cls, resp): html = resp.text @@ -423,6 +476,7 @@ class JmApiClient(AbstractJmClient): API_ALBUM = '/album' API_CHAPTER = '/chapter' API_SCRAMBLE = '/chapter_view_template' + API_FAVORITE = '/favorite' def search(self, search_query: str, @@ -439,7 +493,7 @@ def search(self, 't': time, } - resp = self.get_decode(self.append_params_to_url(self.API_SEARCH, params)) + resp = self.req_api(self.append_params_to_url(self.API_SEARCH, params)) # 直接搜索禁漫车号,发生重定向的响应数据 resp.model_data # { @@ -453,7 +507,7 @@ def search(self, aid = data.redirect_aid return JmSearchPage.wrap_single_album(self.get_album_detail(aid)) - return JmSearchTool.parse_api_resp_to_page(data) + return JmPageTool.parse_api_to_search_page(data) def get_album_detail(self, album_id) -> JmAlbumDetail: return self.fetch_detail_entity(album_id, @@ -475,7 +529,7 @@ def get_photo_detail(self, def get_scramble_id(self, photo_id, album_id=None): """ - 带有缓存的fetch_scramble_id,缓存位于JmModuleConfig.SCRAMBLE_CACHE + 带有缓存的fetch_scramble_id,缓存位于 JmModuleConfig.SCRAMBLE_CACHE """ cache = JmModuleConfig.SCRAMBLE_CACHE if photo_id in cache: @@ -497,7 +551,7 @@ def fetch_detail_entity(self, apid, clazz): """ apid = JmcomicText.parse_to_jm_id(apid) url = self.API_ALBUM if issubclass(clazz, JmAlbumDetail) else self.API_CHAPTER - resp = self.get_decode( + resp = self.req_api( url, params={ 'id': apid, @@ -513,7 +567,7 @@ def fetch_scramble_id(self, photo_id): 请求scramble_id """ photo_id: str = JmcomicText.parse_to_jm_id(photo_id) - resp = self.get_decode( + resp = self.req_api( self.API_SCRAMBLE, params={ "id": photo_id, @@ -528,8 +582,8 @@ def fetch_scramble_id(self, photo_id): None, ) if scramble_id is None: - jm_debug('api.scramble', f'未匹配到scramble_id,响应文本:{resp.text}') - scramble_id = str(JmModuleConfig.SCRAMBLE_220980) + jm_log('api.scramble', f'未匹配到scramble_id,响应文本:{resp.text}') + scramble_id = str(JmMagicConstants.SCRAMBLE_220980) return scramble_id @@ -598,7 +652,7 @@ def setting(self) -> JmApiResp: "float_ad": true } """ - resp = self.get_decode('/setting') + resp = self.req_api('/setting') return resp def login(self, @@ -607,16 +661,69 @@ def login(self, refresh_client_cookies=True, id_remember='on', login_remember='on', - ): - jm_debug('api.login', '禁漫移动端无需登录,调用login不会做任何操作') - pass + ) -> JmApiResp: + """ + { + "uid": "123", + "username": "x", + "email": "x", + "emailverified": "yes", + "photo": "x", + "fname": "", + "gender": "x", + "message": "Welcome x!", + "coin": 123, + "album_favorites": 123, + "s": "x", + "level_name": "x", + "level": 1, + "nextLevelExp": 123, + "exp": "123", + "expPercent": 123, + "badges": [], + "album_favorites_max": 123 + } + + """ + resp = self.req_api('/login', False, data={ + 'username': username, + 'password': password, + }) + + resp.require_success() + cookies = dict(resp.resp.cookies) + cookies.update({'AVS': resp.res_data['s']}) + self['cookies'] = cookies - def get_decode(self, url, **kwargs) -> JmApiResp: + return resp + + def favorite_folder(self, + page=1, + order_by=JmMagicConstants.ORDER_BY_LATEST, + folder_id='0', + username='', + ) -> JmFavoritePage: + resp = self.req_api( + self.API_FAVORITE, + params={ + 'page': page, + 'folder_id': folder_id, + 'o': order_by, + } + ) + + return JmPageTool.parse_api_to_favorite_page(resp.model_data) + + def req_api(self, url, get=True, **kwargs) -> JmApiResp: # set headers headers, key_ts = self.headers_key_ts kwargs['headers'] = headers - resp = self.get(url, **kwargs) + if get: + resp = self.get(url, **kwargs) + else: + resp = self.post(url, **kwargs) + return JmApiResp.wrap(resp, key_ts) @property @@ -624,9 +731,6 @@ def headers_key_ts(self): key_ts = time_stamp() return JmModuleConfig.new_api_headers(key_ts), key_ts - def debug_topic_request(self): - return 'api' - @classmethod def require_resp_success(cls, resp: JmApiResp, orig_req_url: str): resp.require_success() diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py index de4ee5ca..316ba03c 100644 --- a/src/jmcomic/jm_client_interface.py +++ b/src/jmcomic/jm_client_interface.py @@ -145,6 +145,7 @@ def json(self, **kwargs) -> Dict: """ + class JmDetailClient: def get_album_detail(self, album_id) -> JmAlbumDetail: @@ -194,9 +195,6 @@ class JmUserClient: def login(self, username, password, - refresh_client_cookies=True, - id_remember='on', - login_remember='on', ): raise NotImplementedError @@ -219,6 +217,21 @@ def album_comment(self, """ raise NotImplementedError + def favorite_folder(self, + page=1, + order_by=JmMagicConstants.ORDER_BY_LATEST, + folder_id='0', + username='', + ) -> JmFavoritePage: + """ + 获取收藏了的漫画,文件夹默认是全部 + :param folder_id: 文件夹id + :param page: 分页 + :param order_by: 排序 + :param username: 用户名 + """ + raise NotImplementedError + class JmImageClient: @@ -238,7 +251,7 @@ def download_image(self, :param decode_image: 要保存的是解密后的图还是原图 """ if scramble_id is None: - scramble_id = JmModuleConfig.SCRAMBLE_220980 + scramble_id = JmMagicConstants.SCRAMBLE_220980 # 请求图片 resp = self.get_jm_image(img_url) @@ -294,16 +307,6 @@ class JmSearchAlbumClient: 範例:全彩 人妻 """ - ORDER_BY_LATEST = 'mr' - ORDER_BY_VIEW = 'mv' - ORDER_BY_PICTURE = 'mp' - ORDER_BY_LIKE = 'tf' - - TIME_TODAY = 't' - TIME_WEEK = 'w' - TIME_MONTH = 'm' - TIME_ALL = 'a' - def search(self, search_query: str, page: int, @@ -319,8 +322,8 @@ def search(self, def search_site(self, search_query: str, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, ): """ 对应禁漫的站内搜索 @@ -330,8 +333,8 @@ def search_site(self, def search_work(self, search_query: str, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, ): """ 搜索album的作品 work @@ -341,8 +344,8 @@ def search_work(self, def search_author(self, search_query: str, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, ): """ 搜索album的作者 author @@ -352,8 +355,8 @@ def search_author(self, def search_tag(self, search_query: str, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, ): """ 搜索album的标签 tag @@ -363,21 +366,89 @@ def search_tag(self, def search_actor(self, search_query: str, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, ): """ 搜索album的登场角色 actor """ return self.search(search_query, page, 4, order_by, time) + +# noinspection PyAbstractClass +class JmcomicClient( + JmImageClient, + JmDetailClient, + JmUserClient, + JmSearchAlbumClient, + Postman, +): + client_key: None + + def get_domain_list(self) -> List[str]: + """ + 获取当前client的域名配置 + """ + raise NotImplementedError + + def set_domain_list(self, domain_list: List[str]): + """ + 设置当前client的域名配置 + """ + raise NotImplementedError + + def get_html_domain(self, postman=None): + return JmModuleConfig.get_html_domain(postman or self.get_root_postman()) + + def get_html_domain_all(self, postman=None): + return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman()) + + # noinspection PyMethodMayBeStatic + def do_page_iter(self, params: dict, page: int, get_page_method): + from math import inf + def update(value: Union[Dict], page: int, page_content: JmPageContent): + if value is None: + return page + 1, page_content.page_count + + ExceptionTool.require_true(isinstance(value, dict), 'require dict params') + + # 根据外界传递的参数,更新params和page + page = value.get('page', page) + params.update(value) + + return page, inf + + total = inf + while page <= total: + params['page'] = page + page_content = get_page_method(**params) + value = yield page_content + page, total = update(value, page, page_content) + + def favorite_folder_gen(self, + page=1, + order_by=JmMagicConstants.ORDER_BY_LATEST, + folder_id='0', + username='', + ) -> Generator[JmFavoritePage, Dict, None]: + """ + 见 search_gen + """ + params = { + 'order_by': order_by, + 'folder_id': folder_id, + 'username': username, + } + + yield from self.do_page_iter(params, page, self.favorite_folder) + def search_gen(self, search_query: str, main_tag=0, page: int = 1, - order_by: str = ORDER_BY_LATEST, - time: str = TIME_ALL, - ): + order_by: str = JmMagicConstants.ORDER_BY_LATEST, + time: str = JmMagicConstants.TIME_ALL, + ) -> Generator[JmSearchPage, Dict, None]: """ 搜索结果的生成器,支持下面这种调用方式: @@ -409,56 +480,4 @@ def search_gen(self, 'time': time, } - def search(page): - params['page'] = page - return self.search(**params) - - from math import inf - - def update(value: Union[Dict], page: int, search_page: JmSearchPage): - if value is None: - return page + 1, search_page.page_count - - ExceptionTool.require_true(isinstance(value, dict), 'require dict params') - - # 根据外界传递的参数,更新params和page - page = value.get('page', page) - params.update(value) - - return page, inf - - total = inf - - while page <= total: - search_page = search(page) - value = yield search_page - page, total = update(value, page, search_page) - - -# noinspection PyAbstractClass -class JmcomicClient( - JmImageClient, - JmDetailClient, - JmUserClient, - JmSearchAlbumClient, - Postman, -): - client_key: None - - def get_domain_list(self) -> List[str]: - """ - 获取当前client的域名配置 - """ - raise NotImplementedError - - def set_domain_list(self, domain_list: List[str]): - """ - 设置当前client的域名配置 - """ - raise NotImplementedError - - def get_html_domain(self, postman=None): - return JmModuleConfig.get_html_domain(postman or self.get_root_postman()) - - def get_html_domain_all(self, postman=None): - return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman()) + yield from self.do_page_iter(params, page, self.search) diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index 0c26d03b..63f11a93 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -3,7 +3,7 @@ def field_cache(*args, **kwargs): return field_cache(*args, **kwargs) -def default_jm_debug_logging(topic: str, msg: str): +def default_jm_logging(topic: str, msg: str): from common import format_ts print(f'{format_ts()}:【{topic}】{msg}') @@ -35,6 +35,29 @@ class JmcomicException(Exception): pass +class JmMagicConstants: + ORDER_BY_LATEST = 'mr' + ORDER_BY_VIEW = 'mv' + ORDER_BY_PICTURE = 'mp' + ORDER_BY_LIKE = 'tf' + + TIME_TODAY = 't' + TIME_WEEK = 'w' + TIME_MONTH = 'm' + TIME_ALL = 'a' + + # 分页大小 + PAGE_SIZE_SEARCH = 80 + PAGE_SIZE_FAVORITE = 20 + + SCRAMBLE_220980 = 220980 + SCRAMBLE_268850 = 268850 + SCRAMBLE_421926 = 421926 # 2023-02-08后改了图片切割算法 + + # 移动端API密钥 + APP_SECRET = '18comicAPPContent' + + class JmModuleConfig: # 网站相关 PROT = "https://" @@ -57,14 +80,8 @@ class JmModuleConfig: } # 图片分隔相关 - SCRAMBLE_220980 = 220980 - SCRAMBLE_268850 = 268850 - SCRAMBLE_421926 = 421926 # 2023-02-08后改了图片切割算法 SCRAMBLE_CACHE = {} - # 移动端API密钥 - APP_SECRET = '18comicAPPContent' - # cookies,目前只在移动端使用,因为移动端请求接口须携带,但不会校验cookies的内容。 APP_COOKIES = None @@ -109,17 +126,17 @@ class JmModuleConfig: # 插件注册表 REGISTRY_PLUGIN = {} - # 执行debug的函数 - debug_executor = default_jm_debug_logging + # 执行log的函数 + log_executor = default_jm_logging # postman构造函数 postman_constructor = default_postman_constructor - # 网页正则表达式解析失败时,执行抛出异常的函数,可以替换掉用于debug + # 网页正则表达式解析失败时,执行抛出异常的函数,可以替换掉用于log raise_exception_executor = default_raise_exception_executor - # debug开关标记 - enable_jm_debug = True - # debug时解码url - decode_url_when_debug = True + # log开关标记 + enable_jm_log = True + # log时解码url + decode_url_when_logging = True # 下载时的一些默认值配置 DEFAULT_AUTHOR = 'default-author' @@ -194,7 +211,7 @@ def get_html_url(cls, postman=None): postman = postman or cls.new_postman(session=True) url = postman.with_redirect_catching().get(cls.JM_REDIRECT_URL) - cls.jm_debug('module.html_url', f'获取禁漫网页URL: [{cls.JM_REDIRECT_URL}] → [{url}]') + cls.jm_log('module.html_url', f'获取禁漫网页URL: [{cls.JM_REDIRECT_URL}] → [{url}]') return url @classmethod @@ -215,7 +232,7 @@ def get_html_domain_all(cls, postman=None): from .jm_toolkit import JmcomicText domain_list = JmcomicText.analyse_jm_pub_html(resp.text) - cls.jm_debug('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}') + cls.jm_log('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}') return domain_list @classmethod @@ -228,7 +245,7 @@ def get_cookies(cls, postman=None): resp = postman.get(url) cookies = dict(resp.cookies) - cls.jm_debug('module.cookies', f'获取cookies: [{url}] → {cookies}') + cls.jm_log('module.cookies', f'获取cookies: [{url}] → {cookies}') return cookies @classmethod @@ -249,7 +266,6 @@ def new_html_headers(cls, domain='18comic.vip'): 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'none', 'sec-fetch-user': '?1', - 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 ' 'Safari/537.36', } @@ -264,7 +280,7 @@ def new_api_headers(cls, key_ts): key_ts = time_stamp() import hashlib - token = hashlib.md5(f"{key_ts}{cls.APP_SECRET}".encode("utf-8")).hexdigest() + token = hashlib.md5(f"{key_ts}{JmMagicConstants.APP_SECRET}".encode("utf-8")).hexdigest() return { 'token': token, @@ -283,13 +299,13 @@ def new_api_headers(cls, key_ts): # noinspection PyUnusedLocal @classmethod - def jm_debug(cls, topic: str, msg: str): - if cls.enable_jm_debug is True: - cls.debug_executor(topic, msg) + def jm_log(cls, topic: str, msg: str): + if cls.enable_jm_log is True: + cls.log_executor(topic, msg) @classmethod - def disable_jm_debug(cls): - cls.enable_jm_debug = False + def disable_jm_log(cls): + cls.enable_jm_log = False @classmethod def new_postman(cls, session=False, **kwargs): @@ -324,7 +340,7 @@ def new_postman(cls, session=False, **kwargs): DEFAULT_PROXIES = system_proxy() # use system proxy by default default_option_dict: dict = { - 'debug': None, + 'log': None, 'dir_rule': {'rule': 'Bd_Pname', 'base_dir': None}, 'download': { 'cache': True, @@ -349,9 +365,9 @@ def new_postman(cls, session=False, **kwargs): 'retry_times': 5 }, 'plugins': { - # 如果插件抛出参数校验异常,只debug。(全局配置,可以被插件的局部配置覆盖) - # 可选值:ignore(忽略),debug(打印日志),raise(抛异常)。 - 'valid': 'debug', + # 如果插件抛出参数校验异常,只log。(全局配置,可以被插件的局部配置覆盖) + # 可选值:ignore(忽略),log(打印日志),raise(抛异常)。 + 'valid': 'log', }, } @@ -365,9 +381,9 @@ def option_default_dict(cls) -> dict: option_dict = deepcopy(cls.default_option_dict) - # debug - if option_dict['debug'] is None: - option_dict['debug'] = cls.enable_jm_debug + # log + if option_dict['log'] is None: + option_dict['log'] = cls.enable_jm_log # dir_rule.base_dir dir_rule = option_dict['dir_rule'] @@ -413,5 +429,5 @@ def register_client(cls, client_class): cls.REGISTRY_CLIENT[client_class.client_key] = client_class -jm_debug = JmModuleConfig.jm_debug -disable_jm_debug = JmModuleConfig.disable_jm_debug +jm_log = JmModuleConfig.jm_log +disable_jm_log = JmModuleConfig.disable_jm_log diff --git a/src/jmcomic/jm_downloader.py b/src/jmcomic/jm_downloader.py index b016d6fe..de4ff9dc 100644 --- a/src/jmcomic/jm_downloader.py +++ b/src/jmcomic/jm_downloader.py @@ -1,53 +1,45 @@ from .jm_option import * -# help for typing -DownloadIterObjs = Union[ - JmAlbumDetail, - Sequence[JmPhotoDetail], - JmPhotoDetail, - Sequence[JmImageDetail], -] - # noinspection PyMethodMayBeStatic class DownloadCallback: def before_album(self, album: JmAlbumDetail): - jm_debug('album.before', + jm_log('album.before', f'本子获取成功: [{album.id}], ' f'作者: [{album.author}], ' f'章节数: [{len(album)}], ' f'总页数: [{album.page_count}], ' f'标题: [{album.name}], ' f'关键词: [{album.tags}]' - ) + ) def after_album(self, album: JmAlbumDetail): - jm_debug('album.after', f'本子下载完成: [{album.id}]') + jm_log('album.after', f'本子下载完成: [{album.id}]') def before_photo(self, photo: JmPhotoDetail): - jm_debug('photo.before', + jm_log('photo.before', f'开始下载章节: {photo.id} ({photo.album_id}[{photo.index}/{len(photo.from_album)}]), ' f'标题: [{photo.name}], ' f'图片数为[{len(photo)}]' - ) + ) def after_photo(self, photo: JmPhotoDetail): - jm_debug('photo.after', + jm_log('photo.after', f'章节下载完成: [{photo.id}] ({photo.album_id}[{photo.index}/{len(photo.from_album)}])') def before_image(self, image: JmImageDetail, img_save_path): if image.is_exists: - jm_debug('image.before', + jm_log('image.before', f'图片已存在: {image.tag} ← [{img_save_path}]' - ) + ) else: - jm_debug('image.before', + jm_log('image.before', f'图片准备下载: {image.tag}, [{image.img_url}] → [{img_save_path}]' - ) + ) def after_image(self, image: JmImageDetail, img_save_path): - jm_debug('image.after', + jm_log('image.after', f'图片下载完成: {image.tag}, [{image.img_url}] → [{img_save_path}]') @@ -116,7 +108,7 @@ def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient): # noinspection PyMethodMayBeStatic def execute_by_condition(self, - iter_objs: DownloadIterObjs, + iter_objs: DetailEntity, apply: Callable, count_batch: int, ): @@ -144,17 +136,17 @@ def execute_by_condition(self, ) # noinspection PyMethodMayBeStatic - def filter_iter_objs(self, iter_objs: DownloadIterObjs): + def filter_iter_objs(self, detail: DetailEntity): """ 该方法可用于过滤本子/章节,默认不会做过滤。 例如: 只想下载 本子的最新一章,返回 [album[-1]] 只想下载 章节的前10张图片,返回 [photo[:10]] - :param iter_objs: 可能是本子或者章节,需要自行使用 isinstance 判断 + :param detail: 可能是本子或者章节,需要自行使用 isinstance / is_xxx 判断 :returns: 只想要下载的 本子的章节 或 章节的图片 """ - return iter_objs + return detail # noinspection PyUnusedLocal def client_for_album(self, jm_album_id) -> JmcomicClient: @@ -203,6 +195,6 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): if exc_type is not None: - jm_debug('dler.exception', + jm_log('dler.exception', f'{self.__class__.__name__} Exit with exception: {exc_type, exc_val}' - ) + ) diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py index b71568c9..9884d851 100644 --- a/src/jmcomic/jm_entity.py +++ b/src/jmcomic/jm_entity.py @@ -9,6 +9,18 @@ def save_to_file(self, filepath): from common import PackerUtil PackerUtil.pack(self, filepath) + @classmethod + def is_image(cls): + return False + + @classmethod + def is_photo(cls): + return False + + @classmethod + def is_album(cls): + return False + class IndexedEntity: def getindex(self, index: int): @@ -153,10 +165,14 @@ def of(cls, @property def tag(self) -> str: """ - this tag is used to print pretty info when debug + this tag is used to print pretty info when logging """ return f'{self.aid}/{self.img_file_name}{self.img_file_suffix} [{self.index + 1}/{len(self.from_photo)}]' + @classmethod + def is_image(cls): + return True + class JmPhotoDetail(DetailEntity): @@ -312,6 +328,10 @@ def __len__(self): def __iter__(self) -> Generator[JmImageDetail, None, None]: return super().__iter__() + @classmethod + def is_photo(cls): + return True + class JmAlbumDetail(DetailEntity): @@ -421,21 +441,36 @@ def __len__(self): def __iter__(self) -> Generator[JmPhotoDetail, None, None]: return super().__iter__() + @classmethod + def is_album(cls): + return True + -class JmSearchPage(JmBaseEntity, IndexedEntity): +class JmPageContent(JmBaseEntity, IndexedEntity): ContentItem = Tuple[str, Dict[str, Any]] - def __init__(self, content: List[ContentItem], page_count): + def __init__(self, content: List[ContentItem], total: int): """ + content: [ album_id, {title, tag_list, ...} ] - :param content: 搜索结果,移动端和网页端都一次返回80个 - :param page_count: 总页数,登录和不登录能看到的总页数不一样 + :param content: 分页数据 + :param total: 一共多少页 """ self.content = content - self.page_count = page_count + self.total = total + + @property + def page_count(self) -> int: + page_size = self.page_size + import math + return math.ceil(int(self.total) / page_size) + + @property + def page_size(self) -> int: + raise NotImplementedError def iter_id(self) -> Generator[str, None, None]: """ @@ -456,8 +491,31 @@ def iter_id_title_tag(self) -> Generator[Tuple[str, str, List[str]], None, None] 返回 album_id, album_title, album_tag_list 的迭代器 """ for aid, ainfo in self.content: + ainfo.setdefault('tag_list', []) yield aid, ainfo['name'], ainfo['tag_list'] + # 下面的方法实现方便的元素访问 + + def __len__(self): + return len(self.content) + + def __iter__(self): + return self.iter_id_title() + + def __getitem__(self, item) -> Union[ContentItem, List[ContentItem]]: + return super().__getitem__(item) + + def getindex(self, index: int): + return self.content[index] + + +class JmSearchPage(JmPageContent): + + @property + def page_size(self) -> int: + from .jm_client_interface import JmMagicConstants + return JmMagicConstants.PAGE_SIZE_SEARCH + # 下面的方法是对单个album的包装 @property @@ -479,16 +537,19 @@ def wrap_single_album(cls, album: JmAlbumDetail) -> 'JmSearchPage': setattr(page, 'album', album) return page - # 下面的方法实现方便的元素访问 - def __len__(self): - return len(self.content) +class JmFavoritePage(JmPageContent): - def __iter__(self): - return self.iter_id_title() + def __init__(self, content, folder_list, total): + """ - def __getitem__(self, item) -> Union[ContentItem, List[ContentItem]]: - return super().__getitem__(item) + :param content: 收藏夹一页数据 + :param folder_list: 所有的收藏夹的信息 + :param total: 收藏夹的收藏总数 + """ + super().__init__(content, total) + self.folder_list = folder_list - def getindex(self, index: int): - return self.content[index] + @property + def page_size(self) -> int: + return JmMagicConstants.PAGE_SIZE_FAVORITE diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py index e725d0b4..6c270757 100644 --- a/src/jmcomic/jm_option.py +++ b/src/jmcomic/jm_option.py @@ -89,7 +89,7 @@ def deside_image_save_dir(self, ret = self.apply_rule_solver(album, photo, solver) except BaseException as e: # noinspection PyUnboundLocalVariable - jm_debug('dir_rule', f'路径规则"{solver[2]}"的解析出错: {e}, album={album}, photo={photo}') + jm_log('dir_rule', f'路径规则"{solver[2]}"的解析出错: {e}, album={album}, photo={photo}') raise e path_ls.append(str(ret)) @@ -289,10 +289,10 @@ def default(cls, proxies=None, domain=None) -> 'JmOption': def construct(cls, origdic: Dict, cover_default=True) -> 'JmOption': dic = cls.merge_default_dict(origdic) if cover_default else origdic - # debug - debug = dic.pop('debug', True) - if debug is False: - disable_jm_debug() + # log + log = dic.pop('log', True) + if log is False: + disable_jm_log() # version version = dic.pop('version', None) @@ -323,7 +323,7 @@ def compatible_with_old_versions(cls, dic): def deconstruct(self) -> Dict: return { 'version': self.version, - 'debug': JmModuleConfig.enable_jm_debug, + 'log': JmModuleConfig.enable_jm_log, 'dir_rule': { 'rule': self.dir_rule.rule_dsl, 'base_dir': self.dir_rule.base_dir, @@ -372,9 +372,15 @@ def new_jm_client(self, domain=None, impl=None, cache=None, **kwargs) -> Jmcomic # 所有需要用到的 self.client 配置项如下 postman_conf: dict = deepcopy(self.client.postman.src_dict) # postman dsl 配置 meta_data: dict = postman_conf['meta_data'] # 元数据 - impl: str = impl or self.client.impl # client_key retry_times: int = self.client.retry_times # 重试次数 cache: str = cache if cache is not None else self.client.cache # 启用缓存 + impl: str = impl or self.client.impl # client_key + if isinstance(impl, type): + # eg: impl = JmHtmlClient + # noinspection PyUnresolvedReferences + impl = impl.client_key + + # start construct client # domain def decide_domain(): @@ -530,7 +536,7 @@ def invoke_plugin(self, plugin_class, kwargs: Any, extra: dict, pinfo: dict): # 构建插件对象 plugin: JmOptionPlugin = plugin_class.build(self) - jm_debug('plugin.invoke', f'调用插件: [{plugin_class.plugin_key}]') + jm_log('plugin.invoke', f'调用插件: [{plugin_class.plugin_key}]') # 调用插件功能 plugin.invoke(**kwargs) @@ -557,11 +563,11 @@ def handle_plugin_valid_exception(self, e, pinfo: dict, kwargs: dict, plugin): # ignore return - if mode == 'debug': - # debug - jm_debug('plugin.validation', + if mode == 'log': + # log + jm_log('plugin.validation', f'插件 [{e.plugin.plugin_key}] 参数校验异常:{e.msg}' - ) + ) return if mode == 'raise': @@ -573,13 +579,13 @@ def handle_plugin_valid_exception(self, e, pinfo: dict, kwargs: dict, plugin): # noinspection PyMethodMayBeStatic,PyUnusedLocal def handle_plugin_unexpected_error(self, e, pinfo: dict, kwargs: dict, plugin): msg = str(e) - jm_debug('plugin.error', f'插件 [{plugin.plugin_key}],运行遇到未捕获异常,异常信息: {msg}') + jm_log('plugin.error', f'插件 [{plugin.plugin_key}],运行遇到未捕获异常,异常信息: {msg}') raise e # noinspection PyMethodMayBeStatic,PyUnusedLocal def handle_plugin_exception(self, e, pinfo: dict, kwargs: dict, plugin): msg = str(e) - jm_debug('plugin.exception', f'插件 [{plugin.plugin_key}],调用失败,异常信息: {msg}') + jm_log('plugin.exception', f'插件 [{plugin.plugin_key}] 调用失败,异常信息: [{msg}]') raise e # noinspection PyMethodMayBeStatic @@ -607,7 +613,7 @@ def fix_kwargs(self, kwargs) -> Dict[str, Any]: if isinstance(k, (int, float)): newk = str(k) - jm_debug('plugin.kwargs', f'插件参数类型转换: {k} ({type(k)}) -> {newk} ({type(newk)})') + jm_log('plugin.kwargs', f'插件参数类型转换: {k} ({type(k)}) -> {newk} ({type(newk)})') new_kwargs[newk] = v continue diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py index 1ebefc08..d6a7cab7 100644 --- a/src/jmcomic/jm_plugin.py +++ b/src/jmcomic/jm_plugin.py @@ -34,8 +34,8 @@ def build(cls, option: JmOption) -> 'JmOptionPlugin': return cls(option) @classmethod - def debug(cls, msg, topic=None): - jm_debug( + def log(cls, msg, topic=None): + jm_log( topic=f'plugin.{cls.plugin_key}' + (f'.{topic}' if topic is not None else ''), msg=msg ) @@ -49,7 +49,7 @@ def require_true(self, case: Any, msg: str): raise PluginValidationException(self, msg) - def warning_lib_not_install(self, lib='psutil'): + def warning_lib_not_install(self, lib: str): msg = (f'插件`{self.plugin_key}`依赖库: {lib},请先安装{lib}再使用。' f'安装命令: [pip install {lib}]') import warnings @@ -71,13 +71,12 @@ def invoke(self, client = self.option.new_jm_client() client.login(username, password) - cookies = client['cookies'] - postman: dict = self.option.client.postman.src_dict - meta_data = postman.get('meta_data', {}) - meta_data['cookies'] = cookies - postman['meta_data'] = meta_data - self.debug('登录成功') + cookies = dict(client['cookies']) + self.option.update_cookies(cookies) + JmModuleConfig.APP_COOKIES = cookies + + self.log('登录成功') class UsageLogPlugin(JmOptionPlugin): @@ -146,7 +145,7 @@ def warning(): if len(warning_msg_list) != 0: warning_msg_list.insert(0, '硬件占用告警,占用过高可能导致系统卡死!') warning_msg_list.append('') - self.debug('\n'.join(warning_msg_list), topic='warning') + self.log('\n'.join(warning_msg_list), topic='warning') while True: # 获取CPU占用率(0~100) @@ -167,7 +166,7 @@ def warning(): # f"发送的字节数: {network_bytes_sent}", # f"接收的字节数: {network_bytes_received}", ]) - self.debug(msg, topic='log') + self.log(msg, topic='log') if enable_warning is True: # 警告 @@ -211,11 +210,12 @@ def find_update(album: JmAlbumDetail): return photo_ls class FindUpdateDownloader(JmDownloader): - def filter_iter_objs(self, iter_objs): - if not isinstance(iter_objs, JmAlbumDetail): - return iter_objs + def filter_iter_objs(self, detail): + if not detail.is_album(): + return detail - return find_update(iter_objs) + detail: JmAlbumDetail + return find_update(detail) # 调用下载api,指定option和downloader download_album( @@ -318,7 +318,7 @@ def addpath(f): def do_zip(self, source_dir, zip_path, all_filepath, msg): if len(all_filepath) == 0: - self.debug('无下载文件,无需压缩', 'skip') + self.log('无下载文件,无需压缩', 'skip') return None from common import backup_dir_to_zip @@ -328,7 +328,7 @@ def do_zip(self, source_dir, zip_path, all_filepath, msg): acceptor=lambda f: os.path.isdir(f) or self.unified_path(f) in all_filepath ).close() - self.debug(msg, 'finish') + self.log(msg, 'finish') return self.unified_path(source_dir) def after_zip(self, dir_zip_dict: Dict[str, Optional[str]]): @@ -365,13 +365,13 @@ def delete_all_files_and_empty_dir(self, all_downloaded: dict, dir_list: List[st continue os.remove(f) - self.debug(f'删除原文件: {f}', 'remove') + self.log(f'删除原文件: {f}', 'remove') for d in sorted(dir_list, reverse=True): # check exist if file_exists(d): os.rmdir(d) - self.debug(f'删除文件夹: {d}', 'remove') + self.log(f'删除文件夹: {d}', 'remove') class ClientProxyPlugin(JmOptionPlugin): @@ -394,7 +394,7 @@ def hook_new_jm_client(*args, **kwargs): if whitelist is not None and client.client_key not in whitelist: return client - self.debug(f'proxy client {client} with {proxy_clazz}') + self.log(f'proxy client {client} with {proxy_clazz}') return proxy_clazz(client, **clazz_init_kwargs) self.option.new_jm_client = hook_new_jm_client @@ -415,7 +415,7 @@ def invoke(self, def apply_filter_then_decide_cache(image: JmImageDetail): if image.img_file_suffix not in allowed_suffix_set: - self.debug(f'跳过下载图片: {image.tag},' + self.log(f'跳过下载图片: {image.tag},' f'因为其后缀\'{image.img_file_suffix}\'不在允许的后缀集合{allowed_suffix_set}内') # hook is_exists True to skip download image.is_exists = True @@ -446,25 +446,25 @@ def invoke(self, epostman = econfig.create_email_postman() epostman.send(content, title) - self.debug('Email sent successfully') + self.log('Email sent successfully') -class DebugTopicFilterPlugin(JmOptionPlugin): - plugin_key = 'debug_topic_filter' +class LogTopicFilterPlugin(JmOptionPlugin): + plugin_key = 'log_topic_filter' def invoke(self, whitelist) -> None: if whitelist is not None: whitelist = set(whitelist) - old_jm_debug = JmModuleConfig.debug_executor + old_jm_log = JmModuleConfig.log_executor - def new_jm_debug(topic, msg): + def new_jm_log(topic, msg): if whitelist is not None and topic not in whitelist: return - old_jm_debug(topic, msg) + old_jm_log(topic, msg) - JmModuleConfig.debug_executor = new_jm_debug + JmModuleConfig.log_executor = new_jm_log class AutoSetBrowserCookiesPlugin(JmOptionPlugin): @@ -510,10 +510,10 @@ def invoke(self, if isinstance(e, ImportError): self.warning_lib_not_install('browser_cookie3') else: - self.debug('获取浏览器cookies失败,请关闭浏览器重试') + self.log('获取浏览器cookies失败,请关闭浏览器重试') return self.option.update_cookies( {k: v for k, v in cookies.items() if k in self.accepted_cookies_keys} ) - self.debug('获取浏览器cookies成功') + self.log('获取浏览器cookies成功') diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py index 23536884..90f41ed8 100644 --- a/src/jmcomic/jm_toolkit.py +++ b/src/jmcomic/jm_toolkit.py @@ -115,7 +115,7 @@ def analyse_jm_album_html(cls, html: str) -> JmAlbumDetail: @classmethod def analyse_jm_search_html(cls, html: str) -> JmSearchPage: - return JmSearchTool.parse_html_to_page(html) + return JmPageTool.parse_html_to_search_page(html) @classmethod def reflect_new_instance(cls, html: str, cls_field_prefix: str, clazz: type): @@ -252,7 +252,7 @@ def require_not_match(cls, html: str, pattern: Pattern, *, msg_func): ) -class JmSearchTool: +class JmPageTool: # 用来缩减html的长度 pattern_html_search_shorten_for = compile(r'