From bf2ca6efbc07e4d6dbc9ddc8ece70217b567c9a0 Mon Sep 17 00:00:00 2001 From: hect0x7 <93357912+hect0x7@users.noreply.github.com> Date: Wed, 1 Nov 2023 18:18:13 +0800 Subject: [PATCH] =?UTF-8?q?v2.3.17:=20=E6=94=AF=E6=8C=81=E4=BB=8E=E6=B5=8F?= =?UTF-8?q?=E8=A7=88=E5=99=A8=E8=8E=B7=E5=8F=96cookies=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E7=99=BB=E5=BD=95=E7=A6=81=E6=BC=AB=E3=80=90=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E3=80=91;=20=E9=87=8D=E6=9E=84Client=E7=BC=93=E5=AD=98?= =?UTF-8?q?=E6=9C=BA=E5=88=B6=E3=80=81=E6=94=AF=E6=8C=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E7=BC=93=E5=AD=98=E7=BA=A7=E5=88=AB=20(#161)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- assets/docs/sources/index.md | 3 +- assets/docs/sources/option_file_syntax.md | 5 ++ setup.py | 2 +- src/jmcomic/__init__.py | 2 +- src/jmcomic/jm_client_impl.py | 60 +++++++++++++----- src/jmcomic/jm_client_interface.py | 4 +- src/jmcomic/jm_config.py | 2 +- src/jmcomic/jm_option.py | 75 +++++++++++++++++++++-- src/jmcomic/jm_plugin.py | 65 ++++++++++++++++++-- tests/test_jmcomic/__init__.py | 13 ++-- tests/test_jmcomic/test_jm_client.py | 65 ++++++++++++++++++++ 12 files changed, 259 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 998134f5..faf13a0f 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ $ jmcomic 422866 - **可扩展性强** - **支持Plugin插件,可以方便地扩展功能,以及使用别人的插件** - - 目前内置支持的插件有:`登录插件` `硬件占用监控插件` `只下载新章插件` `压缩文件插件` `下载特定后缀图片插件` `发送QQ邮件插件` `日志主题过滤插件` + - 目前内置支持的插件有:`登录插件` `硬件占用监控插件` `只下载新章插件` `压缩文件插件` `下载特定后缀图片插件` `发送QQ邮件插件` `日志主题过滤插件` `自动使用浏览器cookies插件` - 支持自定义本子/章节/图片下载前后的回调函数 - 支持自定义debug/logging - 支持自定义类:`Downloader(负责调度)` `Option(负责配置)` `Client(负责请求)` `实体类`等 diff --git a/assets/docs/sources/index.md b/assets/docs/sources/index.md index 4a9aa40a..d385699e 100644 --- a/assets/docs/sources/index.md +++ b/assets/docs/sources/index.md @@ -25,8 +25,7 @@ Python API for JMComic(禁漫天堂) - Highly extensible: - Supports Plugin plugins for easy functionality extension and use of other plugins. - - Currently built-in - plugins: `login plugin`, `hardware usage monitoring plugin`, `only download new chapters plugin`, `zip compression plugin`, `image suffix filter plugin` `send qq email plugin` `debug logging topic filter plugin`. + - Currently built-in plugins: `login plugin`, `hardware usage monitoring plugin`, `only download new chapters plugin`, `zip compression plugin`, `image suffix filter plugin` `send qq email plugin` `debug logging topic filter plugin` `auto set browser cookies plugin`. - Supports custom callback functions before and after downloading album/chapter/images. - Supports custom debug logging. - Supports custom core diff --git a/assets/docs/sources/option_file_syntax.md b/assets/docs/sources/option_file_syntax.md index 8fcc8a6f..5ae82dfa 100644 --- a/assets/docs/sources/option_file_syntax.md +++ b/assets/docs/sources/option_file_syntax.md @@ -120,6 +120,11 @@ plugins: proxy_client_key: cl_proxy_future # 代理类的client_key whitelist: [ api, ] # 白名单,当client.impl匹配白名单时才代理 + - plugin: auto_set_browser_cookies # 自动获取浏览器cookies,详见插件类 + kwargs: + browser: chrome + domain: 18comic.vip + after_album: - plugin: zip # 压缩文件插件 kwargs: diff --git a/setup.py b/setup.py index 5407cc42..50c61848 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ package_dir={"": "src"}, python_requires=">=3.7", install_requires=[ - 'commonX>=0.5.7', + 'commonX>=0.6.2', 'curl_cffi', 'PyYAML', 'Pillow', diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index 45806004..558a3213 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,7 +2,7 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.3.16' +__version__ = '2.3.17' from .api import * from .jm_plugin import * diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index f610fb36..5e1f4219 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -25,6 +25,8 @@ def __init__(self, fallback_domain_list.insert(0, domain) self.domain_list = fallback_domain_list + self.CLIENT_CACHE = None + self.enable_cache() self.after_init() def after_init(self): @@ -111,31 +113,59 @@ def debug_topic_request(self): def before_retry(self, e, kwargs, retry_count, url): jm_debug('req.error', str(e)) - def enable_cache(self, debug=False): - if self.is_cache_enabled(): - return + def enable_cache(self): + # noinspection PyDefaultArgument,PyShadowingBuiltins + def make_key(args, kwds, typed, + kwd_mark=(object(),), + fasttypes={int, str}, + tuple=tuple, type=type, len=len): + key = args + if kwds: + key += kwd_mark + for item in kwds.items(): + key += item + if typed: + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for v in kwds.values()) + elif len(key) == 1 and type(key[0]) in fasttypes: + return key[0] + return hash(key) def wrap_func_with_cache(func_name, cache_field_name): if hasattr(self, cache_field_name): return - if sys.version_info > (3, 9): - import functools - cache = functools.cache - else: - from functools import lru_cache - cache = lru_cache() - func = getattr(self, func_name) - setattr(self, func_name, cache(func)) + + def cache_wrapper(*args, **kwargs): + cache = self.CLIENT_CACHE + + # Equivalent to not enable cache + if cache is None: + return func(*args, **kwargs) + + key = make_key(args, kwargs, False) + sentinel = object() # unique object used to signal cache misses + + result = cache.get(key, sentinel) + if result is not sentinel: + return result + + result = func(*args, **kwargs) + cache[key] = result + return result + + setattr(self, func_name, cache_wrapper) for func_name in self.func_to_cache: wrap_func_with_cache(func_name, f'__{func_name}.cache.dict__') - setattr(self, '__enable_cache__', True) + def set_cache_dict(self, cache_dict: Optional[Dict]): + self.CLIENT_CACHE = cache_dict - def is_cache_enabled(self) -> bool: - return getattr(self, '__enable_cache__', False) + def get_cache_dict(self): + return self.CLIENT_CACHE def get_domain_list(self): return self.domain_list @@ -635,7 +665,7 @@ class FutureClientProxy(JmcomicClient): client_key = 'cl_proxy_future' proxy_methods = ['album_comment', 'enable_cache', 'get_domain_list', 'get_html_domain', 'get_html_domain_all', 'get_jm_image', - 'is_cache_enabled', 'set_domain_list', ] + 'set_cache_dict', 'get_cache_dict', 'set_domain_list', ] class FutureWrapper: def __init__(self, future): diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py index dae21799..de4ee5ca 100644 --- a/src/jmcomic/jm_client_interface.py +++ b/src/jmcomic/jm_client_interface.py @@ -160,10 +160,10 @@ def get_photo_detail(self, def of_api_url(self, api_path, domain): raise NotImplementedError - def enable_cache(self, debug=False): + def set_cache_dict(self, cache_dict: Optional[Dict]): raise NotImplementedError - def is_cache_enabled(self) -> bool: + def get_cache_dict(self) -> Optional[Dict]: raise NotImplementedError def check_photo(self, photo: JmPhotoDetail): diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index 1c3e14af..0c26d03b 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -335,7 +335,7 @@ def new_postman(cls, session=False, **kwargs): }, }, 'client': { - 'cache': None, + 'cache': None, # see CacheRegistry 'domain': [], 'postman': { 'type': 'cffi', diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py index 73406a06..e725d0b4 100644 --- a/src/jmcomic/jm_option.py +++ b/src/jmcomic/jm_option.py @@ -1,6 +1,58 @@ from .jm_client_impl import * +class CacheRegistry: + REGISTRY = {} + + @classmethod + def level_option(cls, option, _client): + registry = cls.REGISTRY + registry.setdefault(option, {}) + return registry[option] + + @classmethod + def level_client(cls, _option, client): + registry = cls.REGISTRY + registry.setdefault(client, {}) + return registry[client] + + @classmethod + def enable_client_cache_on_condition(cls, option: 'JmOption', client: JmcomicClient, cache: Union[None, bool, str, Callable]): + """ + cache parameter + + if None: no cache + + if bool: + true: level_option + + false: no cache + + if str: + (invoke corresponding Cache class method) + + :param option: JmOption + :param client: JmcomicClient + :param cache: config dsl + """ + if cache is None: + return + + elif isinstance(cache, bool): + if cache is False: + return + else: + cache = cls.level_option + + elif isinstance(cache, str): + func = getattr(cls, cache, None) + assert func is not None, f'未实现的cache配置名: {cache}' + cache = func + + cache: Callable + client.set_cache_dict(cache(option, client)) + + class DirRule: rule_sample = [ # 根目录 / Album-id / Photo-序号 / @@ -312,12 +364,17 @@ def build_jm_client(self, **kwargs): return self.new_jm_client(**kwargs) def new_jm_client(self, domain=None, impl=None, cache=None, **kwargs) -> JmcomicClient: + """ + 创建新的Client(客户端),不同Client之间的元数据不共享 + """ + from copy import deepcopy + # 所有需要用到的 self.client 配置项如下 - postman_conf: dict = self.client.postman.src_dict # postman dsl 配置 - meta_data: dict = postman_conf['meta_data'] # 请求元信息 + postman_conf: dict = deepcopy(self.client.postman.src_dict) # postman dsl 配置 + meta_data: dict = postman_conf['meta_data'] # 元数据 impl: str = impl or self.client.impl # client_key retry_times: int = self.client.retry_times # 重试次数 - cache: str = cache or self.client.cache # 启用缓存 + cache: str = cache if cache is not None else self.client.cache # 启用缓存 # domain def decide_domain(): @@ -357,11 +414,19 @@ def decide_domain(): ) # enable cache - if cache is True: - client.enable_cache() + CacheRegistry.enable_client_cache_on_condition(self, client, cache) return client + def update_cookies(self, cookies: dict): + metadata: dict = self.client.postman.meta_data.src_dict + orig_cookies: Optional[Dict] = metadata.get('cookies', None) + if orig_cookies is None: + metadata['cookies'] = cookies + else: + orig_cookies.update(cookies) + metadata['cookies'] = orig_cookies + # noinspection PyMethodMayBeStatic def decide_client_domain(self, client_key: str) -> List[str]: is_client_type = lambda ctype: self.client_key_is_given_type(client_key, ctype) diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py index 765a6e15..1ebefc08 100644 --- a/src/jmcomic/jm_plugin.py +++ b/src/jmcomic/jm_plugin.py @@ -49,6 +49,12 @@ def require_true(self, case: Any, msg: str): raise PluginValidationException(self, msg) + def warning_lib_not_install(self, lib='psutil'): + msg = (f'插件`{self.plugin_key}`依赖库: {lib},请先安装{lib}再使用。' + f'安装命令: [pip install {lib}]') + import warnings + warnings.warn(msg) + class JmLoginPlugin(JmOptionPlugin): """ @@ -111,12 +117,7 @@ def monitor_resource_usage( try: import psutil except ImportError: - msg = (f'插件`{self.plugin_key}`依赖psutil库,请先安装psutil再使用。' - f'安装命令: [pip install psutil]') - import warnings - warnings.warn(msg) - # import sys - # print(msg, file=sys.stderr) + self.warning_lib_not_install('psutil') return from time import sleep @@ -464,3 +465,55 @@ def new_jm_debug(topic, msg): old_jm_debug(topic, msg) JmModuleConfig.debug_executor = new_jm_debug + + +class AutoSetBrowserCookiesPlugin(JmOptionPlugin): + plugin_key = 'auto_set_browser_cookies' + + accepted_cookies_keys = str_to_set(''' + yuo1 + remember_id + remember + ''') + + def invoke(self, + browser: str, + domain: str, + ) -> None: + """ + 坑点预警:由于禁漫需要校验同一设备,使用该插件需要配置自己浏览器的headers,例如 + + ```yml + client: + postman: + meta_data: + headers: { + # 浏览器headers + } + + # 插件配置如下: + plugins: + after_init: + - plugin: auto_set_browser_cookies + kwargs: + browser: chrome + domain: 18comic.vip + ``` + + :param browser: chrome/edge/... + :param domain: 18comic.vip/... + :return: cookies + """ + cookies, e = get_browser_cookies(browser, domain, safe=True) + + if cookies is None: + if isinstance(e, ImportError): + self.warning_lib_not_install('browser_cookie3') + else: + self.debug('获取浏览器cookies失败,请关闭浏览器重试') + return + + self.option.update_cookies( + {k: v for k, v in cookies.items() if k in self.accepted_cookies_keys} + ) + self.debug('获取浏览器cookies成功') diff --git a/tests/test_jmcomic/__init__.py b/tests/test_jmcomic/__init__.py index 20b79e5d..5fc73d37 100644 --- a/tests/test_jmcomic/__init__.py +++ b/tests/test_jmcomic/__init__.py @@ -46,11 +46,7 @@ def tearDown(self) -> None: @classmethod def setUpClass(cls): # 设置 JmOption,JmcomicClient - try: - option = create_option_by_env('JM_OPTION_PATH_TEST') - except JmcomicException: - option = create_option('./assets/option/option_test.yml') - + option = cls.new_option() cls.option = option cls.client = option.build_jm_client() @@ -61,6 +57,13 @@ def setUpClass(cls): return cost_time_dict[cls.__name__] = ts() + @classmethod + def new_option(cls): + try: + return create_option_by_env('JM_OPTION_PATH_TEST') + except JmcomicException: + return create_option('./assets/option/option_test.yml') + @classmethod def tearDownClass(cls) -> None: if skip_time_cost_debug: diff --git a/tests/test_jmcomic/test_jm_client.py b/tests/test_jmcomic/test_jm_client.py index 6a722f57..442ee978 100644 --- a/tests/test_jmcomic/test_jm_client.py +++ b/tests/test_jmcomic/test_jm_client.py @@ -264,3 +264,68 @@ def test_search_generator(self): }) print(page.page_count) break + + def test_cache_level(self): + def get(cl): + return cl.get_album_detail('123') + + def assertEqual(first_cl, second_cl, msg): + return self.assertEqual( + get(first_cl), + get(second_cl), + msg, + ) + + def assertNotEqual(first_cl, second_cl, msg): + return self.assertNotEqual( + get(first_cl), + get(second_cl), + msg, + ) + + cases = [ + ( + CacheRegistry.level_option, + CacheRegistry.level_option, + CacheRegistry.level_client, + CacheRegistry.level_client, + ), + ( + True, + 'level_option', + 'level_client', + CacheRegistry.level_client, + ) + ] + + def run(arg1, arg2, arg3, arg4): + op = self.new_option() + + c1 = op.new_jm_client(cache=arg1) + c2 = op.new_jm_client(cache=arg2) + c3 = op.new_jm_client(cache=arg3) + c4 = op.new_jm_client(cache=arg4) + c5 = op.new_jm_client(cache=False) + + # c1 == c2 + # c3 == c4 + # c1 != c3 + # c5 != c1, c2, c3, c4 + invoke_all( + args_func_list=[ + (None, func) for func in [ + lambda: assertEqual(c1, c2, 'equals in same option level'), + lambda: assertNotEqual(c3, c4, 'not equals in client level'), + lambda: assertNotEqual(c1, c3, 'not equals in different level'), + lambda: assertNotEqual(c1, c5, 'not equals for None level'), + lambda: assertNotEqual(c3, c5, 'not equals for None level'), + ] + ] + ) + + future_ls = thread_pool_executor( + iter_objs=cases, + apply_each_obj_func=run, + ) + + return [f.result() for f in future_ls] # 等待执行完毕