From 1d5b1f0050262e6b88ab306032f378b25e5dbf60 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:18:29 +0800 Subject: [PATCH 001/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=E4=B8=BB?= =?UTF-8?q?=E9=A1=B5=E6=94=B6=E8=97=8F=E6=A8=A1=E5=BC=8F=E4=B8=BA`collecti?= =?UTF-8?q?on`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 4 ++-- f2/apps/douyin/handler.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 97aeb06..c84b18a 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -302,11 +302,11 @@ def merge_config(main_conf, custom_conf, **kwargs): @click.option( "--mode", "-M", - type=click.Choice(["one", "post", "like", "collect", "mix", "live"]), + type=click.Choice(["one", "post", "like", "collection", "mix", "live"]), # default="post", # required=True, help=_( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collect),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),合辑(mix),直播(live)" ), ) @click.option( diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 5bb8f8a..abd5b19 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -14,7 +14,7 @@ UserPost, UserProfile, UserLike, - UserCollect, + UserCollection, UserMix, PostDetail, UserLive, @@ -408,7 +408,7 @@ async def fetch_user_like_videos( logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) - @mode_handler("collect") + @mode_handler("collection") async def handle_user_collect(self): """ 用于处理用户收藏的视频 (Used to process videos collected by users) @@ -426,14 +426,14 @@ async def handle_user_collect(self): async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) - async for aweme_data_list in self.fetch_user_collect_videos( + async for aweme_data_list in self.fetch_user_collection_videos( max_cursor, page_counts, 
max_counts ): await self.downloader.create_download_tasks( self.kwargs, aweme_data_list, user_path ) - async def fetch_user_collect_videos( + async def fetch_user_collection_videos( self, max_cursor: int = 0, page_counts: int = 20, max_counts: int = None ) -> AsyncGenerator[List[Dict[str, Any]], None]: """ @@ -470,8 +470,8 @@ async def fetch_user_collect_videos( logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: - params = UserCollect(cursor=max_cursor, count=current_request_size) - response = await crawler.fetch_user_collect(params) + params = UserCollection(cursor=max_cursor, count=current_request_size) + response = await crawler.fetch_user_collection(params) video = UserCollectFilter(response) logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) From f058cf571912535db522f3a74b51d76bbb2d7b8f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:19:25 +0800 Subject: [PATCH 002/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E4=B8=BB=E9=A1=B5=E6=94=B6=E8=97=8F=E6=A8=A1=E5=BC=8F=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guide/apps/douyin/index.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/guide/apps/douyin/index.md b/docs/guide/apps/douyin/index.md index 495f955..c95294e 100644 --- a/docs/guide/apps/douyin/index.md +++ b/docs/guide/apps/douyin/index.md @@ -15,7 +15,7 @@ outline: deep | 下载单个作品 | handle_one_video | | 下载用户发布作品 | handle_user_post | | 下载用户喜欢作品 | handle_user_like | -| 下载用户收藏作品 | handle_user_collect | +| 下载用户收藏作品 | handle_user_collection | | 下载用户合辑作品 | handle_user_mix | | 下载用户直播流 | handle_user_live | | 下载用户首页推荐作品 | handle_user_feed | @@ -25,7 +25,7 @@ outline: deep | 单个作品数据 | fetch_one_video | 🟢 | | 用户发布作品数据 | fetch_user_post_videos | 🟢 | | 用户喜欢作品数据 | fetch_user_like_videos | 🟢 | -| 用户收藏作品数据 | fetch_user_collect_videos | 🟢 | +| 用户收藏作品数据 | fetch_user_collection_videos | 
🟢 | | 用户合辑作品数据 | fetch_user_mix_videos | 🟢 | | 用户直播流数据 | fetch_user_live_videos | 🟢 | | 用户直播流数据2 | fetch_user_live_videos_by_room_id | 🟢 | @@ -74,7 +74,7 @@ outline: deep | 用户信息接口地址 | DouyinCrawler | fetch_user_profile | 🟢 | | 主页作品接口地址 | DouyinCrawler | fetch_user_post | 🟢 | | 喜欢作品接口地址 | DouyinCrawler | fetch_user_like | 🟢 | -| 收藏作品接口地址 | DouyinCrawler | fetch_user_collect | 🟢 | +| 收藏作品接口地址 | DouyinCrawler | fetch_user_collection | 🟢 | | 合辑作品接口地址 | DouyinCrawler | fetch_user_mix | 🟢 | | 作品详情接口地址 | DouyinCrawler | fetch_post_detail | 🟢 | | 作品评论接口地址 | DouyinCrawler | fetch_post_comment | 🟡 | @@ -167,7 +167,7 @@ outline: deep | :--- | :--- | :--- | | aweme_data | dict | 视频数据字典,包含视频ID、视频文案、作者昵称、页码等 | -<<< @/snippets/douyin/user-collect.py{16-17,22-25} +<<< @/snippets/douyin/user-collection.py{16-17,22-25} ### 用户合辑作品数据 🟢 From c53b249b79c3ae434176e91b3971dc37c5dc8369 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:20:13 +0800 Subject: [PATCH 003/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=20`user-collec?= =?UTF-8?q?tion`=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/{user-collect.py => user-collection.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename docs/snippets/douyin/{user-collect.py => user-collection.py} (93%) diff --git a/docs/snippets/douyin/user-collect.py b/docs/snippets/douyin/user-collection.py similarity index 93% rename from docs/snippets/douyin/user-collect.py rename to docs/snippets/douyin/user-collection.py index d37064c..c9966bb 100644 --- a/docs/snippets/douyin/user-collect.py +++ b/docs/snippets/douyin/user-collection.py @@ -14,13 +14,13 @@ async def main(): results = [ aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos() + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos() ] print(results) print("-------------------") results = 
[ aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( 0, 10, 20 ) ] From 5e7c54f44fa0e19ad5145594230ba177b9fd9dc0 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:20:37 +0800 Subject: [PATCH 004/164] =?UTF-8?q?=E6=9B=B4=E6=AD=A3douyin=20`user-mix`?= =?UTF-8?q?=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-mix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/snippets/douyin/user-mix.py b/docs/snippets/douyin/user-mix.py index 049418a..f6715f7 100644 --- a/docs/snippets/douyin/user-mix.py +++ b/docs/snippets/douyin/user-mix.py @@ -19,7 +19,7 @@ async def main(): ) results = [ aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( mix_id ) ] @@ -27,7 +27,7 @@ async def main(): print("-------------------") results = [ aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( mix_id, 0, 10, 20 ) ] From 7883bf1b30be7c1b7fbfe1ff6f4ec0c4bf756c54 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:21:56 +0800 Subject: [PATCH 005/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=E4=B8=BB?= =?UTF-8?q?=E9=A1=B5=E6=94=B6=E8=97=8F=E6=A8=A1=E5=BC=8F=E4=B8=BA`collecti?= =?UTF-8?q?on`=E7=9A=84=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 4 ++-- f2/apps/douyin/model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 565b888..5ff78c7 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ 
-11,7 +11,7 @@ UserProfile, UserPost, UserLike, - UserCollect, + UserCollection, PostDetail, UserMix, UserLive, @@ -62,7 +62,7 @@ async def fetch_user_like(self, params: UserLike): logger.debug(_("喜欢作品接口地址:" + endpoint)) return await self._fetch_get_json(endpoint) - async def fetch_user_collect(self, params: UserCollect): + async def fetch_user_collection(self, params: UserCollection): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_COLLECTION, params.dict() ) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index da02197..ad142b3 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -91,7 +91,7 @@ class UserLike(BaseRequestModel): sec_user_id: str -class UserCollect(BaseRequestModel): +class UserCollection(BaseRequestModel): # POST cursor: int count: int From 4cf15af66d5df72da49ede67539304f66c889a26 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:22:25 +0800 Subject: [PATCH 006/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 4 ++-- f2/apps/douyin/model.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 5ff78c7..2d892c1 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -59,14 +59,14 @@ async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_FAVORITE_A, params.dict() ) - logger.debug(_("喜欢作品接口地址:" + endpoint)) + logger.debug(_("主页喜欢作品接口地址:" + endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collection(self, params: UserCollection): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_COLLECTION, params.dict() ) - logger.debug(_("收藏作品接口地址:" + endpoint)) + logger.debug(_("主页收藏作品接口地址:" + endpoint)) return await 
self._fetch_post_json(endpoint, params.dict()) async def fetch_user_mix(self, params: UserMix): diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index ad142b3..132f9c6 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -63,6 +63,7 @@ class BaseLiveModel2(BaseModel): app_id: str = "1128" msToken: str = TokenManager.gen_real_msToken() + class BaseLoginModel(BaseModel): service: str = "https://www.douyin.com" need_logo: str = "false" @@ -175,6 +176,7 @@ class UserLive(BaseLiveModel): web_rid: str room_id_str: str + class UserLive2(BaseLiveModel2): room_id: str @@ -210,8 +212,9 @@ class LoginGetQr(BaseLoginModel): fp: str = "" # msToken: str = TokenManager.gen_real_msToken() + class LoginCheckQr(BaseLoginModel): token: str = "" verifyFp: str = "" fp: str = "" - # msToken: str = TokenManager.gen_real_msToken() \ No newline at end of file + # msToken: str = TokenManager.gen_real_msToken() From 5c162e8753d5819a4c0ad9a652677a31549d5436 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:26:28 +0800 Subject: [PATCH 007/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9F2=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/cli/cli_commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/f2/cli/cli_commands.py b/f2/cli/cli_commands.py index 73b8134..c9ca6ff 100644 --- a/f2/cli/cli_commands.py +++ b/f2/cli/cli_commands.py @@ -35,8 +35,8 @@ def handle_version( ) -> None: if not value or ctx.resilient_parsing: return - logger.debug(f"Version {__version__._version}") - print(f"Version {__version__._version}") + + click.echo(f"Version {__version__._version}") ctx.exit() From 88957e4ce18e74c833dc16e8924594fa5445a3f9 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:27:01 +0800 Subject: [PATCH 008/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0`run=5Fapp`=E6=97=B6?= 
=?UTF-8?q?=E8=BE=93=E5=87=BA=E7=89=88=E6=9C=AC=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/cli/cli_commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/f2/cli/cli_commands.py b/f2/cli/cli_commands.py index c9ca6ff..9bdb674 100644 --- a/f2/cli/cli_commands.py +++ b/f2/cli/cli_commands.py @@ -138,6 +138,7 @@ def set_cli_config(ctx, **kwargs): async def run_app(kwargs): + logger.info(f"Version {__version__._version}") app_name = kwargs["app_name"] app_module = importlib.import_module(f"f2.apps.{app_name}.handler") await app_module.main(kwargs) From 8f6ba398ef7c39e6de1beee3da65b179f8e3c5b0 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:29:43 +0800 Subject: [PATCH 009/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=E5=B8=AE?= =?UTF-8?q?=E5=8A=A9=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 收藏作品(collect) -> 收藏作品(collection) --- f2/apps/douyin/help.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index f225451..b8c03ae 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -37,7 +37,7 @@ def help() -> None: "-M --mode", "[dark_cyan]Choice", _( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collect),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),合辑(mix),直播(live)" ), ), ( From 2cf9e05e3a5f19c232546ab28f16a04b597bdfc8 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:40:17 +0800 Subject: [PATCH 010/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=E4=B8=BB?= =?UTF-8?q?=E9=A1=B5=E6=94=B6=E8=97=8F=E6=A8=A1=E5=BC=8F=E4=B8=BA`collecti?= =?UTF-8?q?on`=E7=9A=84=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py 
index abd5b19..99d318d 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -409,7 +409,7 @@ async def fetch_user_like_videos( logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) @mode_handler("collection") - async def handle_user_collect(self): + async def handle_user_collection(self): """ 用于处理用户收藏的视频 (Used to process videos collected by users) From 209685b374444ca6f6720a6d40dd8b36c62d74da Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 16:48:41 +0800 Subject: [PATCH 011/164] =?UTF-8?q?=E4=BF=AE=E6=94=B9douyin=E4=B8=BB?= =?UTF-8?q?=E9=A1=B5=E6=94=B6=E8=97=8F=E8=BF=87=E6=BB=A4=E5=99=A8=E5=90=8D?= =?UTF-8?q?=E4=B8=BA`UserCollectionFilter`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 2 +- f2/apps/douyin/handler.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 312685f..6404be5 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -313,7 +313,7 @@ def _to_list(self): return list_dicts -class UserCollectFilter(UserPostFilter): +class UserCollectionFilter(UserPostFilter): def __init__(self, data): super().__init__(data) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 99d318d..1caff78 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -25,7 +25,7 @@ from f2.apps.douyin.filter import ( UserPostFilter, UserProfileFilter, - UserCollectFilter, + UserCollectionFilter, UserMixFilter, PostDetailFilter, UserLiveFilter, @@ -472,7 +472,7 @@ async def fetch_user_collection_videos( async with DouyinCrawler(self.kwargs) as crawler: params = UserCollection(cursor=max_cursor, count=current_request_size) response = await crawler.fetch_user_collection(params) - video = UserCollectFilter(response) + video = UserCollectionFilter(response) logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( From 
ab525126ebdd63478c1cd793b1120b0ed0ea220b Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 17:37:28 +0800 Subject: [PATCH 012/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit black格式化 --- f2/apps/douyin/filter.py | 52 ++++++++++++++++++++++++---------------- f2/utils/utils.py | 1 + 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 6404be5..d2d030d 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -173,13 +173,15 @@ def images(self): images_list = self._get_list_attr_value("$.aweme_list[*].images") return [ - [ - img["url_list"][0] - for img in images - if isinstance(img, dict) and "url_list" in img and img["url_list"] - ] - if images - else None + ( + [ + img["url_list"][0] + for img in images + if isinstance(img, dict) and "url_list" in img and img["url_list"] + ] + if images + else None + ) for images in images_list ] @@ -193,9 +195,11 @@ def animated_cover(self): # 逐个视频判断是否存在animated_cover animated_covers = [ - video.get("animated_cover", {}).get("url_list", [None])[0] - if video.get("animated_cover") - else None + ( + video.get("animated_cover", {}).get("url_list", [None])[0] + if video.get("animated_cover") + else None + ) for video in videos ] @@ -216,11 +220,15 @@ def video_bit_rate(self): bit_rate_data = self._get_list_attr_value("$.aweme_list[*].video.bit_rate") return [ - [aweme["bit_rate"]] - if isinstance(aweme, dict) - else [aweme[0]["bit_rate"]] - if len(aweme) == 1 - else [item["bit_rate"] for item in aweme] + ( + [aweme["bit_rate"]] + if isinstance(aweme, dict) + else ( + [aweme[0]["bit_rate"]] + if len(aweme) == 1 + else [item["bit_rate"] for item in aweme] + ) + ) for aweme in bit_rate_data ] @@ -724,11 +732,15 @@ def video_bit_rate(self): ) return [ - [aweme["bit_rate"]] - if 
isinstance(aweme, dict) - else [aweme[0]["bit_rate"]] - if len(aweme) == 1 - else [item["bit_rate"] for item in aweme] + ( + [aweme["bit_rate"]] + if isinstance(aweme, dict) + else ( + [aweme[0]["bit_rate"]] + if len(aweme) == 1 + else [item["bit_rate"] for item in aweme] + ) + ) for aweme in bit_rate_data ] diff --git a/f2/utils/utils.py b/f2/utils/utils.py index b0cfe5e..62d1eaa 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -196,6 +196,7 @@ def replaceT(obj: Union[str, Any]) -> Union[str, Any]: if isinstance(obj, str): return re.sub(reSub, "_", obj) + return obj # raise TypeError("输入应为字符串或字符串列表") From a62053c68ef35fcb48ca6919ac8820f082a90e6e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 17:38:15 +0800 Subject: [PATCH 013/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0douyin=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E8=BF=87=E6=BB=A4=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 101 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index d2d030d..98b73b1 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -330,6 +330,107 @@ def max_cursor(self): return self._get_attr_value("$.cursor") +class UserCollectsFilter(JSONModel): + + @property + def max_cursor(self): + return self._get_attr_value("$.cursor") + + @property + def status_code(self): + return self._get_attr_value("$.status_code") + + @property + def total_number(self): + return self._get_attr_value("$.total_number") + + @property + def has_more(self): + return bool(self._get_attr_value("$.has_more")) + + @property + def app_id(self): + return self._get_attr_value("$.collects_list[*].app_id") + + @property + def collects_cover(self): + return self._get_attr_value("$.collects_list[*].collects_cover.url_list[0]") + + @property + def collects_id(self): + return 
self._get_attr_value("$.collects_list[*].collects_id") + + @property + def collects_name(self): + return self._get_attr_value("$.collects_list[*].collects_name") + + @property + def create_time(self): + return timestamp_2_str(self._get_attr_value("$.collects_list[*].create_time")) + + @property + def follow_status(self): + return self._get_attr_value("$.collects_list[*].follow_status") + + @property + def followed_count(self): + return self._get_attr_value("$.collects_list[*].followed_count") + + @property + def is_normal_status(self): + return self._get_attr_value("$.collects_list[*].is_normal_status") + + @property + def item_type(self): + return self._get_attr_value("$.collects_list[*].item_type") + + @property + def last_collect_time(self): + return timestamp_2_str( + self._get_attr_value("$.collects_list[*].last_collect_time") + ) + + @property + def play_count(self): + return self._get_attr_value("$.collects_list[*].play_count") + + @property + def states(self): + return self._get_attr_value("$.collects_list[*].states") + + @property + def status(self): + return self._get_attr_value("$.collects_list[*].status") + + @property + def system_type(self): + return self._get_attr_value("$.collects_list[*].system_type") + + @property + def total_number(self): + return self._get_attr_value("$.collects_list[*].total_number") + + @property + def user_id(self): + return self._get_attr_value("$.collects_list[*].user_id") + + # user_info + @property + def nickname(self): + return replaceT(self._get_attr_value("$.collects_list[*].user_info.nickname")) + + @property + def uid(self): + return self._get_attr_value("$.collects_list[*].user_info.uid") + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + class UserMixFilter(UserPostFilter): def __init__(self, data): super().__init__(data) From d8bf0323e8393e64f81c6530e7410b178da90602 Mon Sep 17 
00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 17:39:29 +0800 Subject: [PATCH 014/164] =?UTF-8?q?=E5=88=A0=E9=99=A4douyin=20`filter`?= =?UTF-8?q?=E7=9A=84lambda=E6=96=B9=E6=B3=95=E6=B3=A8=E9=87=8A=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 减少代码体积 --- f2/apps/douyin/filter.py | 85 ---------------------------------------- 1 file changed, 85 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 98b73b1..fa3b4f2 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -446,91 +446,6 @@ def __init__(self, data): class PostDetailFilter(JSONModel): - # api_status_code = property(lambda self: self._get_attr_value("$.status_code")) - # # author - # nickname = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.author.nickname"))) - # sec_user_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.sec_uid")) - # short_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.short_id")) - # uid = property(lambda self: self._get_attr_value("$.aweme_detail.author.uid")) - # unique_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.unique_id")) - - # can_comment = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_comment")) - # can_forward = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_forward")) - # can_share = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_share")) - # can_show_comment = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_show_comment")) - # aweme_type = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.aweme_type")) - # aweme_id = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_id")) - # comment_gid = property(lambda self: self._get_attr_value("$.aweme_detail.comment_gid")) - # create_time = property(lambda self: 
timestamp_2_str(self._get_attr_value("$.aweme_detail.create_time"))) - # desc = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.desc"))) - # duration = property(lambda self: self._get_attr_value("$.aweme_detail.duration")) - # is_ads = property(lambda self: self._get_attr_value("$.aweme_detail.is_ads")) - # is_story = property(lambda self: self._get_attr_value("$.aweme_detail.is_story")) - # is_top = property(lambda self: self._get_attr_value("$.aweme_detail.is_top")) - # video_bit_rate = property(lambda self: [ - # [aweme['bit_rate']] if isinstance(aweme, dict) - # else [aweme[0]['bit_rate']] if len(aweme) == 1 - # else [item['bit_rate'] for item in aweme] - # for aweme in self._get_list_attr_value("$.aweme_detail.video.bit_rate") - # ]) - # video_play_addr = property(lambda self: self._get_attr_value("$.aweme_detail.video.play_addr.url_list[0]")) - # images = property(lambda self: [ - # [img['url_list'][0] for img in images if isinstance(img, dict) and 'url_list' in img and img['url_list']] - # if images else None - # for images in self._get_list_attr_value("$.aweme_detail.images") - # ]) - - # # aweme status - # is_delete = property(lambda self: self._get_attr_value("$.aweme_detail.status.is_delete")) - # is_prohibited = property(lambda self: self._get_attr_value("$.aweme_detail.status.is_prohibited")) - - # is_long_video = property(lambda self: self._get_attr_value("$.aweme_detail.long_video")) - # media_type = property(lambda self: self._get_attr_value("$.aweme_detail.media_type")) - # # mix - # mix_desc = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.mix_info.mix_desc"))) - # mix_create_time = property(lambda self: timestamp_2_str(self._get_attr_value("$.aweme_detail.mix_info.mix_create_time"))) - # mix_id = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_id")) - # mix_name = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_name")) - # mix_pic_type = property(lambda self: 
self._get_attr_value("$.aweme_detail.mix_info.mix_pic_type")) - # mix_type = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_type")) - # mix_share_url = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_share_url")) - # mix_update_time = property(lambda self: timestamp_2_str(self._get_attr_value("$.aweme_detail.mix_info.mix_update_time"))) - # # music - # is_commerce_music = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_commerce_music")) - # is_original = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_original")) - # is_original_sound = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_original_sound")) - # is_pgc = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_pgc")) - # music_author = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.author"))) - # music_author_deleted = property(lambda self: self._get_attr_value("$.aweme_detail.music.author_deleted")) - # music_duration = property(lambda self: self._get_attr_value("$.aweme_detail.music.duration")) - # music_id = property(lambda self: self._get_attr_value("$.aweme_detail.music.id")) - # music_id_str = property(lambda self: self._get_attr_value("$.aweme_detail.music.id_str")) - # music_mid = property(lambda self: self._get_attr_value("$.aweme_detail.music.mid")) - # pgc_author = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author"))) - # pgc_author_title = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author_title"))) - # pgc_music_type = property(lambda self: self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_music_type")) - # music_status = property(lambda self: self._get_attr_value("$.aweme_detail.music.status")) - # music_owner_handle = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.owner_handle"))) - # music_owner_id = 
property(lambda self: self._get_attr_value("$.aweme_detail.music.owner_id")) - # music_owner_nickname = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.owner_nickname"))) - # music_play_url = property(lambda self: self._get_attr_value("$.aweme_detail.music.play_url.url_list[0]")) - - # # position - # position = property(lambda self: self._get_attr_value("$.aweme_detail.position")) - # # region = property(lambda self: self._get_attr_value("$.aweme_detail.region")) - - # # seo_ocr_content - # seo_ocr_content = property(lambda self: self._get_attr_value("$.aweme_detail.seo_info.seo_ocr_content")) - - # admire_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.admire_count")) - # collect_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.collect_count")) - # comment_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.comment_count")) - # digg_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.digg_count")) - # # play_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.play_count")) - # share_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.share_count")) - - # hashtag_ids = property(lambda self: self._get_list_attr_value("$.aweme_detail.text_extra[*].hashtag_id")) - # hashtag_names = property(lambda self: self._get_list_attr_value("$.aweme_detail.text_extra[*].hashtag_name")) @property def api_status_code(self): From 96b17b7488740d14854ad5e57e33ce31846ab9ca Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 2 Mar 2024 18:14:15 +0800 Subject: [PATCH 015/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0douyin=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E8=BF=87=E6=BB=A4=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 对[*]使用self._get_list_attr_value方法 --- f2/apps/douyin/filter.py | 50 +++++++++++++++++++++++++--------------- 1 file 
changed, 31 insertions(+), 19 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index fa3b4f2..9c72b42 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -350,78 +350,90 @@ def has_more(self): @property def app_id(self): - return self._get_attr_value("$.collects_list[*].app_id") + return self._get_list_attr_value("$.collects_list[*].app_id") @property def collects_cover(self): - return self._get_attr_value("$.collects_list[*].collects_cover.url_list[0]") + return self._get_list_attr_value( + "$.collects_list[*].collects_cover.url_list[0]" + ) @property def collects_id(self): - return self._get_attr_value("$.collects_list[*].collects_id") + return self._get_list_attr_value("$.collects_list[*].collects_id") @property def collects_name(self): - return self._get_attr_value("$.collects_list[*].collects_name") + return self._get_list_attr_value("$.collects_list[*].collects_name") @property def create_time(self): - return timestamp_2_str(self._get_attr_value("$.collects_list[*].create_time")) + create_times = self._get_list_attr_value("$.collects_list[*].create_time") + return ( + [timestamp_2_str(ct) for ct in create_times] + if isinstance(create_times, list) + else timestamp_2_str(create_times) + ) @property def follow_status(self): - return self._get_attr_value("$.collects_list[*].follow_status") + return self._get_list_attr_value("$.collects_list[*].follow_status") @property def followed_count(self): - return self._get_attr_value("$.collects_list[*].followed_count") + return self._get_list_attr_value("$.collects_list[*].followed_count") @property def is_normal_status(self): - return self._get_attr_value("$.collects_list[*].is_normal_status") + return self._get_list_attr_value("$.collects_list[*].is_normal_status") @property def item_type(self): - return self._get_attr_value("$.collects_list[*].item_type") + return self._get_list_attr_value("$.collects_list[*].item_type") @property def last_collect_time(self): - return 
timestamp_2_str( - self._get_attr_value("$.collects_list[*].last_collect_time") + create_times = self._get_list_attr_value("$.collects_list[*].last_collect_time") + return ( + [timestamp_2_str(ct) for ct in create_times] + if isinstance(create_times, list) + else timestamp_2_str(create_times) ) @property def play_count(self): - return self._get_attr_value("$.collects_list[*].play_count") + return self._get_list_attr_value("$.collects_list[*].play_count") @property def states(self): - return self._get_attr_value("$.collects_list[*].states") + return self._get_list_attr_value("$.collects_list[*].states") @property def status(self): - return self._get_attr_value("$.collects_list[*].status") + return self._get_list_attr_value("$.collects_list[*].status") @property def system_type(self): - return self._get_attr_value("$.collects_list[*].system_type") + return self._get_list_attr_value("$.collects_list[*].system_type") @property def total_number(self): - return self._get_attr_value("$.collects_list[*].total_number") + return self._get_list_attr_value("$.collects_list[*].total_number") @property def user_id(self): - return self._get_attr_value("$.collects_list[*].user_id") + return self._get_list_attr_value("$.collects_list[*].user_id") # user_info @property def nickname(self): - return replaceT(self._get_attr_value("$.collects_list[*].user_info.nickname")) + return replaceT( + self._get_list_attr_value("$.collects_list[*].user_info.nickname") + ) @property def uid(self): - return self._get_attr_value("$.collects_list[*].user_info.uid") + return self._get_list_attr_value("$.collects_list[*].user_info.uid") def _to_dict(self) -> dict: return { From 58cfa828109e96fe9a6710b5922d865d057b1f54 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 19:51:09 +0800 Subject: [PATCH 016/164] =?UTF-8?q?=E4=BC=98=E5=8C=96douyin=20`utils`?= =?UTF-8?q?=E4=B8=ADmsToken=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化日志内容输出 2、优化http状态码异常 --- f2/apps/douyin/utils.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 0ad9702..b56752b 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -67,40 +67,45 @@ def gen_real_msToken(cls) -> str: with httpx.Client(transport=transport, proxies=cls.proxies) as client: try: response = client.post( - cls.token_conf["url"], headers=headers, content=payload + cls.token_conf["url"], content=payload, headers=headers ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取msToken")) - elif response.status_code == 404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() msToken = str(httpx.Cookies(response.cookies).get("msToken")) - if len(msToken) not in [120, 128]: - raise APIResponseError( - _( - "msToken: 请检查并更新 f2 中 conf.yaml 配置文件中的 msToken,以匹配 douyin 新规则。" - ) - ) + raise APIResponseError(_("msToken内容不符合要求。")) return msToken - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.token_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0} 代理:{1} 异常类名:{2} 异常详细信息:{3}" + ).format(cls.token_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if e.response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败, 请更新F2配置文件中的 msToken,以匹配 douyin 新规则" + ) + ) + elif e.response.status_code == 404: + raise APINotFoundError(_("msToken无法找到API端点")) + else: + raise APIResponseError( + _( + "链接:{0} 状态码 {1}:{2} 
".format( + e.response.url, e.response.status_code, e.response.text + ) + ) + ) except APIError as e: + logger.error(_("msToken API错误:{0}").format(e)) logger.info(_("生成虚假的msToken")) return cls.gen_false_msToken() From 67664ff5f87563dd03983e3899d15fb5480fb80d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:31:11 +0800 Subject: [PATCH 017/164] =?UTF-8?q?=E4=BC=98=E5=8C=96douyin=20`utils`?= =?UTF-8?q?=E4=B8=ADttwid=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化日志内容输出 2、优化http状态码异常 --- f2/apps/douyin/utils.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index b56752b..66f705a 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -127,28 +127,37 @@ def gen_ttwid(cls) -> str: response = client.post( cls.ttwid_conf["url"], content=cls.ttwid_conf["data"] ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() ttwid = str(httpx.Cookies(response.cookies).get("ttwid")) return ttwid - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if e.response.status_code == 401: + raise APIUnauthorizedError( + _( + 
"参数验证失败,请更新F2配置文件中的 ttwid,以匹配 douyin 新规则" + ) + ) + elif e.response.status_code == 404: + raise APINotFoundError(_("ttwid无法找到API端点")) + else: + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) + ) + ) class VerifyFpManager: From fce93c4703d3dbdb30074bced0f9f1e76ad5dda8 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:31:37 +0800 Subject: [PATCH 018/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 改用中文符号 --- f2/apps/douyin/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 66f705a..6a1d6da 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -81,7 +81,7 @@ def gen_real_msToken(cls) -> str: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "请求端点失败,请检查当前网络环境。 链接:{0} 代理:{1} 异常类名:{2} 异常详细信息:{3}" + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" ).format(cls.token_conf["url"], cls.proxies, cls.__name__, exc) ) @@ -90,7 +90,7 @@ def gen_real_msToken(cls) -> str: if e.response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败, 请更新F2配置文件中的 msToken,以匹配 douyin 新规则" + "参数验证失败,请更新F2配置文件中的 msToken,以匹配 douyin 新规则" ) ) elif e.response.status_code == 404: @@ -98,7 +98,7 @@ def gen_real_msToken(cls) -> str: else: raise APIResponseError( _( - "链接:{0} 状态码 {1}:{2} ".format( + "链接:{0},状态码 {1}:{2} ".format( e.response.url, e.response.status_code, e.response.text ) ) From 091e38219627c95771103bccfe8aea70bbf5e28c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:41:46 +0800 Subject: [PATCH 019/164] =?UTF-8?q?=E4=BC=98=E5=8C=96douyin=20`utils`?= =?UTF-8?q?=E4=B8=ADsec=5Fuser=5Fid=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7?= 
=?UTF-8?q?=E6=B1=82=E9=94=99=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化日志内容输出 2、优化http状态码异常 --- f2/apps/douyin/utils.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 6a1d6da..dfee3f0 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -266,7 +266,7 @@ async def get_sec_user_id(cls, url: str) -> str: transport=transport, proxies=TokenManager.proxies, timeout=10 ) as client: response = await client.get(url, follow_redirects=True) - + # 444一般为Nginx拦截,不返回状态 (444 is generally intercepted by Nginx and does not return status) if response.status_code in {200, 444}: match = pattern.search(str(response.url)) if match: @@ -274,7 +274,7 @@ async def get_sec_user_id(cls, url: str) -> str: else: raise APIResponseError( _( - "未在响应的地址中找到sec_user_id, 检查链接是否为用户主页类名: {0}".format( + "未在响应的地址中找到sec_user_id,检查链接是否为用户主页类名:{0}".format( cls.__name__ ) ) @@ -282,31 +282,30 @@ async def get_sec_user_id(cls, url: str) -> str: elif response.status_code == 401: raise APIUnauthorizedError( - _("未授权的请求。类名: {0}".format(cls.__name__)) + _("未授权的请求。类名:{0}".format(cls.__name__)) ) elif response.status_code == 404: raise APINotFoundError( - _("未找到API端点。类名: {0}".format(cls.__name__)) + _("未找到API端点。类名:{0}".format(cls.__name__)) ) elif response.status_code == 503: raise APIUnavailableError( - _("API服务不可用。类名: {0}".format(cls.__name__)) + _("API服务不可用。类名:{0}".format(cls.__name__)) ) else: - raise APIError( - _("API错误码:{0}。类名: {1}").format( - response.status_code, cls.__name__ + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + response.url, response.status_code, response.text + ) ) ) - except httpx.RequestError: + except httpx.RequestError as exc: raise APIConnectionError( _( - "连接到API时发生错误,请检查URL或网络情况。类名: {0}".format( - cls.__name__ - ) - ), - url, + "请求端点失败,请检查当前网络环境。 
链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod From 170a30495080b7ffa37d4ff042d18c6940172f94 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:47:19 +0800 Subject: [PATCH 020/164] =?UTF-8?q?=E4=BC=98=E5=8C=96douyin=20`utils`?= =?UTF-8?q?=E4=B8=ADaweme=5Fid=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E9=94=99=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index dfee3f0..a702312 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -372,6 +372,7 @@ async def get_aweme_id(cls, url: str) -> str: ) as client: try: response = await client.get(url, follow_redirects=True) + response.raise_for_status() video_pattern = cls._DOUYIN_VIDEO_URL_PATTERN note_pattern = cls._DOUYIN_NOTE_URL_PATTERN @@ -385,18 +386,24 @@ async def get_aweme_id(cls, url: str) -> str: aweme_id = match.group(1) else: raise APIResponseError( - _("未在响应的地址中找到aweme_id, 检查链接是否为作品页") + _("未在响应的地址中找到aweme_id,检查链接是否为作品页") ) return aweme_id - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format( - url, - TokenManager.proxies, - cls.__name__, + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) + ) + + except httpx.HTTPStatusError as e: + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) ) ) From c2bcda13065965a1ad51587a2db7f50966ada11e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:48:33 +0800 Subject: [PATCH 021/164] 
=?UTF-8?q?=E4=BC=98=E5=8C=96douyin=20`utils`?= =?UTF-8?q?=E5=AF=B9webcast=5Fid=E7=9A=84=E6=AD=A3=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修改后的正则表达式将匹配http与https --- f2/apps/douyin/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index a702312..bab7fdf 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -447,7 +447,7 @@ class WebCastIdFetcher: _DOUYIN_LIVE_URL_PATTERN = re.compile(r"live/([^/?]*)") # https://live.douyin.com/766545142636?cover_type=0&enter_from_merge=web_live&enter_method=web_card&game_name=&is_recommend=1&live_type=game&more_detail=&request_id=20231110224012D47CD00C18B4AE4BFF9B&room_id=7299828646049827596&stream_type=vertical&title_type=1&web_live_page=hot_live&web_live_tab=all # https://live.douyin.com/766545142636 - _DOUYIN_LIVE_URL_PATTERN2 = re.compile(r"https://live.douyin.com/(\d+)") + _DOUYIN_LIVE_URL_PATTERN2 = re.compile(r"http[s]?://live.douyin.com/(\d+)") # https://webcast.amemv.com/douyin/webcast/reflow/7318296342189919011?u_code=l1j9bkbd&did=MS4wLjABAAAAEs86TBQPNwAo-RGrcxWyCdwKhI66AK3Pqf3ieo6HaxI&iid=MS4wLjABAAAA0ptpM-zzoliLEeyvWOCUt-_dQza4uSjlIvbtIazXnCY&with_sec_did=1&use_link_command=1&ecom_share_track_params=&extra_params={"from_request_id":"20231230162057EC005772A8EAA0199906","im_channel_invite_id":"0"}&user_id=3644207898042206&liveId=7318296342189919011&from=share&style=share&enter_method=click_share&roomId=7318296342189919011&activity_info={} _DOUYIN_LIVE_URL_PATTERN3 = re.compile(r"reflow/([^/?]*)") From d3e36b3a766a6be1ae810206f7bbdcda64ddc012 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:52:26 +0800 Subject: [PATCH 022/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0douyin=20`utils`?= =?UTF-8?q?=E4=B8=ADwebcast=5Fid=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E9=94=99=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit 1、添加了对webcast_id的错误捕获 2、优化日志内容输出 3、优化http状态码异常 --- f2/apps/douyin/utils.py | 68 ++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index bab7fdf..a7e4888 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -473,37 +473,55 @@ async def get_webcast_id(cls, url: str) -> str: raise ( APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) ) + try: + # 重定向到完整链接 + transport = httpx.AsyncHTTPTransport(retries=5) + async with httpx.AsyncClient( + transport=transport, proxies=TokenManager.proxies, timeout=10 + ) as client: + response = await client.get(url, follow_redirects=True) + response.raise_for_status() + url = str(response.url) + + live_pattern = cls._DOUYIN_LIVE_URL_PATTERN + live_pattern2 = cls._DOUYIN_LIVE_URL_PATTERN2 + live_pattern3 = cls._DOUYIN_LIVE_URL_PATTERN3 + + if live_pattern.search(url): + match = live_pattern.search(url) + elif live_pattern2.search(url): + match = live_pattern2.search(url) + elif live_pattern3.search(url): + match = live_pattern3.search(url) + logger.warning( + _( + "该链接返回的是room_id,请使用`fetch_user_live_videos_by_room_id`接口" + ) + ) + else: + raise APIResponseError( + _("未在响应的地址中找到webcast_id,检查链接是否为直播页") + ) - # 重定向到完整链接 - transport = httpx.AsyncHTTPTransport(retries=5) - async with httpx.AsyncClient( - transport=transport, proxies=TokenManager.proxies, timeout=10 - ) as client: - response = await client.get(url, follow_redirects=True) - url = str(response.url) - - live_pattern = cls._DOUYIN_LIVE_URL_PATTERN - live_pattern2 = cls._DOUYIN_LIVE_URL_PATTERN2 - live_pattern3 = cls._DOUYIN_LIVE_URL_PATTERN3 - - if live_pattern.search(url): - match = live_pattern.search(url) - elif live_pattern2.search(url): - match = live_pattern2.search(url) - elif live_pattern3.search(url): - match = live_pattern3.search(url) - logger.debug( + return match.group(1) + + except 
httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) + raise APIConnectionError( _( - "该链接返回的是room_id,请使用`fetch_user_live_videos_by_room_id`接口" - ) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) - else: + + except httpx.HTTPStatusError as e: raise APIResponseError( - _("未在响应的地址中找到webcast_id, 检查链接是否为直播页") + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) + ) ) - return match.group(1) - @classmethod async def get_all_webcast_id(cls, urls: list) -> list: """ From 895b989f0818a1106c80c8a54cfc77f56332258a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 20:55:34 +0800 Subject: [PATCH 023/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index a7e4888..83b8634 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -437,6 +437,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: class MixIdFetcher: + # 获取方法同AwemeIdFetcher @classmethod async def get_mix_id(cls, url: str) -> str: return @@ -694,11 +695,12 @@ def create_or_rename_user_folder( def show_qrcode(qrcode_url: str, show_image: bool = False) -> None: """ - 显示二维码 + 显示二维码 (Show QR code) Args: - qrcode_url (str): 登录二维码链接 + qrcode_url (str): 登录二维码链接 (Login QR code link) show_image (bool): 是否显示图像,True 表示显示,False 表示在控制台显示 + (Whether to display the image, True means display, False means display in the console) """ if show_image: # 创建并显示QR码图像 From 54a5fc41dc8d571bc8b5acf36bf4b0ee885fd6df Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:29:39 +0800 Subject: [PATCH 024/164] 
=?UTF-8?q?=E6=B7=BB=E5=8A=A0douyin=20`filter`?= =?UTF-8?q?=E5=AF=B9=E9=9D=9E=E6=B3=95=E6=94=B6=E8=97=8F=E5=A4=B9=E5=90=8D?= =?UTF-8?q?=E5=AD=97=E7=AC=A6=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 9c72b42..13024dd 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -364,7 +364,7 @@ def collects_id(self): @property def collects_name(self): - return self._get_list_attr_value("$.collects_list[*].collects_name") + return replaceT(self._get_list_attr_value("$.collects_list[*].collects_name")) @property def create_time(self): From 4163ee5a89e3af5d39e912c9b8b7c07515c58dde Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:32:04 +0800 Subject: [PATCH 025/164] =?UTF-8?q?=E6=98=8E=E7=A1=AE=E4=BA=86douyin=20`ha?= =?UTF-8?q?ndler`=E7=9A=84=E9=83=A8=E5=88=86=E5=87=BD=E6=95=B0=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 1caff78..0d4f93d 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -2,6 +2,7 @@ import asyncio from typing import AsyncGenerator, Dict, Any, List +from pathlib import Path from f2.log.logger import logger from f2.i18n.translator import _ @@ -94,7 +95,7 @@ async def get_user_nickname(self, sec_user_id: str, db: AsyncUserDB) -> str: async def get_or_add_user_data( self, kwargs: dict, sec_user_id: str, db: AsyncUserDB - ) -> Any: + ) -> Path: """ 获取或创建用户数据同时创建用户目录 (Get or create user data and create user directory) @@ -493,6 +494,9 @@ async def fetch_user_collection_videos( videos_collected += len(aweme_data_list) max_cursor = 
video.max_cursor + ) -> Union[str, List[str]]: + ) -> AsyncGenerator[UserCollectsFilter, None]: + ) -> AsyncGenerator[List[Dict[str, Any]], None]: @mode_handler("mix") async def handle_user_mix(self): """ From 1335b97c58388b415fa4fede75ce40fa4f07bfca Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:32:52 +0800 Subject: [PATCH 026/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 0d4f93d..043843e 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -1,8 +1,8 @@ # path: f2/apps/douyin/handler.py import asyncio -from typing import AsyncGenerator, Dict, Any, List from pathlib import Path +from typing import AsyncGenerator, Union, Dict, Any, List from f2.log.logger import logger from f2.i18n.translator import _ @@ -236,9 +236,9 @@ async def handle_user_post(self): async def fetch_user_post_videos( self, sec_user_id: str, - max_cursor: int = 0, - page_counts: int = 20, - max_counts: int = None, + max_cursor: int, + page_counts: int, + max_counts: int, ): """ 用于获取指定用户发布的视频列表。 @@ -341,9 +341,9 @@ async def handle_user_like(self): async def fetch_user_like_videos( self, sec_user_id: str, - max_cursor: int = 0, - page_counts: int = 20, - max_counts: int = None, + max_cursor: int, + page_counts: int, + max_counts: int, ) -> AsyncGenerator[List[Dict[str, Any]], None]: """ 用于获取指定用户喜欢的视频列表。 @@ -440,8 +440,6 @@ async def fetch_user_collection_videos( """ 用于获取指定用户收藏的视频列表。 (Used to get the list of videos collected by the specified user.) - 该接口需要用POST且只靠cookie来获取数据。 - (This interface needs to be POST and only relies on cookies to get data.) 
Args: max_cursor: int: 起始页 (Start page) @@ -452,6 +450,10 @@ async def fetch_user_collection_videos( aweme_data: dict: 视频数据字典, 包含视频ID列表、视频文案、作者昵称、起始页 (Video data dictionary, including video ID list, video description, author nickname, start page) + + Note: + 该接口需要用POST且只靠cookie来获取数据。 + (This interface needs to use POST and only rely on cookies to obtain data.) """ max_counts = max_counts or float("inf") @@ -728,9 +730,9 @@ async def handle_user_feed(self): async def fetch_user_feed_videos( self, sec_user_id: str, - max_cursor: int = 0, - page_counts: int = 20, - max_counts: int = None, + max_cursor: int, + page_counts: int, + max_counts: int, ) -> AsyncGenerator[List[Dict[str, Any]], None]: """ 用于获取指定用户feed的视频列表。 From 2c18ae8284f8c071046402178b95062c3810a4ed Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:34:33 +0800 Subject: [PATCH 027/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0douyin=20`handler`?= =?UTF-8?q?=E5=AF=B9=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C=E5=93=81=E7=9A=84?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 227 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 043843e..dfc4f17 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -16,6 +16,8 @@ UserProfile, UserLike, UserCollection, + UserCollects, + UserCollectsVideo, UserMix, PostDetail, UserLive, @@ -27,6 +29,7 @@ UserPostFilter, UserProfileFilter, UserCollectionFilter, + UserCollectsFilter, UserMixFilter, PostDetailFilter, UserLiveFilter, @@ -496,9 +499,233 @@ async def fetch_user_collection_videos( videos_collected += len(aweme_data_list) max_cursor = video.max_cursor + @mode_handler("collects") + async def handle_user_collects(self): + """ + 用于处理用户收藏夹的视频 (Used to process videos in user collections) + + Args: + kwargs: dict: 参数字典 (Parameter dictionary) + """ + + 
max_cursor = self.kwargs.get("max_cursor", 0) + page_counts = self.kwargs.get("page_counts", 20) + max_counts = self.kwargs.get("max_counts") + # 由于无法在Web端获取收藏夹的URL,因此无法通过URL来获取收藏夹作品。 + # Web端收藏夹作品的接口只能通过登录的cookie获取,与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的收藏夹作品。 + # 此外,收藏夹作品的文件夹将根据所配置的URL主页用户名来确定。 + # 为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 + sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) + + async with AsyncUserDB("douyin_users.db") as db: + user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) + + async for collects in self.fetch_user_collects( + max_cursor, page_counts, max_counts + ): + choose_collects_id = await self.select_user_collects(collects) + + if isinstance(choose_collects_id, str): + choose_collects_id = [choose_collects_id] + + for collects_id in choose_collects_id: + # 由于收藏夹作品包含在用户名下且存在收藏夹名,因此将额外创建收藏夹名的文件夹 + # 将会根据是否设置了 --folderize 参数来决定是否创建收藏夹名的文件夹 + # 例如: 用户名/收藏夹名/作品名.mp4 + if self.kwargs.get("folderize"): + tmp_user_path = user_path + tmp_user_path = ( + tmp_user_path + / collects.collects_name[ + collects.collects_id.index(collects_id) + ] + ) + else: + tmp_user_path = user_path + + async for aweme_data_list in self.fetch_user_collects_videos( + collects_id, max_cursor, page_counts, max_counts + ): + await self.downloader.create_download_tasks( + self.kwargs, aweme_data_list, tmp_user_path + ) + + async def select_user_collects( + self, collects: UserCollectsFilter ) -> Union[str, List[str]]: + """ + 用于选择收藏夹 + (Used to select the collection) + + Args: + collects: UserCollectsFilter: 收藏夹列表过滤器 (Collection list Filter) + + Return: + collects_id: Union[str, List[str]]: 选择的收藏夹ID (Selected collects_id) + """ + + rich_console.print(_("0: [bold]全部下载[/bold]")) + for i in range(len(collects.collects_id)): + rich_console.print( + _( + "{0}: {1} (包含 {2} 个作品,收藏夹ID {3})".format( + i + 1, + collects.collects_name[i], + collects.total_number[i], + collects.collects_id[i], + ) + ) + ) + + # rich_prompt 
会有字符刷新问题,暂时使用rich_print + rich_console.print(_("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]")) + selected_index = int( + rich_prompt.ask( + # _("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]"), + choices=[str(i) for i in range(len(collects.collects_id) + 1)], + ) + ) + + if selected_index == 0: + return collects.collects_id + else: + return collects.collects_id[selected_index - 1] + + async def fetch_user_collects( + self, max_cursor: int, page_counts: int, max_counts: int ) -> AsyncGenerator[UserCollectsFilter, None]: + """ + 用于获取指定用户收藏夹。 + (Used to get the list of videos in the specified user's collection.) + + Args: + max_cursor: int: 起始页 (Page cursor) + page_counts: int: 每页收藏夹数 (Page counts) + max_counts: int: 最大收藏夹数 (Max counts) + + Return: + collects: AsyncGenerator[UserCollectsFilter, None]: 收藏夹列表过滤器 (Collection list Filter) + """ + + max_counts = max_counts or float("inf") + collected = 0 + + while collected < max_counts: + logger.debug(_("开始爬取用户收藏夹")) + logger.debug("=====================================") + logger.debug( + _("当前请求的max_cursor: {0}, max_counts: {1}").format( + max_cursor, max_counts + ) + ) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserCollects(cursor=max_cursor, count=page_counts) + response = await crawler.fetch_user_collects(params) + collects = UserCollectsFilter(response) + + logger.debug( + _("收藏夹ID: {0} 收藏夹标题: {1}").format( + collects.collects_id, collects.collects_name + ) + ) + logger.debug("=====================================") + + yield collects + + if not collects.has_more: + logger.info(_("所有收藏夹ID采集完毕")) + break + + # 更新已经处理的收藏夹数量 (Update the number of collections processed) + collected += len(collects.collects_id) + max_cursor = collects.max_cursor + + logger.debug(_("用户收藏夹爬取结束")) + + async def fetch_user_collects_videos( + self, + collects_id: str, + max_cursor: int, + page_counts: int, + max_counts: int, ) -> AsyncGenerator[List[Dict[str, Any]], None]: + """ + 用于获取指定用户收藏夹的视频列表。 + (Used to get the list of 
videos in the specified user's collection.) + + Args: + collects_id: str: 收藏夹ID (Collection ID) + max_cursor: int: 起始页 (Page cursor) + page_counts: int: 每页视频数 (Number of videos per page) + max_counts: int: 最大视频数 (Maximum number of videos) + + Return: + aweme_data: dict: 视频数据字典, 包含视频ID列表、视频文案、作者昵称、起始页 + (Video data dictionary, including video ID list, video description, + author nickname, start page) + """ + + max_counts = max_counts or float("inf") + videos_collected = 0 + + logger.debug(_("开始爬取收藏夹: {0} 的视频").format(collects_id)) + + while videos_collected < max_counts: + current_request_size = min(page_counts, max_counts - videos_collected) + + logger.debug("=====================================") + logger.debug( + _("最大数量: {0} 每次请求数量: {1}").format( + max_counts, current_request_size + ) + ) + logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserCollectsVideo( + cursor=max_cursor, + count=current_request_size, + collects_id=collects_id, + ) + response = await crawler.fetch_user_collects_video(params) + video = UserCollectionFilter(response) + + logger.debug( + "是否有作品: {0} 是否有更多: {1}".format( + video.has_aweme, video.has_more + ) + ) + if video.has_aweme: + if not video.has_more: + logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) + yield video._to_list() + break + else: + logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug( + _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + video.aweme_id, video.desc, video.nickname + ) + ) + logger.debug("=====================================") + + aweme_data_list = video._to_list() + yield aweme_data_list + + # 更新已经处理的视频数量 (Update the number of videos processed) + videos_collected += len(aweme_data_list) + max_cursor = video.max_cursor + else: + logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + if not video.has_more: + logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) + break + max_cursor = video.max_cursor + + 
logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + @mode_handler("mix") async def handle_user_mix(self): """ From 93ec02313fea6bbb32781aae826fae7ab40f67b5 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:35:08 +0800 Subject: [PATCH 028/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86douyin=20`cr?= =?UTF-8?q?awler`=E5=AF=B9=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C=E5=93=81?= =?UTF-8?q?=E7=9A=84=E7=88=AC=E8=99=AB=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 2d892c1..a2fccbb 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -12,6 +12,8 @@ UserPost, UserLike, UserCollection, + UserCollects, + UserCollectsVideo, PostDetail, UserMix, UserLive, @@ -69,6 +71,20 @@ async def fetch_user_collection(self, params: UserCollection): logger.debug(_("主页收藏作品接口地址:" + endpoint)) return await self._fetch_post_json(endpoint, params.dict()) + async def fetch_user_collects(self, params: UserCollects): + endpoint = XBogusManager.model_2_endpoint( + dyendpoint.USER_COLLECTS, params.dict() + ) + logger.debug(_("收藏夹接口地址:" + endpoint)) + return await self._fetch_get_json(endpoint) + + async def fetch_user_collects_video(self, params: UserCollectsVideo): + endpoint = XBogusManager.model_2_endpoint( + dyendpoint.USER_COLLECTS_VIDEO, params.dict() + ) + logger.debug(_("收藏夹作品接口地址:" + endpoint)) + return await self._fetch_get_json(endpoint) + async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( dyendpoint.MIX_AWEME, params.dict() From fbb9a6fcd276ebcf92c4e03c003102323c1596d7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:35:59 +0800 Subject: [PATCH 029/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86douyin=20`mo?= =?UTF-8?q?del`=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C?= 
=?UTF-8?q?=E5=93=81=E7=9A=84=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index 132f9c6..7e0814a 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -98,6 +98,19 @@ class UserCollection(BaseRequestModel): count: int +class UserCollects(BaseRequestModel): + # GET + cursor: int + count: int + + +class UserCollectsVideo(BaseRequestModel): + # GET + cursor: int + count: int + collects_id: str + + class UserMix(BaseRequestModel): cursor: int count: int From 1e656d1e49d97bbd87be4729c165e7498b711024 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:37:00 +0800 Subject: [PATCH 030/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86douyin=20`ap?= =?UTF-8?q?i`=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C?= =?UTF-8?q?=E5=93=81=E7=9A=84=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/f2/apps/douyin/api.py b/f2/apps/douyin/api.py index b6eea0e..fff0cda 100644 --- a/f2/apps/douyin/api.py +++ b/f2/apps/douyin/api.py @@ -63,6 +63,12 @@ class DouyinAPIEndpoints: # 用户收藏 (User Collection) USER_COLLECTION = f"{DOUYIN_DOMAIN}/aweme/v1/web/aweme/listcollection/" + # 用户收藏夹 (User Collects) + USER_COLLECTS = f"{DOUYIN_DOMAIN}/aweme/v1/web/collects/list/" + + # 用户收藏夹作品 (User Collects Posts) + USER_COLLECTS_VIDEO = f"{DOUYIN_DOMAIN}/aweme/v1/web/collects/video/list/" + # 首页朋友作品 (Friend Feed) FRIEND_FEED = f"{DOUYIN_DOMAIN}/aweme/v1/web/familiar/feed/" From 027d54b010971d475993cc53940c3091f12367c7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:37:36 +0800 Subject: [PATCH 031/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86douyin=20`cl?= 
=?UTF-8?q?i`=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C?= =?UTF-8?q?=E5=93=81=E6=A8=A1=E5=BC=8F=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index c84b18a..3b59183 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -302,11 +302,11 @@ def merge_config(main_conf, custom_conf, **kwargs): @click.option( "--mode", "-M", - type=click.Choice(["one", "post", "like", "collection", "mix", "live"]), + type=click.Choice(["one", "post", "like", "collection", "collects", "mix", "live"]), # default="post", # required=True, help=_( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),合辑(mix),直播(live)" ), ) @click.option( From 86156e898b6f8c81ffc9e7f937ada2e74bc8db83 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:38:30 +0800 Subject: [PATCH 032/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86douyin=20`he?= =?UTF-8?q?lp`=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C?= =?UTF-8?q?=E5=93=81=E6=A8=A1=E5=BC=8F=E5=B8=AE=E5=8A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/help.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index b8c03ae..8d1b3b2 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -37,7 +37,7 @@ def help() -> None: "-M --mode", "[dark_cyan]Choice", _( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),合辑(mix),直播(live)" ), ), ( From 7b5a9b8bd9e6d5ef0672655c04303c6a357bf0d2 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:40:50 +0800 Subject: [PATCH 033/164] 
=?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BA=86F2=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=8F=B7=E7=9A=84=E5=AF=BC=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、删除了冗余版本文件 2、优化了导入与使用 --- f2/cli/cli_commands.py | 8 ++++---- f2/helps.py | 14 ++++++-------- f2/utils/__version__.py | 31 ------------------------------- 3 files changed, 10 insertions(+), 43 deletions(-) delete mode 100644 f2/utils/__version__.py diff --git a/f2/cli/cli_commands.py b/f2/cli/cli_commands.py index 9bdb674..c17a532 100644 --- a/f2/cli/cli_commands.py +++ b/f2/cli/cli_commands.py @@ -1,5 +1,6 @@ # path: f2/cli/cli_command.py +import f2 import click import typing import asyncio @@ -7,7 +8,6 @@ from f2 import helps from f2.apps import __apps__ as apps_module -from f2.utils import __version__ from f2.exceptions import APIError from f2.cli.cli_console import RichConsoleManager from f2.utils._signal import SignalManager @@ -23,7 +23,7 @@ def handle_help( ) -> None: if not value or ctx.resilient_parsing: return - helps.f2() + helps.main() ctx.exit() @@ -36,7 +36,7 @@ def handle_version( if not value or ctx.resilient_parsing: return - click.echo(f"Version {__version__._version}") + click.echo(f"Version {f2.__version__}") ctx.exit() @@ -138,7 +138,7 @@ def set_cli_config(ctx, **kwargs): async def run_app(kwargs): - logger.info(f"Version {__version__._version}") + logger.info(f"Version {f2.__version__}") app_name = kwargs["app_name"] app_module = importlib.import_module(f"f2.apps.{app_name}.handler") await app_module.main(kwargs) diff --git a/f2/helps.py b/f2/helps.py index 4c2ae61..cb1dcc4 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -14,13 +14,13 @@ ------------------------------------------------- """ +import f2 import importlib from rich.console import Console from rich.panel import Panel from rich.table import Table from f2.i18n.translator import _ -from f2.utils import __version__ def get_help(app_name: str) -> None: @@ -34,15 +34,13 @@ def 
get_help(app_name: str) -> None: print(_("没有找到 {0} 应用").format(app_name)) -def f2() -> None: +def main() -> None: # 真彩 console = Console(color_system="truecolor") - console.print( - f"\n:rocket: [bold]f2 {__version__._version} :rocket:", justify="center" - ) - console.print(f"\n[i]{__version__._description_cn}", justify="center") - console.print(f"[i]{__version__._description_en}", justify="center") - console.print(f"[i]GitHub {__version__._repourl}\n", justify="center") + console.print(f"\n:rocket: [bold]f2 {f2.__version__} :rocket:", justify="center") + console.print(f"\n[i]{f2.__description_cn__}", justify="center") + console.print(f"[i]{f2.__description_en__}", justify="center") + console.print(f"[i]GitHub {f2.__repourl__}\n", justify="center") table = Table.grid(padding=1, pad_edge=True, expand=True) table.add_column("Website", no_wrap=True, justify="left", style="bold") diff --git a/f2/utils/__version__.py b/f2/utils/__version__.py deleted file mode 100644 index 7006cd0..0000000 --- a/f2/utils/__version__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- -""" -@Description:__version__.py -@Date :2023/01/15 23:42:17 -@Author :JohnserfSeed -@version :0.0.1.4 -@License :(C)Copyright 2019-2022, Liugroup-NLPR-CASIA -@Github :https://github.com/johnserf-seed -@Mail :johnserf-seed@foxmail.com -------------------------------------------------- -Change Log : -2023/01/15 23:43:07 - Create __version__.py -------------------------------------------------- -""" - -_author = "JohnserfSeed" -_description_cn = "基于[red]异步[/red]的[green]全平台下载工具." -_description_en = "[yellow]Asynchronous based [/yellow]full-platform download tool." 
-_reponame = "f2" -_repourl = "https://github.com/Johnserf-Seed/f2" -_version = "0.0.1.4" - -__all__ = [ - "_author", - "_description_cn", - "_description_en", - "_reponame", - "_repourl", - "_version", -] From 9e87cb5c92ae99dcdcbe5a3655aec68f52af8322 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:42:24 +0800 Subject: [PATCH 034/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、black的格式化 2、去除或改动了日志输出 --- f2/apps/douyin/cli.py | 4 ++-- f2/apps/douyin/handler.py | 5 ++++- f2/dl/base_downloader.py | 1 - f2/helps.py | 16 +++++++++++++--- f2/log/logger.py | 4 +++- 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 3b59183..e0afd3c 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -404,14 +404,14 @@ def merge_config(main_conf, custom_conf, **kwargs): type=click.Choice(["none", "chrome", "firefox", "edge", "opera"]), # default="none", help=_( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" ), callback=handler_auto_cookie, ) @click.option( "--sso-login", is_flag=True, - help=_("使用SSO扫码登录获取[yellow]cookie[/yellow],保存低频主配置文件"), + help=_("使用SSO扫码登录获取cookie,保存低频主配置文件"), callback=handler_sso_login, ) @click.option( diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index dfc4f17..6cc2ba7 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -424,7 +424,10 @@ async def handle_user_collection(self): max_cursor = self.kwargs.get("max_cursor", 0) page_counts = self.kwargs.get("page_counts", 20) max_counts = self.kwargs.get("max_counts") - + # 由于Web端收藏作品的接口只能通过登录的cookie获取,而与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的收藏作品。 + # 此外,收藏作品的文件夹将根据所配置的URL主页用户名来确定。 + # 
为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) async with AsyncUserDB("douyin_users.db") as db: diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index fb4957a..9483aa2 100644 --- a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -42,7 +42,6 @@ def __init__(self, kwargs: dict = {}): super().__init__(proxies=proxies, crawler_headers=self.headers) self.progress = RichConsoleManager().progress self.download_tasks = [] - logger.debug(_("BaseDownloader 请求头headers:{0}".format(self.headers))) @staticmethod def _ensure_path(path: Union[str, Path]) -> Path: diff --git a/f2/helps.py b/f2/helps.py index cb1dcc4..91c55d2 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -76,10 +76,18 @@ def main() -> None: table.add_row(_("weibo 或 wb"), _("- 获取微博")) table.add_row( - _("douyin 或 dy"), _("- 单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,文案,封面,直播,原声。"), _("✔") + _("douyin 或 dy"), + _( + "- 单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,文案,封面,直播,原声。" + ), + _("✔"), ) table.add_row( - _("tiktok 或 tk"), _("- 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,文案,封面,原声。"), _("✔") + _("tiktok 或 tk"), + _( + "- 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,文案,封面,原声。" + ), + _("✔"), ) table.add_row(_("instagram 或 ig"), _("- 获取ig的作品"), _("⏳")) table.add_row(_("twitch 或 tv"), _("- 获取Twitch直播")) @@ -91,7 +99,9 @@ def main() -> None: table.add_row("\n") table.add_row( "f2 -d [magenta] [/magenta][cyan][COMMAND]", - _("- 记录app的debug到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]"), + _( + "- 记录app的debug到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]" + ), _("⚠"), ) table.add_row( diff --git a/f2/log/logger.py b/f2/log/logger.py index 39f2d9d..6b982f5 100644 --- a/f2/log/logger.py +++ b/f2/log/logger.py @@ -68,7 +68,9 @@ def clean_logs(self, keep_last_n=10): try: log_file.unlink() except PermissionError: - self.logger.warning(f"无法删除日志文件 {log_file}, 它正被另一个进程使用") + self.logger.warning( + f"无法删除日志文件 {log_file}, 它正被另一个进程使用" + ) def shutdown(self): for handler in 
self.logger.handlers: From 3a1bfed52bc1a8ef0ac52bd57fc2b4aba244dc07 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:55:58 +0800 Subject: [PATCH 035/164] =?UTF-8?q?=E6=98=8E=E7=A1=AE=E4=BA=86tiktok=20`ha?= =?UTF-8?q?ndler`=E7=9A=84=E9=83=A8=E5=88=86=E5=87=BD=E6=95=B0=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/handler.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 8caf343..e8d26bd 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -1,7 +1,7 @@ # path: f2/apps/tiktok/handler.py from pathlib import Path -from typing import AsyncGenerator +from typing import AsyncGenerator, Union, List from f2.i18n.translator import _ from f2.log.logger import logger @@ -147,7 +147,7 @@ async def fetch_play_list( secUid: str, cursor: int, page_counts: int, - ) -> dict: + ) -> Union[dict, UserPlayListFilter]: """ 用于获取指定用户的视频合集列表 (Used to get video mix list of specified user) @@ -158,7 +158,7 @@ async def fetch_play_list( page_counts: int: 分页数量 (Page counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + playlist: Union[dict, UserPlayListFilter]: 视频合集列表 (Video mix list) """ logger.debug(_("开始爬取用户: {0} 的视频合集列表").format(secUid)) @@ -181,16 +181,18 @@ async def fetch_play_list( logger.debug("=====================================") return playlist._to_dict() - async def select_playlist(playlists: dict) -> int: + async def select_playlist( + self, playlists: Union[dict, UserPlayListFilter] + ) -> Union[str, List[str]]: """ 用于选择要下载的视频合辑 (Used to select the video mix to download) Args: - playlists: dict: 视频合辑列表 (Video mix list) + playlists: Union[dict, UserPlayListFilter]: 视频合辑列表 (Video mix list) Return: - selected_index: str: 选择的视频合辑序号 (Selected video mix index) + selected_index: Union[str, List[str]]: 选择的视频合辑序号 (Selected video mix 
index) """ rich_console.print("[bold]请选择要下载的合辑:[/bold]") From 7ba49205c660635731592bb39559d1e75b03215a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:57:06 +0800 Subject: [PATCH 036/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 改用中文符号 --- f2/apps/tiktok/handler.py | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index e8d26bd..da62c3c 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -161,7 +161,7 @@ async def fetch_play_list( playlist: Union[dict, UserPlayListFilter]: 视频合集列表 (Video mix list) """ - logger.debug(_("开始爬取用户: {0} 的视频合集列表").format(secUid)) + logger.debug(_("开始爬取用户:{0} 的视频合集列表").format(secUid)) async with TiktokCrawler(self.kwargs) as crawler: params = UserPlayList(secUid=secUid, cursor=cursor, count=page_counts) @@ -172,9 +172,9 @@ async def fetch_play_list( logger.debug(_("用户: {0} 没有视频合集").format(secUid)) return {} - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频合集ID: {0} 视频合集标题: {1}").format( + _("视频合集ID:{0} 视频合集标题:{1}").format( playlist.mixId, playlist.mixName ) ) @@ -235,7 +235,7 @@ async def handler_one_video(self): async with AsyncVideoDB("tiktok_videos.db") as vdb: await self.get_or_add_video_data(aweme_data, vdb) - logger.debug(_("单个视频数据: {0}".format(aweme_data))) + logger.debug(_("单个视频数据:{0}".format(aweme_data))) # 创建下载任务 await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) @@ -252,14 +252,14 @@ async def fetch_one_video(self, itemId: str) -> dict: post: dict: 视频信息 (Video info) """ - logger.debug(_("开始爬取视频: {0}").format(itemId)) + logger.debug(_("开始爬取视频:{0}").format(itemId)) async with TiktokCrawler(self.kwargs) as crawler: params = 
PostDetail(itemId=itemId) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -313,7 +313,7 @@ async def fetch_user_post_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 发布的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 发布的视频").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -334,15 +334,15 @@ async def fetch_user_post_videos( if not video.has_aweme: logger.debug(_("{0} 页没有找到作品".format(cursor))) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕".format(secUid))) + logger.debug(_("用户:{0} 所有作品采集完毕".format(secUid))) break else: cursor = video.cursor continue - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -403,14 +403,14 @@ async def fetch_user_like_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 点赞的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 点赞的视频").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) logger.debug("=====================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -422,9 +422,9 @@ async def fetch_user_like_videos( video = UserPostFilter(response) if video.has_aweme: - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( 
video.aweme_id, video.desc, video.nickname ) ) @@ -437,14 +437,14 @@ async def fetch_user_like_videos( videos_collected += len(video.aweme_id) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: logger.debug(_("{0} 页没有找到作品").format(cursor)) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break # 更新已经处理的视频数量 (Update the number of videos processed) @@ -500,14 +500,14 @@ async def fetch_user_collect_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 收藏的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 收藏的视频").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) logger.debug("=====================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -519,9 +519,9 @@ async def fetch_user_collect_videos( video = UserPostFilter(response) if video.has_aweme: - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -534,14 +534,14 @@ async def fetch_user_collect_videos( videos_collected += len(video.aweme_id) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: logger.debug(_("{0} 页没有找到作品").format(cursor)) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break # 更新已经处理的视频数量 (Update the number of videos 
processed) @@ -566,7 +566,7 @@ async def handler_user_mix(self): secUid = await SecUserIdFetcher.get_secuid(self.kwargs.get("url")) playlist = await self.fetch_play_list(secUid, cursor, page_counts) - selected_index = await self.select_playlist(playlist) + mixId = await self.select_playlist(playlist) async with AsyncUserDB("tiktok_users.db") as audb: user_path = await self.get_or_add_user_data(secUid, audb) From 4a6885fb215e51a2bf712b85f1cf267a6c11f364 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:57:51 +0800 Subject: [PATCH 037/164] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=86tiktok=20`ha?= =?UTF-8?q?ndler`=E5=A4=84=E7=90=86=E6=92=AD=E6=94=BE=E5=88=97=E8=A1=A8?= =?UTF-8?q?=E7=9A=84=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/handler.py | 53 ++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index da62c3c..7e5bd4e 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -179,7 +179,7 @@ async def fetch_play_list( ) ) logger.debug("=====================================") - return playlist._to_dict() + return playlist async def select_playlist( self, playlists: Union[dict, UserPlayListFilter] @@ -195,23 +195,37 @@ async def select_playlist( selected_index: Union[str, List[str]]: 选择的视频合辑序号 (Selected video mix index) """ - rich_console.print("[bold]请选择要下载的合辑:[/bold]") + if playlists == {}: + sys.exit(_("用户没有视频合辑")) - for i, mix_id in enumerate(playlists.get("mixId", [])): - mix_name = playlists.get("mixName", [""])[i] - video_count = int(playlists.get("videoCount", [""])[i]) + rich_console.print("[bold]请选择要下载的合辑:[/bold]") + rich_console.print("0: [bold]全部下载[/bold]") + + for i in range(len(playlists.mixId)): rich_console.print( - f"[cyan]{i + 1}[/cyan]: {mix_name} ({video_count} videos)" + _( + "{0}: {1} (包含 {2} 个作品,收藏夹ID {3})".format( + i + 1, + 
playlists.mixName[i], + playlists.videoCount[i], + playlists.mixId[i], + ) + ) ) - rich_console.print(f"[cyan]0[/cyan]: [bold]全部下载[/bold]") - - selected_index = rich_prompt.ask( - "[bold yellow]请输入希望下载的合辑序号:[/bold yellow]", - choices=[str(i) for i in range(len(playlists) + 1)], + # rich_prompt 会有字符刷新问题,暂时使用rich_print + rich_console.print(_("[bold yellow]请输入希望下载的合辑序号:[/bold yellow]")) + selected_index = int( + rich_prompt.ask( + # _("[bold yellow]请输入希望下载的合辑序号:[/bold yellow]"), + choices=[str(i) for i in range(len(playlists) + 1)], + ) ) - return int(selected_index) + if selected_index == 0: + return playlists.mixId + else: + return playlists.mixId[selected_index - 1] @mode_handler("one") async def handler_one_video(self): @@ -571,17 +585,10 @@ async def handler_user_mix(self): async with AsyncUserDB("tiktok_users.db") as audb: user_path = await self.get_or_add_user_data(secUid, audb) - if selected_index == 0: - for mixId in playlist.get("mixId", []): - async for aweme_data_list in self.fetch_user_mix_videos( - mixId, cursor, page_counts, max_counts - ): - # 创建下载任务 - await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path - ) - else: - mixId = playlist.get("mixId", [])[selected_index - 1] + if isinstance(mixId, str): + mixId = [mixId] + + for mixId in playlist.get("mixId", []): async for aweme_data_list in self.fetch_user_mix_videos( mixId, cursor, page_counts, max_counts ): From eac43ec2664641649d3844b43df9e015a82f91fd Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:58:35 +0800 Subject: [PATCH 038/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、修改日志输出 2、添加导入 --- f2/apps/tiktok/handler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 7e5bd4e..3b74762 100644 --- 
a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -1,5 +1,7 @@ # path: f2/apps/tiktok/handler.py +import sys + from pathlib import Path from typing import AsyncGenerator, Union, List @@ -169,7 +171,7 @@ async def fetch_play_list( playlist = UserPlayListFilter(response) if not playlist.hasPlayList: - logger.debug(_("用户: {0} 没有视频合集").format(secUid)) + logger.info(_("用户:{0} 没有视频合集").format(secUid)) return {} logger.debug(_("当前请求的cursor:{0}").format(cursor)) From 665396b4c951d9e94716884cbbdb196de6a5f427 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 3 Mar 2024 22:59:14 +0800 Subject: [PATCH 039/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0rich=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=88=B013.7.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dc31bfd..b5c7ed3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ dependencies = [ "click==8.1.7", - "rich==13.6.0", + "rich==13.7.1", "httpx==0.25.0", "aiofiles==22.1.0", "aiosqlite==0.19.0", From f4df03d0004a623454474e5b6ed26ab840e04ba3 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 4 Mar 2024 00:11:53 +0800 Subject: [PATCH 040/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化了日志输出的翻译合理性 --- f2/apps/douyin/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 83b8634..ca41754 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -73,7 +73,7 @@ def gen_real_msToken(cls) -> str: msToken = str(httpx.Cookies(response.cookies).get("msToken")) if len(msToken) not in [120, 128]: - raise APIResponseError(_("msToken内容不符合要求。")) + raise APIResponseError(_("{0} 
内容不符合要求".format("msToken"))) return msToken @@ -90,11 +90,13 @@ def gen_real_msToken(cls) -> str: if e.response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新F2配置文件中的 msToken,以匹配 douyin 新规则" + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( + "msToken", "douyin" + ) ) ) elif e.response.status_code == 404: - raise APINotFoundError(_("msToken无法找到API端点")) + raise APINotFoundError(_("{0} 无法找到API端点".format("msToken"))) else: raise APIResponseError( _( @@ -105,6 +107,7 @@ def gen_real_msToken(cls) -> str: ) except APIError as e: + # 返回虚假的msToken (Return a fake msToken) logger.error(_("msToken API错误:{0}").format(e)) logger.info(_("生成虚假的msToken")) return cls.gen_false_msToken() @@ -145,7 +148,9 @@ def gen_ttwid(cls) -> str: if e.response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新F2配置文件中的 ttwid,以匹配 douyin 新规则" + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( + "ttwid", "douyin" + ) ) ) elif e.response.status_code == 404: From 536a444aa08867bd7e3c742195743b5f79b7ed2e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 4 Mar 2024 00:12:58 +0800 Subject: [PATCH 041/164] =?UTF-8?q?=E4=BC=98=E5=8C=96tiktok=20`utils`?= =?UTF-8?q?=E4=B8=AD=E5=AF=B9=E5=85=B7=E4=BD=93=E8=AF=B7=E6=B1=82=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/utils.py | 172 ++++++++++++++++++++++++++-------------- 1 file changed, 111 insertions(+), 61 deletions(-) diff --git a/f2/apps/tiktok/utils.py b/f2/apps/tiktok/utils.py index f625b73..f7fc080 100644 --- a/f2/apps/tiktok/utils.py +++ b/f2/apps/tiktok/utils.py @@ -67,39 +67,47 @@ def gen_real_msToken(cls) -> str: response = client.post( cls.token_conf["url"], headers=headers, content=payload ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取msToken")) - elif response.status_code == 404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() msToken 
= str(httpx.Cookies(response.cookies).get("msToken")) if len(msToken) not in [148]: - raise APIResponseError( - _( - "msToken: 请检查并更新 f2 中 conf.yaml 配置文件中的 msToken,以匹配 tiktok 新规则。" - ) - ) + raise APIResponseError(_("{0} 内容不符合要求".format("msToken"))) return msToken - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.token_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.token_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( + "msToken", "tiktok" + ) + ) + ) + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点".format("msToken"))) + else: + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) + ) + ) except APIError as e: # 返回虚假的msToken (Return a fake msToken) + logger.error(_("msToken API错误:{0}").format(e)) logger.info(_("生成虚假的msToken")) return cls.gen_false_msToken() @@ -118,17 +126,13 @@ def gen_ttwid(cls) -> str: try: response = client.post( cls.ttwid_conf["url"], + content=cls.ttwid_conf["data"], headers={ "Cookie": cls.ttwid_conf.get("cookie"), "Content-Type": "text/plain", }, - content=cls.ttwid_conf["data"], ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("401 由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("404 无法找到API端点")) + response.raise_for_status() ttwid = httpx.Cookies(response.cookies).get("ttwid") @@ -139,19 +143,34 @@ def gen_ttwid(cls) -> str: return ttwid - 
except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( + "ttwid", "tiktok" + ) + ) + ) + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点".format("ttwid"))) + else: + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) + ) + ) @classmethod def gen_odin_tt(cls): @@ -162,34 +181,43 @@ def gen_odin_tt(cls): with httpx.Client(transport=transport, proxies=cls.proxies) as client: try: response = client.get(cls.odin_tt_conf["url"]) - - if response.status_code == 401: - raise APIUnauthorizedError(_("401 由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("404 无法找到API端点")) + response.raise_for_status() odin_tt = httpx.Cookies(response.cookies).get("odin_tt") if odin_tt is None: - raise APIResponseError( - _("odin_tt: 检查没有通过, 请更新配置文件中的odin_tt") - ) + raise APIResponseError(_("{0} 内容不符合要求".format("odin_tt"))) return odin_tt - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.odin_tt_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.odin_tt_conf["url"], cls.proxies, cls.__name__, exc) ) 
except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( + "odin_tt", "tiktok" + ) + ) + ) + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点".format("odin_tt"))) + else: + raise APIResponseError( + _( + "链接:{0},状态码 {1}:{2} ".format( + e.response.url, e.response.status_code, e.response.text + ) + ) + ) class XBogusManager: @@ -224,6 +252,7 @@ def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: class SecUserIdFetcher: + # 预编译正则表达式 _TIKTOK_SECUID_PARREN = re.compile( r"" ) @@ -258,7 +287,7 @@ async def get_secuid(cls, url: str) -> str: ) as client: try: response = await client.get(url, follow_redirects=True) - + # 444一般为Nginx拦截,不返回状态 (444 is generally intercepted by Nginx and does not return status) if response.status_code in {200, 444}: if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): raise APINotFoundError( @@ -272,8 +301,8 @@ async def get_secuid(cls, url: str) -> str: if not match: raise APIResponseError( _( - "未在响应的地址中找到sec_uid, 检查链接是否为用户主页类名: {0}".format( - cls.__name__ + "未在响应中找到 {0},检查链接是否为用户主页。类名: {1}".format( + "sec_uid", cls.__name__ ) ) ) @@ -286,17 +315,20 @@ async def get_secuid(cls, url: str) -> str: sec_uid = user_info.get("secUid") if sec_uid is None: - raise RuntimeError(_("获取sec_uid失败, {0}".format(user_info))) + raise RuntimeError( + _("获取 {0} 失败,{1}".format(sec_uid, user_info)) + ) return sec_uid else: raise ConnectionError(_("接口状态码异常, 请检查重试")) - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(url, TokenManager.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 
链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod @@ -357,15 +389,25 @@ async def get_uniqueid(cls, url: str) -> str: response = await client.get(url, follow_redirects=True) if response.status_code in {200, 444}: + if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): + raise APINotFoundError( + _( + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( + cls.__name__ + ) + ) + ) match = cls._TIKTOK_UNIQUEID_PARREN.search(str(response.url)) if not match: - raise APIResponseError(_("未在响应中找到unique_id")) + raise APIResponseError( + _("未在响应中找到 {0}".format("unique_id")) + ) unique_id = match.group(1) if unique_id is None: raise RuntimeError( - _("获取unique_id失败, {0}".format(response.url)) + _("获取 {0} 失败,{1}".format("unique_id", response.url)) ) return unique_id @@ -416,6 +458,7 @@ class AwemeIdFetcher: # 预编译正则表达式 _TIKTOK_AWEMEID_PARREN = re.compile(r"video/(\d*)") + _TIKTOK_NOTFOUND_PARREN = re.compile(r"notfound") @classmethod async def get_aweme_id(cls, url: str) -> str: @@ -447,32 +490,39 @@ async def get_aweme_id(cls, url: str) -> str: response = await client.get(url, follow_redirects=True) if response.status_code in {200, 444}: + if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): + raise APINotFoundError( + _( + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( + cls.__name__ + ) + ) + ) match = cls._TIKTOK_AWEMEID_PARREN.search(str(response.url)) if not match: - raise APIResponseError(_("未在响应中找到aweme_id")) + raise APIResponseError( + _("未在响应中找到 {0}".format("aweme_id")) + ) aweme_id = match.group(1) if aweme_id is None: raise RuntimeError( - _("获取aweme_id失败, {0}".format(response.url)) + _("获取 {0} 失败,{1}".format("aweme_id", response.url)) ) return aweme_id else: raise ConnectionError( - _("接口状态码异常 {0}, 请检查重试").format(response.status_code) + _("接口状态码异常 {0},请检查重试").format(response.status_code) ) - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related 
exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format( - url, - TokenManager.proxies, - cls.__name__, - ) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod From eb98b0d19c9879806d0857e2cacd50becc381b4d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:43:39 +0800 Subject: [PATCH 042/164] =?UTF-8?q?=E4=B8=BA=20douyin=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E7=94=A8=E6=88=B7=E9=9F=B3=E4=B9=90=E6=94=B6=E8=97=8F?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/f2/apps/douyin/api.py b/f2/apps/douyin/api.py index fff0cda..7a6d596 100644 --- a/f2/apps/douyin/api.py +++ b/f2/apps/douyin/api.py @@ -69,6 +69,9 @@ class DouyinAPIEndpoints: # 用户收藏夹作品 (User Collects Posts) USER_COLLECTS_VIDEO = f"{DOUYIN_DOMAIN}/aweme/v1/web/collects/video/list/" + # 用户音乐收藏 (User Music Collection) + USER_MUSIC_COLLECTION = f"{DOUYIN_DOMAIN}/aweme/v1/web/music/listcollection/" + # 首页朋友作品 (Friend Feed) FRIEND_FEED = f"{DOUYIN_DOMAIN}/aweme/v1/web/familiar/feed/" From 06691e5a60b03c85d0ec0ed974d35e0d4730660b Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:45:17 +0800 Subject: [PATCH 043/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`cli`=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E6=94=B6=E8=97=8F=E9=9F=B3=E4=B9=90=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index e0afd3c..902d72c 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -302,11 +302,13 @@ def merge_config(main_conf, custom_conf, **kwargs): @click.option( "--mode", "-M", - type=click.Choice(["one", "post", 
"like", "collection", "collects", "mix", "live"]), + type=click.Choice( + ["one", "post", "like", "collection", "collects", "music", "mix", "live"] + ), # default="post", # required=True, help=_( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),收藏音乐(music),合辑(mix),直播(live)" ), ) @click.option( From 33e1f692de38f6a8b90df716bbf28818e7f8b8ca Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:45:45 +0800 Subject: [PATCH 044/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`crawler`=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E6=94=B6=E8=97=8F=E9=9F=B3=E4=B9=90=E7=88=AC?= =?UTF-8?q?=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index a2fccbb..d527c03 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -14,6 +14,7 @@ UserCollection, UserCollects, UserCollectsVideo, + UserMusicCollection, PostDetail, UserMix, UserLive, @@ -85,6 +86,13 @@ async def fetch_user_collects_video(self, params: UserCollectsVideo): logger.debug(_("收藏夹作品接口地址:" + endpoint)) return await self._fetch_get_json(endpoint) + async def fetch_user_music_collection(self, params: UserMusicCollection): + endpoint = XBogusManager.model_2_endpoint( + dyendpoint.USER_MUSIC_COLLECTION, params.dict() + ) + logger.debug(_("音乐收藏接口地址:" + endpoint)) + return await self._fetch_get_json(endpoint) + async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( dyendpoint.MIX_AWEME, params.dict() From 1868b719b7f4824f31f652d38767d8c6a50b84bc Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:46:21 +0800 Subject: [PATCH 045/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`filter`=E6=B7=BB?= 
=?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E9=9F=B3?= =?UTF-8?q?=E4=B9=90=E8=BF=87=E6=BB=A4=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 134 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 13024dd..3a3078f 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -443,6 +443,140 @@ def _to_dict(self) -> dict: } +class UserMusicCollectionFilter(JSONModel): + + @property + def max_cursor(self): + return self._get_attr_value("$.cursor") + + @property + def has_more(self): + return self._get_attr_value("$.has_more") + + @property + def status_code(self): + return self._get_attr_value("$.status_code") + + @property + def msg(self): + return self._get_attr_value("$.msg") + + @property + def album(self): + return self._get_list_attr_value("$.mc_list[*].album") + + @property + def audition_duration(self): + return self._get_list_attr_value("$.mc_list[*].audition_duration") + + @property + def duration(self): + return self._get_list_attr_value("$.mc_list[*].duration") + + @property + def author(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].author")) + + @property + def collect_status(self): + return self._get_list_attr_value("$.mc_list[*].collect_stat") + + @property + def music_status(self): + return self._get_list_attr_value("$.mc_list[*].music_status") + + @property + def cover_hd(self): + return self._get_list_attr_value("$.mc_list[*].cover_hd.url_list[0]") + + @property + def music_id(self): + return self._get_list_attr_value("$.mc_list[*].id") + + @property + def mid(self): + return self._get_list_attr_value("$.mc_list[*].mid") + + @property + def is_commerce_music(self): + return self._get_list_attr_value("$.mc_list[*].is_commerce_music") + + @property + def is_original(self): + return 
self._get_list_attr_value("$.mc_list[*].is_original") + + @property + def is_original_sound(self): + return self._get_list_attr_value("$.mc_list[*].is_original_sound") + + @property + def lyric_type(self): + return self._get_list_attr_value("$.mc_list[*].lyric_type") + + @property + def lyric_url(self): + return self._get_list_attr_value("$.mc_list[*].lyric_url") + + @property + def play_url(self): + return self._get_list_attr_value("$.mc_list[*].play_url.url_list[0]") + + @property + def title(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].title")) + + @property + def strong_beat_url(self): + return self._get_list_attr_value("$.mc_list[*].strong_beat_url.url_list[0]") + + @property + def owner_nickname(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].owner_nickname")) + + @property + def owner_id(self): + return self._get_list_attr_value("$.mc_list[*].owner_id") + + @property + def sec_uid(self): + return self._get_list_attr_value("$.mc_list[*].sec_uid") + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + def _to_list(self): + exclude_list = ["has_more", "max_cursor", "status_code", "msg"] + + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] + + aweme_entries = self._get_attr_value("$.mc_list") or [] + + list_dicts = [] + for entry in aweme_entries: + d = { + "has_more": self.has_more, + "max_cursor": self.max_cursor, + "status_code": self.status_code, + "msg": self.msg, + } + for key in keys: + attr_values = getattr(self, key) + index = aweme_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts + + class UserMixFilter(UserPostFilter): def __init__(self, data): super().__init__(data) From 
1993c5a94a150e9936b6c96bdc486e06ceac3226 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:48:47 +0800 Subject: [PATCH 046/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`handler`=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E9=9F=B3?= =?UTF-8?q?=E4=B9=90=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 6cc2ba7..5d7a210 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -412,6 +412,36 @@ async def fetch_user_like_videos( logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + @mode_handler("music") + async def handle_user_music_collection(self): + """ + 用于处理用户收藏的音乐 (Used to process music collected by users) + + Args: + kwargs: dict: 参数字典 (Parameter dictionary) + """ + + max_cursor = self.kwargs.get("max_cursor", 0) + page_counts = self.kwargs.get("page_counts", 20) + max_counts = self.kwargs.get("max_counts") + + # Web端音乐收藏作品的接口只能通过登录的cookie获取,与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的音乐收藏作品。 + # 此外,音乐收藏作品的文件夹将根据所配置的URL主页用户名来确定。 + # 为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 + sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) + + async with AsyncUserDB("douyin_users.db") as db: + user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) + + async for aweme_data_list in self.fetch_user_music_collection( + max_cursor, page_counts, max_counts + ): + # 创建下载任务 + await self.downloader.create_music_download_tasks( + self.kwargs, aweme_data_list, user_path + ) + @mode_handler("collection") async def handle_user_collection(self): """ From 6328e024d3908ab5cef1d8762c263acd51e6542c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:49:22 +0800 Subject: [PATCH 047/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`handler`=E6=B7=BB?= 
=?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E9=9F=B3?= =?UTF-8?q?=E4=B9=90=E6=95=B0=E6=8D=AE=E9=87=87=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 5d7a210..7d97786 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -18,6 +18,7 @@ UserCollection, UserCollects, UserCollectsVideo, + UserMusicCollection, UserMix, PostDetail, UserLive, @@ -30,6 +31,7 @@ UserProfileFilter, UserCollectionFilter, UserCollectsFilter, + UserMusicCollectionFilter, UserMixFilter, PostDetailFilter, UserLiveFilter, @@ -442,6 +444,62 @@ async def handle_user_music_collection(self): self.kwargs, aweme_data_list, user_path ) + async def fetch_user_music_collection( + self, max_cursor: int, page_counts: int, max_counts: int + ) -> AsyncGenerator[List[Dict[str, Any]], Any]: + """ + 用于获取指定用户收藏的音乐作品列表。 + + Args: + max_cursor: int: 起始页 + page_counts: int: 每页视频数 + max_counts: int: 最大视频数 + + Return: + aweme_data: AsyncGenerator[List[Dict[str, Any]], None]: 音乐作品数据 + """ + + max_counts = max_counts or float("inf") + music_collected = 0 + + logger.debug(_("开始爬取用户收藏的音乐作品")) + + while music_collected < max_counts: + current_request_size = min(page_counts, max_counts - music_collected) + + logger.debug("=====================================") + logger.debug( + _("最大数量: {0} 每次请求数量: {1}").format( + max_counts, current_request_size + ) + ) + logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserMusicCollection( + cursor=max_cursor, count=current_request_size + ) + response = await crawler.fetch_user_music_collection(params) + music = UserMusicCollectionFilter(response) + + logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug( + _("音乐ID: {0} 音乐标题: {1} 作者: 
{2}").format( + music.music_id, music.title, music.author + ) + ) + logger.debug("=====================================") + + yield music._to_list() + + if not music.has_more: + logger.debug(_("用户收藏的音乐作品采集完毕")) + break + + # 更新已经处理的音乐数量 (Update the number of music processed) + music_collected += len(music.music_id) + max_cursor = music.max_cursor + @mode_handler("collection") async def handle_user_collection(self): """ From 516f64e17fee45e1ad26722ff4b138c4b57da29d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:50:02 +0800 Subject: [PATCH 048/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`model`=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E9=9F=B3?= =?UTF-8?q?=E4=B9=90=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index 7e0814a..9a7c78d 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -111,6 +111,12 @@ class UserCollectsVideo(BaseRequestModel): collects_id: str +class UserMusicCollection(BaseRequestModel): + # GET + cursor: int + count: int + + class UserMix(BaseRequestModel): cursor: int count: int From 8f1acbf37231adb7126a6b2455d56cb349a0bcb8 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:54:44 +0800 Subject: [PATCH 049/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0douyin=20`utils`=20js?= =?UTF-8?q?on=E8=BD=AClrc=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 抖音json格式歌词生成lrc格式歌词 --- f2/apps/douyin/utils.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index ca41754..8d63af6 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -718,3 +718,33 @@ def show_qrcode(qrcode_url: str, show_image: bool = False) -> None: qr.make(fit=True) # 
在控制台以 ASCII 形式打印二维码 qr.print_ascii(invert=True) + + +def json_2_lrc(data: Union[str, list, dict]) -> str: + """ + 从抖音原声json格式歌词生成lrc格式歌词 + (Generate lrc lyrics format from Douyin original json lyrics format) + + Args: + data (Union[str, list, dict]): 抖音原声json格式歌词 (Douyin original json lyrics format) + + Returns: + str: 生成的lrc格式歌词 (Generated lrc format lyrics) + """ + try: + lrc_lines = [] + for item in data: + text = item["text"] + time_seconds = float(item["timeId"]) + minutes = int(time_seconds // 60) + seconds = int(time_seconds % 60) + milliseconds = int((time_seconds % 1) * 1000) + time_str = f"{minutes:02}:{seconds:02}.{milliseconds:03}" + lrc_lines.append(f"[{time_str}] {text}") + except KeyError as e: + raise KeyError(_("歌词数据字段错误:{0}").format(e)) + except RuntimeError as e: + raise RuntimeError(_("生成歌词文件失败:{0},请检查歌词 `data` 内容").format(e)) + except TypeError as e: + raise TypeError(_("歌词数据类型错误:{0}").format(e)) + return "\n".join(lrc_lines) From 03fe2319bc2453844e5e2d459c06ba1146fd1dd2 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:55:59 +0800 Subject: [PATCH 050/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`dl`=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E9=9F=B3?= =?UTF-8?q?=E4=B9=90=E4=B8=8B=E8=BD=BD=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/dl.py | 75 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index 1eb6c00..ca077a7 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -9,7 +9,7 @@ from f2.dl.base_downloader import BaseDownloader from f2.utils.utils import get_timestamp, timestamp_2_str from f2.apps.douyin.db import AsyncUserDB -from f2.apps.douyin.utils import format_file_name +from f2.apps.douyin.utils import format_file_name, json_2_lrc class DouyinDownloader(BaseDownloader): @@ -271,6 +271,79 @@ async def 
handler_download( # 保存最后一个aweme_id await self.save_last_aweme_id(sec_user_id, aweme_id) + async def create_music_download_tasks( + self, kwargs: dict, music_datas: Union[list, dict], user_path: Any + ) -> None: + """ + 创建音乐下载任务 + + Args: + kwargs (dict): 命令行参数 + music_datas (list, dict): 音乐数据列表或字典 + user_path (Any): 用户目录路径 + """ + + if ( + not kwargs + or not music_datas + or not isinstance(music_datas, (list, dict)) + or not user_path + ): + return + + if isinstance(music_datas, dict): + await self.handler_music_download(kwargs, music_datas, user_path) + else: + for music_data in music_datas: + await self.handler_music_download(kwargs, music_data, user_path) + + # 执行下载任务 + await self.execute_tasks() + + async def handler_music_download( + self, kwargs: dict, music_data_dict: dict, user_path: Any + ) -> None: + """ + 处理音乐下载任务 + + Args: + kwargs (dict): 命令行参数 + music_data_dict (dict): 音乐数据字典 + user_path (Any): 用户目录路径 + """ + + # 构建文件夹路径 + base_path = ( + user_path / music_data_dict.get("title") + if kwargs.get("folderize") + else user_path + ) + music_name = music_data_dict.get("title") + "_music" + music_url = music_data_dict.get("play_url") + lyric_name = music_data_dict.get("title") + "_lyric" + lyric_url = music_data_dict.get("lyric_url") + + if music_url != None: + await self.initiate_download( + _("音乐"), music_url, base_path, music_name, ".mp3" + ) + + if kwargs.get("lyric"): + if lyric_url is None: + return + + # 下载str格式的json歌词文件 + lyric = await self.get_fetch_data(lyric_url) + + # 如果json歌词文件下载成功,则读取并处理成lrc格式 + if lyric.status_code != 200: + return + + lrc_content = json_2_lrc(lyric.json()) + await self.initiate_static_download( + _("歌词"), lrc_content, base_path, lyric_name, ".lrc" + ) + async def create_stream_tasks( self, kwargs: dict, webcast_datas: Union[list, dict], user_path: Any ) -> None: From cf69fe55338d592238454fd7fbd8ce9f25ddcfa3 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:58:26 +0800 Subject: [PATCH 051/164] 
=?UTF-8?q?=E6=9B=B4=E6=AD=A3=E5=8F=97collects=5Fi?= =?UTF-8?q?d=E7=B1=BB=E5=9E=8B=E5=AF=BC=E8=87=B4=E7=9A=84=E5=A4=9A?= =?UTF-8?q?=E6=AC=A1=E8=BD=AC=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 只在计算下标时转换为int --- f2/apps/douyin/handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 7d97786..f51b7c1 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -629,7 +629,7 @@ async def handle_user_collects(self): tmp_user_path = ( tmp_user_path / collects.collects_name[ - collects.collects_id.index(collects_id) + collects.collects_id.index(int(collects_id)) ] ) else: @@ -681,7 +681,7 @@ async def select_user_collects( if selected_index == 0: return collects.collects_id else: - return collects.collects_id[selected_index - 1] + return str(collects.collects_id[selected_index - 1]) async def fetch_user_collects( self, max_cursor: int, page_counts: int, max_counts: int From dc9246da2e18f30cc5ec6f8a15fae9480c9c308f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 22:59:01 +0800 Subject: [PATCH 052/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/dl.py | 3 +-- f2/apps/douyin/handler.py | 10 +++++----- f2/apps/tiktok/dl.py | 11 +++++++---- f2/apps/tiktok/handler.py | 8 ++++---- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index ca077a7..9d00219 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -17,8 +17,7 @@ def __init__(self, kwargs: dict = {}): if kwargs["cookie"] is None: raise ValueError( _( - "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 f2 -d dy --help,如扫码登录请保留双引号cookie: " - ",再使用--sso-login命令。" + "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 `--auto-cookie edge`" ) ) 
diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index f51b7c1..4a5eb4d 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -307,7 +307,7 @@ async def fetch_user_post_videos( videos_collected += len(video.aweme_id) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("like") async def handle_user_like(self): @@ -412,7 +412,7 @@ async def fetch_user_like_videos( videos_collected += len(aweme_data_list) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("music") async def handle_user_music_collection(self): @@ -815,7 +815,7 @@ async def fetch_user_collects_videos( break max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("mix") async def handle_user_mix(self): @@ -912,7 +912,7 @@ async def fetch_user_mix_videos( logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) break - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("live") async def handle_user_live(self): @@ -1114,7 +1114,7 @@ async def fetch_user_feed_videos( videos_collected += len(video.aweme_id) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) async def handle_sso_login(): diff --git a/f2/apps/tiktok/dl.py b/f2/apps/tiktok/dl.py index 3b7fe3e..bc0d298 100644 --- a/f2/apps/tiktok/dl.py +++ b/f2/apps/tiktok/dl.py @@ -17,8 +17,7 @@ def __init__(self, kwargs: dict = {}): if kwargs["cookie"] is None: raise ValueError( _( - "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 f2 -d dy --help,如扫码登录请保留双引号cookie: " - ",再使用--sso-login命令。" + "cookie不能为空。请提供有效的 
cookie 参数,或自动从浏览器获取 `--auto-cookie edge`" ) ) @@ -82,7 +81,9 @@ async def filter_aweme_datas_by_interval( ) return aweme_datas else: - logger.warning(_("作品发布时间不在指定区间内:{0}").format(aweme_date_str)) + logger.warning( + _("作品发布时间不在指定区间内:{0}").format(aweme_date_str) + ) return None elif isinstance(aweme_datas, list): @@ -159,7 +160,9 @@ async def handler_download( ) secUid = str(aweme_data_dict.get("secUid")) # 用户ID - aweme_privateItem = aweme_data_dict.get("privateItem") # 作品权限 false公开, true私密 + aweme_privateItem = aweme_data_dict.get( + "privateItem" + ) # 作品权限 false公开, true私密 aweme_secret = aweme_data_dict.get("secret") # 作品权限 false公开, true私密 aweme_id = str(aweme_data_dict.get("aweme_id")) # 视频ID diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 3b74762..c8bdbb9 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -370,7 +370,7 @@ async def fetch_user_post_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("like") async def handler_user_like(self): @@ -467,7 +467,7 @@ async def fetch_user_like_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("collect") async def handler_user_collect(self): @@ -564,7 +564,7 @@ async def fetch_user_collect_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) @mode_handler("mix") async def handler_user_mix(self): @@ -667,7 +667,7 @@ async def fetch_user_mix_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 
个视频").format(videos_collected)) async def main(kwargs): From be86ff737593c7638d7a8526e5e6a3b5394a8894 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:00:10 +0800 Subject: [PATCH 053/164] =?UTF-8?q?=E6=9B=B4=E6=AD=A3tiktok=20`handler`=20?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E7=94=A8=E6=88=B7=E4=BF=A1=E6=81=AF=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E7=9A=84=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在仅提供uniqueId的时候secUid为空 --- f2/apps/tiktok/handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index c8bdbb9..8262ee4 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -48,7 +48,7 @@ def __init__(self, kwargs) -> None: self.downloader = TiktokDownloader(kwargs) async def handler_user_profile( - self, secUid: str, uniqueId: str = "" + self, secUid: str = "", uniqueId: str = "" ) -> UserProfileFilter: """ 用于获取指定用户的个人信息 @@ -56,6 +56,7 @@ async def handler_user_profile( Args: secUid: str: 用户ID (User ID) + uniqueId: str: 用户唯一ID (User unique ID) Return: user: UserProfileFilter: 用户信息过滤器 (User info filter) From ca37dc7f763a34be1fd59e3e97711007ef09a996 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:01:00 +0800 Subject: [PATCH 054/164] =?UTF-8?q?=E6=B7=BB=E5=8A=A0`base=5Fdownloader`?= =?UTF-8?q?=E5=AF=B9=E9=87=8D=E5=91=BD=E5=90=8D=E6=96=87=E4=BB=B6=E6=97=B6?= =?UTF-8?q?=E7=9A=84=E5=BC=82=E5=B8=B8=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/dl/base_downloader.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index 9483aa2..734ac4a 100644 --- a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -147,7 +147,28 @@ async def download_file( ) # 下载完成后重命名文件 (Rename file after download is complete) - 
tmp_path.rename(full_path) + try: + tmp_path.rename(full_path) + except FileExistsError: + logger.warning(_("{0} 已存在,将覆盖".format(full_path))) + tmp_path.replace(full_path) + except PermissionError: + logger.error( + _("另一个程序正在使用此文件或受异步调度影响,该任务需要重新下载") + ) + # 尝试删除临时文件 (Try to delete the temporary file) + try: + tmp_path.unlink() + tmp_path.rename(full_path) + except Exception as e: + logger.error(_("尝试删除临时文件失败: {0}".format(e))) + + await self.progress.update( + task_id, + description=_("[ 失败 ]:"), + filename=trim_filename(full_path.name, 45), + state="completed", + ) await self.progress.update( task_id, From af4f75eda02948753ac3a293da65e2425e186b07 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:02:28 +0800 Subject: [PATCH 055/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0`=5Fdl`=20head?= =?UTF-8?q?=E8=8E=B7=E5=8F=96`Content-Length`=E5=A4=B1=E6=95=88=E6=97=B6?= =?UTF-8?q?=E7=94=A8get=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/utils/_dl.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/f2/utils/_dl.py b/f2/utils/_dl.py index 0283888..22257f0 100644 --- a/f2/utils/_dl.py +++ b/f2/utils/_dl.py @@ -25,7 +25,17 @@ async def get_content_length(url: str, headers: dict = {}, proxies: dict = {}) - ) as client: try: response = await client.head(url, headers=headers, follow_redirects=True) + # 当head请求被禁止时,释放status异常被捕获 (When head requests are forbidden, release status exceptions are caught) response.raise_for_status() + + if ( + response.headers.get("Content-Length") != None + and int(response.headers.get("Content-Length")) == 0 + ): + # 如果head请求无法获取Content-Length, 则使用GET请求再次尝试获取 + response = await client.get(url, headers=headers, follow_redirects=True) + response.raise_for_status() + except httpx.ConnectTimeout: # 连接超时错误处理 (Handling connection timeout errors) logger.error(_("连接超时错误: {0}".format(url))) From b97cbda09a378cbd0cbdf20a4234d3d524d1e5ab Mon Sep 17 00:00:00 2001 From: 
JohnserfSeed Date: Sun, 10 Mar 2024 23:03:52 +0800 Subject: [PATCH 056/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0douyin=20=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复fetch_user_post_videos去除默认值后调用时的参数 --- docs/snippets/douyin/user-post.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/snippets/douyin/user-post.py b/docs/snippets/douyin/user-post.py index 1414141..c83ba07 100644 --- a/docs/snippets/douyin/user-post.py +++ b/docs/snippets/douyin/user-post.py @@ -16,7 +16,7 @@ async def main(): results = [ aweme_data_list async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - user_sec_id + user_sec_id, 0, 10, 20 ) ] print(results) From 534ead0d723a13b27927186347d8c6ddd7482fc9 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:06:29 +0800 Subject: [PATCH 057/164] =?UTF-8?q?=E4=B8=BA=20douyin=20=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E4=B8=8E=E9=AB=98=E9=A2=91=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E6=AD=8C=E8=AF=8D=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/conf/app.yaml | 1 + f2/conf/defaults.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/f2/conf/app.yaml b/f2/conf/app.yaml index 941dca4..fae14ec 100644 --- a/f2/conf/app.yaml +++ b/f2/conf/app.yaml @@ -5,6 +5,7 @@ douyin: path: Download timeout: 10 max_retries: 5 + lyric: yes max_connections: 5 max_counts: 0 max_tasks: 10 diff --git a/f2/conf/defaults.yaml b/f2/conf/defaults.yaml index 106b169..2f7c0c1 100644 --- a/f2/conf/defaults.yaml +++ b/f2/conf/defaults.yaml @@ -1,6 +1,7 @@ douyin: url: music: + lyric: cover: desc: path: From 360e307b9b19c4c03569e22202ceb3fafcfcbff3 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:12:24 +0800 Subject: [PATCH 058/164] =?UTF-8?q?=E4=B8=BA=20douyin=20`cli`=E6=B7=BB?= 
=?UTF-8?q?=E5=8A=A0=E4=BA=86"--lyric"=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 902d72c..6dfb186 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -392,6 +392,7 @@ def merge_config(main_conf, custom_conf, **kwargs): "代理服务器,最多 2 个参数,http与https。空格区分 2 个参数 http://x.x.x.x https://x.x.x.x" ), ) +@click.option("--lyric", "-L", type=bool, help=_("是否保存原声歌词。可选:'yes'、'no'")) @click.option( "--update-config", type=bool, From a5ecd83b4b0232956087f243cdce118b99ba7848 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:18:07 +0800 Subject: [PATCH 059/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新 douyin 帮助内容 --- f2/apps/douyin/cli.py | 10 +++++----- f2/apps/douyin/help.py | 11 ++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 6dfb186..1ce8c3f 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -269,21 +269,21 @@ def merge_config(main_conf, custom_conf, **kwargs): "-m", type=bool, # default="yes", - help=_("是否保存视频原声。可选:'yes'、'no'"), + help=_("是否保存视频原声"), ) @click.option( "--cover", "-v", type=bool, # default="yes", - help=_("是否保存视频封面。可选:'yes'、'no'"), + help=_("是否保存视频封面"), ) @click.option( "--desc", "-d", type=bool, # default="yes", - help=_("是否保存视频文案。可选:'yes'、'no'"), + help=_("是否保存视频文案。可选"), ) @click.option( "--path", @@ -297,7 +297,7 @@ def merge_config(main_conf, custom_conf, **kwargs): "-f", type=bool, # default="yes", - help=_("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + help=_("是否将作品保存到单独的文件夹"), ) @click.option( "--mode", @@ -392,7 +392,7 @@ def merge_config(main_conf, custom_conf, **kwargs): "代理服务器,最多 2 
个参数,http与https。空格区分 2 个参数 http://x.x.x.x https://x.x.x.x" ), ) -@click.option("--lyric", "-L", type=bool, help=_("是否保存原声歌词。可选:'yes'、'no'")) +@click.option("--lyric", "-L", type=bool, help=_("是否保存原声歌词")) @click.option( "--update-config", type=bool, diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index 8d1b3b2..ae18050 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -24,14 +24,14 @@ def help() -> None: "根据模式提供相应的链接。例如:主页、点赞、收藏作品填入主页链接,单作品填入作品链接,合辑与直播同上" ), ), - ("-m --music", "[dark_cyan]Choice", _("是否保存视频原声。可选:'yes'、'no'")), - ("-v --cover", "[dark_cyan]Choice", _("是否保存视频封面。可选:'yes'、'no'")), - ("-d --desc", "[dark_cyan]Choice", _("是否保存视频文案。可选:'yes'、'no'")), + ("-m --music", "[dark_cyan]Bool", _("是否保存视频原声")), + ("-v --cover", "[dark_cyan]Bool", _("是否保存视频封面")), + ("-d --desc", "[dark_cyan]Bool", _("是否保存视频文案")), ("-p --path", "[dark_cyan]str", _("作品保存位置,支持绝对与相对路径。")), ( "-f --folderize", - "[dark_cyan]Choice", - _("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + "[dark_cyan]Bool", + _("是否将作品保存到单独的文件夹"), ), ( "-M --mode", @@ -81,6 +81,7 @@ def help() -> None: "代理服务器,最多 2 个参数,http与https。空格区分 2 个参数 http://x.x.x.x https://x.x.x.x" ), ), + ("-L --lyric", "[dark_cyan]Bool", _("是否保存视频歌词")), ( "--update-config", "[dark_cyan]Flag", From 589167b48995f032ef0448d6c2fbe56c81743a24 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:21:04 +0800 Subject: [PATCH 060/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除了多余的。 --- f2/apps/douyin/cli.py | 16 ++++++++-------- f2/apps/douyin/help.py | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 1ce8c3f..0d2dfba 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -283,14 +283,14 @@ def merge_config(main_conf, custom_conf, **kwargs): "-d", type=bool, # 
default="yes", - help=_("是否保存视频文案。可选"), + help=_("是否保存视频文案"), ) @click.option( "--path", "-p", type=str, # default="Download", - help=_("作品保存位置,支持绝对与相对路径。"), + help=_("作品保存位置,支持绝对与相对路径"), ) @click.option( "--folderize", @@ -338,28 +338,28 @@ def merge_config(main_conf, custom_conf, **kwargs): "-e", type=int, # default=10, - help=_("网络请求超时时间。"), + help=_("网络请求超时时间"), ) @click.option( "--max_retries", "-r", type=int, # default=5, - help=_("网络请求超时重试数。"), + help=_("网络请求超时重试数"), ) @click.option( "--max-connections", "-x", type=int, # default=5, - help=_("网络请求并发连接数。"), + help=_("网络请求并发连接数"), ) @click.option( "--max-tasks", "-t", type=int, # default=10, - help=_("异步的任务数。"), + help=_("异步的任务数"), ) @click.option( "--max-counts", @@ -373,14 +373,14 @@ def merge_config(main_conf, custom_conf, **kwargs): "-s", type=int, # default=20, - help=_("从接口每页可获取作品数,不建议超过20。"), + help=_("从接口每页可获取作品数,不建议超过20"), ) @click.option( "--languages", "-l", type=click.Choice(["zh_CN", "en_US"]), default="zh_CN", - help=_("显示语言。默认为 'zh_CN'。可选:'zh_CN'、'en_US'。不支持配置文件修改。"), + help=_("显示语言。默认为 'zh_CN',可选:'zh_CN'、'en_US',不支持配置文件修改"), callback=handler_language, ) @click.option( diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index ae18050..e367819 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -59,15 +59,15 @@ def help() -> None: "下载日期区间发布的作品,格式:2022-01-01|2023-01-01,'all' 为下载所有作品" ), ), - ("-e --timeout", "[dark_cyan]int", _("网络请求超时时间。")), - ("-r --max-retries", "[dark_cyan]int", _("网络请求超时重试数。")), - ("-x --max-connections", "[dark_cyan]int", _("网络请求并发连接数。")), - ("-t --max-tasks", "[dark_cyan]int", _("异步的任务数。")), + ("-e --timeout", "[dark_cyan]int", _("网络请求超时时间")), + ("-r --max-retries", "[dark_cyan]int", _("网络请求超时重试数")), + ("-x --max-connections", "[dark_cyan]int", _("网络请求并发连接数")), + ("-t --max-tasks", "[dark_cyan]int", _("异步的任务数")), ("-o --max-counts", "[dark_cyan]int", _("最大作品下载数。0 表示无限制")), ( "-s --page-counts", "[dark_cyan]int", - _("从接口每页可获取作品数,不建议超过20。"), + 
_("从接口每页可获取作品数,不建议超过20"), ), ( "-l --languages", From c5eb9b6070a8a53076870596fc0b92fcee16b671 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:25:20 +0800 Subject: [PATCH 061/164] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E7=9A=84=5F=5Finit=5F=5F.py=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/__init__.py | 3 --- f2/apps/tiktok/__init__.py | 3 --- 2 files changed, 6 deletions(-) delete mode 100644 f2/apps/douyin/__init__.py delete mode 100644 f2/apps/tiktok/__init__.py diff --git a/f2/apps/douyin/__init__.py b/f2/apps/douyin/__init__.py deleted file mode 100644 index 11af569..0000000 --- a/f2/apps/douyin/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# path: f2/apps/douyin/__init__.py - -from f2.apps.douyin.help import help diff --git a/f2/apps/tiktok/__init__.py b/f2/apps/tiktok/__init__.py deleted file mode 100644 index 256d3a8..0000000 --- a/f2/apps/tiktok/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# path: f2/apps/tiktok/__init__.py - -from f2.apps.tiktok.help import help From 9fe3adb2f4f9bbe839045944e8c5b45a9275b07c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 10 Mar 2024 23:41:15 +0800 Subject: [PATCH 062/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 调整日志的=长度 --- f2/apps/douyin/dl.py | 2 +- f2/apps/douyin/handler.py | 40 +++++++++++++++++++-------------------- f2/apps/tiktok/dl.py | 2 +- f2/apps/tiktok/handler.py | 18 +++++++++--------- f2/utils/_dl.py | 4 ++-- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index 9d00219..f1cd470 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -174,7 +174,7 @@ async def handler_download( logger.debug(f"========{aweme_id}========") logger.debug(aweme_data_dict) - 
logger.debug("================") + logger.debug("===================================") # 检查作品是否被屏蔽 if aweme_prohibited: diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 4a5eb4d..de5494e 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -266,7 +266,7 @@ async def fetch_user_post_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -298,7 +298,7 @@ async def fetch_user_post_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -371,7 +371,7 @@ async def fetch_user_like_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -403,7 +403,7 @@ async def fetch_user_like_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -467,7 +467,7 @@ async def fetch_user_music_collection( while music_collected < max_counts: current_request_size = min(page_counts, max_counts - music_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -488,7 +488,7 @@ async def fetch_user_music_collection( music.music_id, music.title, music.author ) ) - 
logger.debug("=====================================") + logger.debug("===================================") yield music._to_list() @@ -558,7 +558,7 @@ async def fetch_user_collection_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -577,7 +577,7 @@ async def fetch_user_collection_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -704,7 +704,7 @@ async def fetch_user_collects( while collected < max_counts: logger.debug(_("开始爬取用户收藏夹")) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("当前请求的max_cursor: {0}, max_counts: {1}").format( max_cursor, max_counts @@ -721,7 +721,7 @@ async def fetch_user_collects( collects.collects_id, collects.collects_name ) ) - logger.debug("=====================================") + logger.debug("===================================") yield collects @@ -766,7 +766,7 @@ async def fetch_user_collects_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -800,7 +800,7 @@ async def fetch_user_collects_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -878,7 +878,7 @@ async def fetch_user_mix_videos( while 
videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -899,7 +899,7 @@ async def fetch_user_mix_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -954,7 +954,7 @@ async def fetch_user_live_videos(self, webcast_id: str): """ logger.debug(_("开始爬取直播: {0} 的数据").format(webcast_id)) - logger.debug("=====================================") + logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: params = UserLive(web_rid=webcast_id, room_id_str="") @@ -971,7 +971,7 @@ async def fetch_user_live_videos(self, webcast_id: str): live.sub_partition_title, live.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") logger.debug(_("直播信息爬取结束")) webcast_data = live._to_dict() @@ -992,7 +992,7 @@ async def fetch_user_live_videos_by_room_id(self, room_id: str): """ logger.debug(_("开始爬取房间号: {0} 的数据").format(room_id)) - logger.debug("=====================================") + logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: params = UserLive2(room_id=room_id) @@ -1013,7 +1013,7 @@ async def fetch_user_live_videos_by_room_id(self, room_id: str): ), ) ) - logger.debug("=====================================") + logger.debug("===================================") logger.debug(_("直播信息爬取结束")) webcast_data = live._to_dict() @@ -1073,7 +1073,7 @@ async def fetch_user_feed_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - 
logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -1105,7 +1105,7 @@ async def fetch_user_feed_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list diff --git a/f2/apps/tiktok/dl.py b/f2/apps/tiktok/dl.py index bc0d298..e95b207 100644 --- a/f2/apps/tiktok/dl.py +++ b/f2/apps/tiktok/dl.py @@ -168,7 +168,7 @@ async def handler_download( logger.debug(f"========{aweme_id}========") logger.debug(aweme_data_dict) - logger.debug("================") + logger.debug("===================================") # 检查作品是否被屏蔽 if aweme_privateItem: diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 8262ee4..f256bab 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -181,7 +181,7 @@ async def fetch_play_list( playlist.mixId, playlist.mixName ) ) - logger.debug("=====================================") + logger.debug("===================================") return playlist async def select_playlist( @@ -335,7 +335,7 @@ async def fetch_user_post_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -363,7 +363,7 @@ async def fetch_user_post_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") yield video._to_list() @@ -425,7 +425,7 @@ async def fetch_user_like_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - 
logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size @@ -445,7 +445,7 @@ async def fetch_user_like_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -522,7 +522,7 @@ async def fetch_user_collect_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size @@ -542,7 +542,7 @@ async def fetch_user_collect_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list @@ -625,7 +625,7 @@ async def fetch_user_mix_videos( while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -645,7 +645,7 @@ async def fetch_user_mix_videos( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") aweme_data_list = video._to_list() yield aweme_data_list diff --git a/f2/utils/_dl.py b/f2/utils/_dl.py index 22257f0..d38d9c3 100644 --- a/f2/utils/_dl.py +++ b/f2/utils/_dl.py @@ -39,9 +39,9 @@ async def get_content_length(url: str, headers: dict = {}, proxies: dict = {}) - except httpx.ConnectTimeout: # 连接超时错误处理 (Handling connection timeout errors) 
logger.error(_("连接超时错误: {0}".format(url))) - logger.error("==========================") + logger.error("===================================") logger.error(f"headers:{headers}, proxies:{proxies}") - logger.error("==========================") + logger.error("===================================") return 0 # 对HTTP状态错误进行处理 (Handling HTTP status errors) except httpx.HTTPStatusError as exc: From 15b0654ee97b91aba28e5b9dd6000cd28da09628 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:10:33 +0800 Subject: [PATCH 063/164] =?UTF-8?q?=E5=88=A0=E9=99=A4douyin=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84get=5Fcookie=5Ffrom=5Fbrowser=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 0d2dfba..0c4955a 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -4,7 +4,6 @@ import click import typing import asyncio -import browser_cookie3 from pathlib import Path @@ -78,36 +77,9 @@ def handler_auto_cookie( ctx.abort() -def get_cookie_from_browser(browser_choice: str): - """ - 根据用户选择的浏览器获取douyin.com的cookie。 - - Args: - browser_choice (str): 用户选择的浏览器名称 - - Returns: - str: *.douyin.com的cookie值 - """ - - BROWSER_FUNCTIONS = { - "chrome": browser_cookie3.chrome, - "firefox": browser_cookie3.firefox, - "edge": browser_cookie3.edge, - "opera": browser_cookie3.opera, - } - cj_function = BROWSER_FUNCTIONS.get(browser_choice) - if not cj_function: - raise ValueError(_("不支持的浏览器选项, 输入f2 dy --help查看更多帮助!")) - - cj = cj_function(domain_name="douyin.com") - # cookie_value = next((c.value for c in cj if c.name == 'ttwid'), None) - cookie_value = {c.name: c.value for c in cj if c.domain.endswith("douyin.com")} - if not cookie_value: - raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(browser_choice)) - return cookie_value def handler_language( From 
65e4c9f4757e61eca658b4efbca47ba2b6bd2f81 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:11:39 +0800 Subject: [PATCH 064/164] =?UTF-8?q?=E5=88=A0=E9=99=A4tiktok=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84get=5Fcookie=5Ffrom=5Fbrowser=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/cli.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 8122a6e..e3c3f03 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -3,7 +3,6 @@ import f2 import click import typing -import browser_cookie3 from pathlib import Path @@ -75,36 +74,6 @@ def handler_auto_cookie( ctx.abort() -def get_cookie_from_browser(browser_choice: str): - """ - 根据用户选择的浏览器获取tiktok.com的cookie。 - - Args: - browser_choice (str): 用户选择的浏览器名称 - - Returns: - str: *.tiktok.com的cookie值 - """ - - BROWSER_FUNCTIONS = { - "chrome": browser_cookie3.chrome, - "firefox": browser_cookie3.firefox, - "edge": browser_cookie3.edge, - "opera": browser_cookie3.opera, - } - cj_function = BROWSER_FUNCTIONS.get(browser_choice) - if not cj_function: - raise ValueError(_("不支持的浏览器选项, 输入f2 dy --help查看更多帮助!")) - - cj = cj_function(domain_name="tiktok.com") - - # cookie_value = next((c.value for c in cj if c.name == 'ttwid'), None) - cookie_value = {c.name: c.value for c in cj if c.domain.endswith("tiktok.com")} - - if not cookie_value: - raise ValueError(_("无法从{0}浏览器中获取cookie").format(browser_choice)) - - return cookie_value def handler_language( From 1111d479933efc9e6fcbd9dcda50d41bcd15b7f7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:13:51 +0800 Subject: [PATCH 065/164] =?UTF-8?q?f2=20`utils`=E6=B7=BB=E5=8A=A0get=5Fcoo?= =?UTF-8?q?kie=5Ffrom=5Fbrowser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、utils添加了更多的浏览器列表 2、添加BROWSER_LIST --- f2/__init__.py | 13 
+++++++++++++ f2/utils/utils.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/f2/__init__.py b/f2/__init__.py index 7f8ba0b..dcc8236 100644 --- a/f2/__init__.py +++ b/f2/__init__.py @@ -8,3 +8,16 @@ APP_CONFIG_FILE_PATH = "conf/app.yaml" F2_CONFIG_FILE_PATH = "conf/conf.yaml" F2_DEFAULTS_FILE_PATH = "conf/defaults.yaml" + +BROWSER_LIST = [ + "chrome", + "firefox", + "edge", + "opera", + "opera_gx", + "safari", + "chromium", + "brave", + "vivaldi", + "librewolf", +] diff --git a/f2/utils/utils.py b/f2/utils/utils.py index 62d1eaa..988763e 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -5,6 +5,7 @@ import random import secrets import datetime +import browser_cookie3 import importlib_resources from typing import Union, Any @@ -237,3 +238,35 @@ def split_filename(text: str, os_limit: dict) -> str: def ensure_path(path: Union[str, Path]) -> Path: """确保路径是一个Path对象 (Ensure the path is a Path object)""" return Path(path) if isinstance(path, str) else path + + +def get_cookie_from_browser(browser_choice: str, domain: str = "") -> dict: + """ + 根据用户选择的浏览器获取domain的cookie。 + + Args: + browser_choice (str): 用户选择的浏览器名称 + + Returns: + str: *.domain的cookie值 + """ + + if not browser_choice or not domain: + return "" + + BROWSER_FUNCTIONS = { + "chrome": browser_cookie3.chrome, + "firefox": browser_cookie3.firefox, + "edge": browser_cookie3.edge, + "opera": browser_cookie3.opera, + "opera_gx": browser_cookie3.opera_gx, + "safari": browser_cookie3.safari, + "chromium": browser_cookie3.chromium, + "brave": browser_cookie3.brave, + "vivaldi": browser_cookie3.vivaldi, + "librewolf": browser_cookie3.librewolf, + } + cj_function = BROWSER_FUNCTIONS.get(browser_choice) + cj = cj_function(domain_name=domain) + cookie_value = {c.name: c.value for c in cj if c.domain.endswith(domain)} + return cookie_value From c0d44f8ca3ced69e1ca9a49efbf51ef72810e0f5 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:18:33 +0800 Subject: 
[PATCH 066/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0douyin=20`cli`=20hand?= =?UTF-8?q?ler=5Fauto=5Fcookie=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化逻辑 2、使用utils的get_cookie_from_browser方法 3、支持保存到自定义配置文件 --- f2/apps/douyin/cli.py | 44 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 0c4955a..cc55437 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -10,7 +10,7 @@ from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path +from f2.utils.utils import split_dict_cookie, get_resource_path, get_cookie_from_browser from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ from f2.apps.douyin.handler import handle_sso_login @@ -52,34 +52,30 @@ def handler_auto_cookie( param: 提供的参数或选项 (The provided parameter or option) value: 参数或选项的值 (The value of the parameter or option) """ - if not value or ctx.resilient_parsing: - return - - # 如果用户明确设置了 --cookie,那么跳过自动获取过程 - if ctx.params.get("cookie"): + # 如果没有提供值或者用户已经设置了 resilient_parsing 或者提供了 --cookie 参数则跳过 + if not value or ctx.resilient_parsing or ctx.params.get("cookie"): return # 根据浏览器选择获取cookie - if value in ["chrome", "firefox", "edge", "opera"]: - try: - cookie_value = split_dict_cookie(get_cookie_from_browser(value)) - manager = ConfigManager(ctx.params.get("config", "conf/app.yaml")) - manager.update_config_with_args("douyin", cookie=cookie_value) - except PermissionError: - message = _("请关闭所有已打开的浏览器重试, 并且你有适当的权限访问浏览器 !") - logger.error(message) - click.echo(message) - ctx.abort() - except Exception as e: - message = _("自动获取Cookie失败: {0}".format(str(e))) - logger.error(message) - click.echo(message) - ctx.abort() - - + try: + cookie_value = split_dict_cookie(get_cookie_from_browser(value, 
"douyin.com")) + if not cookie_value: + raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) + # 如果没有提供配置文件,那么使用高频配置文件 + if not ctx.params.get("config"): + manager = ConfigManager(get_resource_path(f2.APP_CONFIG_FILE_PATH)) + else: + manager = ConfigManager(ctx.params.get("config")) + manager.update_config_with_args("douyin", cookie=cookie_value) + except PermissionError: + logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) + ctx.abort() + except Exception as e: + logger.error(_("自动获取Cookie失败:{0}".format(str(e)))) + ctx.abort() def handler_language( @@ -376,7 +372,7 @@ def merge_config(main_conf, custom_conf, **kwargs): ) @click.option( "--auto-cookie", - type=click.Choice(["none", "chrome", "firefox", "edge", "opera"]), + type=click.Choice(f2.BROWSER_LIST), # default="none", help=_( "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" From 7e82527d9157dba091acc980a465a9f854bddc92 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:19:24 +0800 Subject: [PATCH 067/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0tiktok=20`cli`=20hand?= =?UTF-8?q?ler=5Fauto=5Fcookie=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、优化逻辑 2、使用utils的get_cookie_from_browser方法 3、支持保存到自定义配置文件 --- f2/apps/tiktok/cli.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index e3c3f03..32845b6 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -9,7 +9,7 @@ from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path +from f2.utils.utils import split_dict_cookie, get_resource_path, get_cookie_from_browser from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ @@ -49,31 +49,30 @@ def handler_auto_cookie( param: 提供的参数或选项 (The provided parameter or 
option) value: 参数或选项的值 (The value of the parameter or option) """ - if not value or ctx.resilient_parsing: - return - - # 如果用户明确设置了 --cookie,那么跳过自动获取过程 - if ctx.params.get("cookie"): + # 如果用户没有提供值或者设置了 resilient_parsing 或者设置了 --cookie,那么跳过自动获取过程 + if not value or ctx.resilient_parsing or ctx.params.get("cookie"): return # 根据浏览器选择获取cookie - if value in ["chrome", "firefox", "edge", "opera"]: - try: - cookie_value = split_dict_cookie(get_cookie_from_browser(value)) - manager = ConfigManager(ctx.params.get("config", "conf/app.yaml")) - manager.update_config_with_args("tiktok", cookie=cookie_value) - except PermissionError: - message = _("请关闭所有已打开的浏览器重试, 并且你有适当的权限访问浏览器 !") - logger.error(message) - click.echo(message) - ctx.abort() - except Exception as e: - message = _("自动获取Cookie失败: {0}".format(str(e))) - logger.error(message) - click.echo(message) - ctx.abort() + try: + cookie_value = split_dict_cookie(get_cookie_from_browser(value, "tiktok.com")) + + if not cookie_value: + raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) + # 如果没有提供配置文件,那么使用高频配置文件 + if not ctx.params.get("config"): + manager = ConfigManager(get_resource_path(f2.APP_CONFIG_FILE_PATH)) + else: + manager = ConfigManager(ctx.params.get("config")) + manager.update_config_with_args("tiktok", cookie=cookie_value) + except PermissionError: + logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) + ctx.abort() + except Exception as e: + logger.error(_("自动获取Cookie失败:{0}".format(str(e)))) + ctx.abort() def handler_language( @@ -336,7 +335,7 @@ def merge_config(main_conf, custom_conf, **kwargs): # @click.confirmation_option(prompt='是否要使用命令行的参数更新配置文件?') @click.option( "--auto-cookie", - type=click.Choice(["none", "chrome", "firefox", "edge", "opera"]), + type=click.Choice(f2.BROWSER_LIST), # default="none", help=_( "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" From a431271c09822e574d015b1442c8f7eb9f364147 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:20:55 +0800 
Subject: [PATCH 068/164] =?UTF-8?q?=E9=98=B2=E6=AD=A2=E5=87=BA=E7=8E=B0?= =?UTF-8?q?=E6=B2=A1=E6=9C=89=E6=89=BE=E5=88=B0=E5=B8=AE=E5=8A=A9=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit c5eb9b6070a8a53076870596fc0b92fcee16b671 --- f2/helps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/helps.py b/f2/helps.py index 91c55d2..bbcaf06 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -25,7 +25,7 @@ def get_help(app_name: str) -> None: try: - module = importlib.import_module(f"f2.apps.{app_name}") + module = importlib.import_module(f"f2.apps.{app_name}.help") if hasattr(module, "help"): module.help() else: From d400f967b632989110575a858af975fdeeaa9101 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:21:50 +0800 Subject: [PATCH 069/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 默认值用省略值代替 --- f2/apps/douyin/crawler.py | 5 ++++- f2/apps/tiktok/crawler.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index d527c03..1cdaeed 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -27,7 +27,10 @@ class DouyinCrawler(BaseCrawler): - def __init__(self, kwargs: dict = {}): + def __init__( + self, + kwargs: dict = ..., + ): f2_manager = ConfigManager(f2.F2_CONFIG_FILE_PATH) f2_conf = f2_manager.get_config("f2").get("douyin") proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) diff --git a/f2/apps/tiktok/crawler.py b/f2/apps/tiktok/crawler.py index 1e84c96..87d1b5a 100644 --- a/f2/apps/tiktok/crawler.py +++ b/f2/apps/tiktok/crawler.py @@ -21,7 +21,10 @@ class TiktokCrawler(BaseCrawler): - def __init__(self, kwargs: dict = {}): + def __init__( + self, + kwargs: dict = ..., + ): f2_manager = 
ConfigManager(f2.F2_CONFIG_FILE_PATH) f2_conf = f2_manager.get_config("f2").get("tiktok") proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) From 439c4aab1a023cd8934a7988e9e3ee8c696729f1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:26:27 +0800 Subject: [PATCH 070/164] change get_help @ importlib path a431271c09822e574d015b1442c8f7eb9f364147 --- f2/helps.py | 1 + 1 file changed, 1 insertion(+) diff --git a/f2/helps.py b/f2/helps.py index bbcaf06..16520d1 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -11,6 +11,7 @@ ------------------------------------------------- Change Log : 2023/02/06 17:36:41 - create output help +2024/03/11 18:23:30 - change get_help @ importlib path ------------------------------------------------- """ From 1ee30d72fe65afa50c5c7de84e4136fc85093271 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:32:36 +0800 Subject: [PATCH 071/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handle_help -> handler_help --- f2/apps/douyin/cli.py | 16 ++++++++++++---- f2/apps/tiktok/cli.py | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index cc55437..32343be 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -16,7 +16,7 @@ from f2.apps.douyin.handler import handle_sso_login -def handle_help( +def handler_help( ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, @@ -83,8 +83,16 @@ def handler_language( param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> typing.Any: - """用于设置语言 (For setting the language)""" + """用于设置语言 (For setting the language) + Args: + ctx: click的上下文对象 (Click's context object) + param: 提供的参数或选项 (The provided parameter or option) + value: 参数或选项的值 (The value of the parameter 
or option) + """ + + if not value or ctx.resilient_parsing: + return TranslationManager.get_instance().set_language(value) global _ _ = TranslationManager.get_instance().gettext @@ -391,7 +399,7 @@ def merge_config(main_conf, custom_conf, **kwargs): is_eager=True, expose_value=False, help=_("显示富文本帮助"), - callback=handle_help, + callback=handler_help, ) @click.pass_context def douyin(ctx, config, init_config, update_config, **kwargs): @@ -473,7 +481,7 @@ def douyin(ctx, config, init_config, update_config, **kwargs): # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): logger.error("缺乏URL参数,详情看命令帮助") - handle_help(ctx, None, True) + handler_help(ctx, None, True) # 添加app_name到kwargs kwargs["app_name"] = "douyin" diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 32845b6..9f7dbef 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -14,7 +14,7 @@ from f2.i18n.translator import TranslationManager, _ -def handle_help( +def handler_help( ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, @@ -80,8 +80,16 @@ def handler_language( param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> typing.Any: - """用于设置语言 (For setting the language)""" + """用于设置语言 (For setting the language) + Args: + ctx: click的上下文对象 (Click's context object) + param: 提供的参数或选项 (The provided parameter or option) + value: 参数或选项的值 (The value of the parameter or option) + """ + + if not value or ctx.resilient_parsing: + return TranslationManager.get_instance().set_language(value) global _ _ = TranslationManager.get_instance().gettext @@ -348,7 +356,7 @@ def merge_config(main_conf, custom_conf, **kwargs): is_eager=True, expose_value=False, help="显示富文本帮助", - callback=handle_help, + callback=handler_help, ) @click.pass_context def tiktok(ctx, config, init_config, update_config, **kwargs): @@ -430,7 +438,7 @@ def tiktok(ctx, config, init_config, update_config, **kwargs): # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): 
logger.error("缺乏URL参数,详情看命令帮助") - handle_help(ctx, None, True) + handler_help(ctx, None, True) # 添加app_name到kwargs kwargs["app_name"] = "tiktok" From f0f49995fc75eb5afffe0a7565a63e1a22a69804 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:56:32 +0800 Subject: [PATCH 072/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0douyin=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84handler=5Fnaming=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将处理错误naming的方法单独放在f2 utils中 --- f2/apps/douyin/cli.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 32343be..7913da7 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -10,7 +10,12 @@ from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path, get_cookie_from_browser +from f2.utils.utils import ( + split_dict_cookie, + get_resource_path, + get_cookie_from_browser, + check_invalid_naming, +) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ from f2.apps.douyin.handler import handle_sso_login @@ -125,28 +130,8 @@ def handler_naming( ALLOWED_PATTERNS = ["{nickname}", "{create}", "{aweme_id}", "{desc}", "{uid}"] ALLOWED_SEPARATORS = ["-", "_"] - temp_naming = value - invalid_patterns = [] - - # 检查提供的模式是否有效 - for pattern in ALLOWED_PATTERNS: - if pattern in temp_naming: - temp_naming = temp_naming.replace(pattern, "") - - # 此时,temp_naming应只包含分隔符 - for char in temp_naming: - if char not in ALLOWED_SEPARATORS: - invalid_patterns.append(char) - - # 检查连续的无效模式或分隔符 - for pattern in ALLOWED_PATTERNS: - # 检查像"{aweme_id}{aweme_id}"这样的模式 - if pattern + pattern in value: - invalid_patterns.append(pattern + pattern) - for sep in ALLOWED_SEPARATORS: - # 检查像"{aweme_id}-{aweme_id}"这样的模式 - if pattern + sep + pattern in 
value: - invalid_patterns.append(pattern + sep + pattern) + # 检查命名是否符合命名规范 + invalid_patterns = check_invalid_naming(value, ALLOWED_PATTERNS, ALLOWED_SEPARATORS) if invalid_patterns: raise click.BadParameter( From 8c85055567960589e43670ee4a2ce6df9d3edfce Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:57:03 +0800 Subject: [PATCH 073/164] =?UTF-8?q?=E6=9B=B4=E6=96=B0tiktok=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84handler=5Fnaming=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将处理错误naming的方法单独放在f2 utils中 --- f2/apps/tiktok/cli.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 9f7dbef..3610be6 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -9,7 +9,12 @@ from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path, get_cookie_from_browser +from f2.utils.utils import ( + split_dict_cookie, + get_resource_path, + get_cookie_from_browser, + check_invalid_naming, +) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ @@ -122,28 +127,8 @@ def handler_naming( ALLOWED_PATTERNS = ["{nickname}", "{create}", "{aweme_id}", "{desc}", "{uid}"] ALLOWED_SEPARATORS = ["-", "_"] - temp_naming = value - invalid_patterns = [] - - # 检查提供的模式是否有效 - for pattern in ALLOWED_PATTERNS: - if pattern in temp_naming: - temp_naming = temp_naming.replace(pattern, "") - - # 此时,temp_naming应只包含分隔符 - for char in temp_naming: - if char not in ALLOWED_SEPARATORS: - invalid_patterns.append(char) - - # 检查连续的无效模式或分隔符 - for pattern in ALLOWED_PATTERNS: - # 检查像"{aweme_id}{aweme_id}"这样的模式 - if pattern + pattern in value: - invalid_patterns.append(pattern + pattern) - for sep in ALLOWED_SEPARATORS: - # 检查像"{aweme_id}-{aweme_id}"这样的模式 - if pattern + sep + 
pattern in value: - invalid_patterns.append(pattern + sep + pattern) + # 检查命名是否符合命名规范 + invalid_patterns = check_invalid_naming(value, ALLOWED_PATTERNS, ALLOWED_SEPARATORS) if invalid_patterns: raise click.BadParameter( From ada1cde47afcebd25acb565808adef4ae664af34 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:57:39 +0800 Subject: [PATCH 074/164] =?UTF-8?q?f2=20`utils`=E6=B7=BB=E5=8A=A0check=5Fi?= =?UTF-8?q?nvalid=5Fnaming?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、减少冗余代码 --- f2/utils/utils.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/f2/utils/utils.py b/f2/utils/utils.py index 988763e..9b97c34 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -270,3 +270,45 @@ def get_cookie_from_browser(browser_choice: str, domain: str = "") -> dict: cj = cj_function(domain_name=domain) cookie_value = {c.name: c.value for c in cj if c.domain.endswith(domain)} return cookie_value + + +def check_invalid_naming( + naming: str, allowed_patterns: list, allowed_separators: list +) -> list: + """ + 检查命名是否符合命名模板 (Check if the naming conforms to the naming template) + + Args: + naming (str): 命名字符串 (Naming string) + allowed_patterns (list): 允许的模式列表 (List of allowed patterns) + allowed_separators (list): 允许的分隔符列表 (List of allowed separators) + Returns: + list: 无效的模式列表 (List of invalid patterns) + """ + if not naming or not allowed_patterns or not allowed_separators: + return [] + + temp_naming = naming + invalid_patterns = [] + + # 检查提供的模式是否有效 + for pattern in allowed_patterns: + if pattern in temp_naming: + temp_naming = temp_naming.replace(pattern, "") + + # 此时,temp_naming应只包含分隔符 + for char in temp_naming: + if char not in allowed_separators: + invalid_patterns.append(char) + + # 检查连续的无效模式或分隔符 + for pattern in allowed_patterns: + # 检查像"{xxx}{xxx}"这样的模式 + if pattern + pattern in naming: + invalid_patterns.append(pattern + pattern) + for sep in 
allowed_patterns: + # 检查像"{xxx}-{xxx}"这样的模式 + if pattern + sep + pattern in naming: + invalid_patterns.append(pattern + sep + pattern) + + return invalid_patterns From c7718b73c2ab723db21f3d38efbbb26de7799634 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 18:58:01 +0800 Subject: [PATCH 075/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 优化逻辑 --- f2/apps/douyin/cli.py | 10 ++++------ f2/apps/tiktok/cli.py | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 7913da7..31d6e3a 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -69,11 +69,9 @@ def handler_auto_cookie( raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) # 如果没有提供配置文件,那么使用高频配置文件 - if not ctx.params.get("config"): - manager = ConfigManager(get_resource_path(f2.APP_CONFIG_FILE_PATH)) - else: - manager = ConfigManager(ctx.params.get("config")) - + manager = ConfigManager( + ctx.params.get("config", get_resource_path(f2.APP_CONFIG_FILE_PATH)) + ) manager.update_config_with_args("douyin", cookie=cookie_value) except PermissionError: logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) @@ -123,7 +121,7 @@ def handler_naming( value: 命名模式模板 (Naming pattern template) """ # 避免和配置文件参数冲突 - if value is None: + if not value or ctx.resilient_parsing: return # 允许的模式和分隔符 diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 3610be6..f82e67a 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -66,11 +66,9 @@ def handler_auto_cookie( raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) # 如果没有提供配置文件,那么使用高频配置文件 - if not ctx.params.get("config"): - manager = ConfigManager(get_resource_path(f2.APP_CONFIG_FILE_PATH)) - else: - manager = ConfigManager(ctx.params.get("config")) - + manager = ConfigManager( + ctx.params.get("config", 
get_resource_path(f2.APP_CONFIG_FILE_PATH)) + ) manager.update_config_with_args("tiktok", cookie=cookie_value) except PermissionError: logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) @@ -120,7 +118,7 @@ def handler_naming( value: 命名模式模板 (Naming pattern template) """ # 避免和配置文件参数冲突 - if value is None: + if not value or ctx.resilient_parsing: return # 允许的模式和分隔符 From a9363381e0861df28199281bcae2836686c53b18 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 19:02:41 +0800 Subject: [PATCH 076/164] =?UTF-8?q?=E7=A7=BB=E5=8A=A8douyin=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84merge=5Fconfig=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 减少冗余 --- f2/apps/douyin/cli.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 31d6e3a..ebd13b2 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -15,6 +15,7 @@ get_resource_path, get_cookie_from_browser, check_invalid_naming, + merge_config, ) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ @@ -178,35 +179,6 @@ def handler_sso_login( raise click.UsageError(_("SSO登录失败,请重试!")) -def merge_config(main_conf, custom_conf, **kwargs): - """ - 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 - Args: - main_conf (dict): 主配置参数字典 - custom_conf (dict): 自定义配置参数字典 - **kwargs: CLI 参数和其他额外的配置参数 - - Returns: - dict: 合并后的配置参数字典 - """ - # 合并主配置和自定义配置 - merged_conf = {} - for key, value in main_conf.items(): - merged_conf[key] = value # 将主配置复制到合并后的配置中 - for key, value in custom_conf.items(): - if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 - merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 - - # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 - for key, value in kwargs.items(): - if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 - merged_conf[key] = value - elif value is not None and value != "": # 如果值不为 None 和 
空值,则进行合并 - merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 - - return merged_conf - - @click.command(name="douyin", help=_("抖音无水印解析")) @click.option( "--config", From 94c63afc00ac134a79962a00fc0ec64bd478cec6 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 19:03:10 +0800 Subject: [PATCH 077/164] =?UTF-8?q?=E7=A7=BB=E5=8A=A8tiktok=20`cli`?= =?UTF-8?q?=E4=B8=8B=E7=9A=84merge=5Fconfig=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 减少冗余 --- f2/apps/tiktok/cli.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index f82e67a..a7f6e0c 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -14,6 +14,7 @@ get_resource_path, get_cookie_from_browser, check_invalid_naming, + merge_config, ) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ @@ -140,35 +141,6 @@ def handler_naming( return value -def merge_config(main_conf, custom_conf, **kwargs): - """ - 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 - Args: - main_conf (dict): 主配置参数字典 - custom_conf (dict): 自定义配置参数字典 - **kwargs: CLI 参数和其他额外的配置参数 - - Returns: - dict: 合并后的配置参数字典 - """ - # 合并主配置和自定义配置 - merged_conf = {} - for key, value in main_conf.items(): - merged_conf[key] = value # 将主配置复制到合并后的配置中 - for key, value in custom_conf.items(): - if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 - merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 - - # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 - for key, value in kwargs.items(): - if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 - merged_conf[key] = value - elif value is not None and value != "": # 如果值不为 None 和 空值,则进行合并 - merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 - - return merged_conf - - @click.command(name="tiktok", help=_("TikTok无水印解析")) @click.option( "--config", From e557162e13f22dd70c240059370ff69e3444803c Mon Sep 17 
00:00:00 2001 From: JohnserfSeed Date: Mon, 11 Mar 2024 19:03:48 +0800 Subject: [PATCH 078/164] =?UTF-8?q?f2=20`utils`=E6=B7=BB=E5=8A=A0merge=5Fc?= =?UTF-8?q?onfig?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将重复的方法独立,减少cli的冗余 --- f2/utils/utils.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/f2/utils/utils.py b/f2/utils/utils.py index 9b97c34..11cf639 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -312,3 +312,37 @@ def check_invalid_naming( invalid_patterns.append(pattern + sep + pattern) return invalid_patterns + + +def merge_config( + main_conf: dict = ..., + custom_conf: dict = ..., + **kwargs, +): + """ + 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 + + Args: + main_conf (dict): 主配置参数字典 + custom_conf (dict): 自定义配置参数字典 + **kwargs: CLI 参数和其他额外的配置参数 + + Returns: + dict: 合并后的配置参数字典 + """ + # 合并主配置和自定义配置 + merged_conf = {} + for key, value in main_conf.items(): + merged_conf[key] = value # 将主配置复制到合并后的配置中 + for key, value in custom_conf.items(): + if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 + merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 + + # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 + for key, value in kwargs.items(): + if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 + merged_conf[key] = value + elif value is not None and value != "": # 如果值不为 None 和 空值,则进行合并 + merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 + + return merged_conf From 3e0c9042eb36c18b172583376b3586683467e9f1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 17 Mar 2024 15:48:29 +0800 Subject: [PATCH 079/164] =?UTF-8?q?=E6=B3=A8=E9=87=8A=E4=B8=8E=E9=83=A8?= =?UTF-8?q?=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、添加参数类型注释 2、修改cli帮助信息 3、添加代码路径注释 --- f2/apps/douyin/cli.py | 10 ++++++++-- f2/apps/douyin/db.py | 2 +- f2/apps/douyin/filter.py | 2 ++ f2/apps/douyin/help.py | 2 +- 
f2/apps/tiktok/cli.py | 22 ++++++++++++++-------- f2/apps/tiktok/help.py | 2 +- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index ebd13b2..ba0fd6b 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -338,7 +338,7 @@ def handler_sso_login( type=click.Choice(f2.BROWSER_LIST), # default="none", help=_( - "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器" ), callback=handler_auto_cookie, ) @@ -357,7 +357,13 @@ def handler_sso_login( callback=handler_help, ) @click.pass_context -def douyin(ctx, config, init_config, update_config, **kwargs): +def douyin( + ctx: click.Context, + config: str, + init_config: str, + update_config: bool, + **kwargs, +): ################## # f2 存在2个主配置文件,分别是app低频配置(app.yaml)和f2低频配置(conf.yaml) # app低频配置存放app相关的参数 diff --git a/f2/apps/douyin/db.py b/f2/apps/douyin/db.py index 43da126..d7231fc 100644 --- a/f2/apps/douyin/db.py +++ b/f2/apps/douyin/db.py @@ -77,7 +77,7 @@ async def add_user_info(self, ignore_fields=None, **kwargs) -> None: # VALUES (?, {placeholders})', (kwargs.get('sec_user_id'), *values)) await self.commit() - async def update_user_info(self, sec_user_id, **kwargs) -> None: + async def update_user_info(self, sec_user_id: str, **kwargs) -> None: """ 更新用户信息 diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 3a3078f..bd856f1 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1,3 +1,5 @@ +# path: f2/apps/douyin/filter.py + from f2.utils.json_filter import JSONModel from f2.utils.utils import _get_first_item_from_list, timestamp_2_str, replaceT diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index e367819..8929832 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -96,7 +96,7 @@ def help() -> None: "--auto-cookie", "[dark_cyan]Choice", _( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + 
"自动从浏览器获取[yellow]cookie[/yellow],使用该命令前请确保关闭所选的浏览器" ), ), ( diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index a7f6e0c..c4e9d59 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -162,35 +162,35 @@ def handler_naming( "-m", type=bool, # default="yes", - help=_("是否保存视频原声。可选:'yes'、'no'"), + help=_("是否保存视频原声"), ) @click.option( "--cover", "-v", type=bool, # default="yes", - help=_("是否保存视频封面。可选:'yes'、'no'"), + help=_("是否保存视频封面"), ) @click.option( "--desc", "-d", type=bool, # default="yes", - help=_("是否保存视频文案。可选:'yes'、'no'"), + help=_("是否保存视频文案"), ) @click.option( "--path", "-p", type=str, # default="Download", - help=_("作品保存位置,支持绝对与相对路径。"), + help=_("作品保存位置,支持绝对与相对路径"), ) @click.option( "--folderize", "-f", type=bool, # default="yes", - help=_("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + help=_("是否将作品保存到单独的文件夹"), ) @click.option( "--mode", @@ -301,7 +301,7 @@ def handler_naming( type=click.Choice(f2.BROWSER_LIST), # default="none", help=_( - "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器" ), callback=handler_auto_cookie, ) @@ -310,11 +310,17 @@ def handler_naming( is_flag=True, is_eager=True, expose_value=False, - help="显示富文本帮助", + help=_("显示富文本帮助"), callback=handler_help, ) @click.pass_context -def tiktok(ctx, config, init_config, update_config, **kwargs): +def tiktok( + ctx: click.Context, + config: str, + init_config: str, + update_config: bool, + **kwargs, +) -> None: ################## # f2 存在2个主配置文件,分别是app低频配置(app.yaml)和f2低频配置(conf.yaml) # app低频配置存放app相关的参数 diff --git a/f2/apps/tiktok/help.py b/f2/apps/tiktok/help.py index 664088c..c862894 100644 --- a/f2/apps/tiktok/help.py +++ b/f2/apps/tiktok/help.py @@ -99,7 +99,7 @@ def help() -> None: "--auto-cookie", "[dark_cyan]Choice", _( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取[yellow]cookie[/yellow],使用该命令前请确保关闭所选的浏览器" ), ), ("--help", "[dark_cyan]Flag", _("显示经典帮助信息")), From 
aa88b9784f1a6822935650a22e72040bf4ee4392 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 17 Mar 2024 16:10:46 +0800 Subject: [PATCH 080/164] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=9C=AC?= =?UTF-8?q?=E5=9C=B0=E5=8C=96=E6=9C=8D=E5=8A=A1(#58)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 需获取翻译后再格式化 #58 --- f2/apps/douyin/cli.py | 22 ++++----- f2/apps/douyin/crawler.py | 42 ++++++++-------- f2/apps/douyin/dl.py | 2 +- f2/apps/douyin/handler.py | 54 ++++++++++---------- f2/apps/douyin/utils.py | 74 ++++++++++++---------------- f2/apps/tiktok/cli.py | 22 ++++----- f2/apps/tiktok/crawler.py | 18 +++---- f2/apps/tiktok/dl.py | 2 +- f2/apps/tiktok/handler.py | 18 +++---- f2/apps/tiktok/utils.py | 100 ++++++++++++++++---------------------- 10 files changed, 157 insertions(+), 197 deletions(-) diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index ba0fd6b..20609f3 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -78,7 +78,7 @@ def handler_auto_cookie( logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) ctx.abort() except Exception as e: - logger.error(_("自动获取Cookie失败:{0}".format(str(e)))) + logger.error(_("自动获取Cookie失败:{0}").format(str(e))) ctx.abort() @@ -134,10 +134,8 @@ def handler_naming( if invalid_patterns: raise click.BadParameter( - _( - "`{0}` 中的 `{1}` 不符合命名模式".format( - value, "".join(invalid_patterns) - ) + _("`{0}` 中的 `{1}` 不符合命名模式").format( + value, "".join(invalid_patterns) ) ) @@ -337,9 +335,7 @@ def handler_sso_login( "--auto-cookie", type=click.Choice(f2.BROWSER_LIST), # default="none", - help=_( - "自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器" - ), + help=_("自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器"), callback=handler_auto_cookie, ) @click.option( @@ -433,11 +429,11 @@ def douyin( # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}".format(main_conf_path))) - logger.info(_("自定义配置路径: {0}".format(Path.cwd() / config))) - 
logger.debug(_("主配置参数:{0}".format(main_conf))) - logger.debug(_("自定义配置参数:{0}".format(custom_conf))) - logger.debug(_("CLI参数:{0}".format(kwargs))) + logger.info(_("主配置路径: {0}").format(main_conf_path)) + logger.info(_("自定义配置路径: {0}").format(Path.cwd() / config)) + logger.debug(_("主配置参数:{0}").format(main_conf)) + logger.debug(_("自定义配置参数:{0}").format(custom_conf)) + logger.debug(_("CLI参数:{0}").format(kwargs)) # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 1cdaeed..a9e47f0 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -51,105 +51,105 @@ async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_DETAIL, params.dict() ) # fmt: off - logger.debug(_("用户信息接口地址:" + endpoint)) + logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_POST, params.dict() ) # fmt: off - logger.debug(_("主页作品接口地址:" + endpoint)) + logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_FAVORITE_A, params.dict() ) - logger.debug(_("主页喜欢作品接口地址:" + endpoint)) + logger.debug(_("主页喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collection(self, params: UserCollection): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_COLLECTION, params.dict() ) - logger.debug(_("主页收藏作品接口地址:" + endpoint)) + logger.debug(_("主页收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_post_json(endpoint, params.dict()) async def fetch_user_collects(self, params: UserCollects): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_COLLECTS, params.dict() ) - logger.debug(_("收藏夹接口地址:" + endpoint)) + 
logger.debug(_("收藏夹接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collects_video(self, params: UserCollectsVideo): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_COLLECTS_VIDEO, params.dict() ) - logger.debug(_("收藏夹作品接口地址:" + endpoint)) + logger.debug(_("收藏夹作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_music_collection(self, params: UserMusicCollection): endpoint = XBogusManager.model_2_endpoint( dyendpoint.USER_MUSIC_COLLECTION, params.dict() ) - logger.debug(_("音乐收藏接口地址:" + endpoint)) + logger.debug(_("音乐收藏接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( dyendpoint.MIX_AWEME, params.dict() ) # fmt: off - logger.debug(_("合集作品接口地址:" + endpoint)) + logger.debug(_("合集作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.POST_DETAIL, params.dict() ) # fmt: off - logger.debug(_("作品详情接口地址:" + endpoint)) + logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.POST_COMMENT, params.dict() ) - logger.debug(_("作品评论接口地址:" + endpoint)) + logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.TAB_FEED, params.dict() ) # fmt: off - logger.debug(_("首页推荐作品接口地址:" + endpoint)) + logger.debug(_("首页推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_follow_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.FOLLOW_FEED, params.dict() ) # fmt: off - logger.debug(_("关注作品接口地址:" 
+ endpoint)) + logger.debug(_("关注作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_friend_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.FRIEND_FEED, params.dict() ) # fmt: off - logger.debug(_("朋友作品接口地址:" + endpoint)) + logger.debug(_("朋友作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_related(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( dyendpoint.POST_RELATED, params.dict() ) - logger.debug(_("相关推荐作品接口地址:" + endpoint)) + logger.debug(_("相关推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_live(self, params: UserLive): endpoint = XBogusManager.model_2_endpoint( dyendpoint.LIVE_INFO, params.dict() ) # fmt: off - logger.debug(_("直播接口地址:" + endpoint)) + logger.debug(_("直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_live_room_id(self, params: UserLive2): @@ -160,7 +160,7 @@ async def fetch_live_room_id(self, params: UserLive2): endpoint = XBogusManager.model_2_endpoint( dyendpoint.LIVE_INFO_ROOM_ID, params.dict() ) - logger.debug(_("直播接口地址(room_id):" + endpoint)) + logger.debug(_("直播接口地址(room_id):{0}").format(endpoint)) return await self._fetch_get_json(endpoint) finally: self.aclient.headers = original_headers @@ -169,35 +169,35 @@ async def fetch_follow_live(self, params: FollowUserLive): endpoint = XBogusManager.model_2_endpoint( dyendpoint.FOLLOW_USER_LIVE, params.dict() ) - logger.debug(_("关注用户直播接口地址:" + endpoint)) + logger.debug(_("关注用户直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_locate_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( dyendpoint.LOCATE_POST, params.dict() ) # fmt: off - logger.debug(_("定位上一次作品接口地址:" + endpoint)) + logger.debug(_("定位上一次作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_login_qrcode(self, 
parms: LoginGetQr): endpoint = XBogusManager.model_2_endpoint( dyendpoint.SSO_LOGIN_GET_QR, parms.dict() ) - logger.debug(_("SSO获取二维码接口地址:" + endpoint)) + logger.debug(_("SSO获取二维码接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_check_qrcode(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( dyendpoint.SSO_LOGIN_CHECK_QR, parms.dict() ) - logger.debug(_("SSO检查扫码状态接口地址:" + endpoint)) + logger.debug(_("SSO检查扫码状态接口地址:{0}").format(endpoint)) return await self._fetch_response(endpoint) async def fetch_check_login(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( dyendpoint.SSO_LOGIN_CHECK_LOGIN, parms.dict() ) - logger.debug(_("SSO检查登录状态接口地址:" + endpoint)) + logger.debug(_("SSO检查登录状态接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def __aenter__(self): diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index f1cd470..6ed9410 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -17,7 +17,7 @@ def __init__(self, kwargs: dict = {}): if kwargs["cookie"] is None: raise ValueError( _( - "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 `--auto-cookie edge`" + "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取。如 `--auto-cookie edge`" ) ) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index de5494e..5e8cd79 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -179,7 +179,7 @@ async def handle_one_video(self): async with AsyncVideoDB("douyin_videos.db") as db: await self.get_or_add_video_data(aweme_data, db, self.ignore_fields) - logger.debug(_("单个视频数据: {0}".format(aweme_data))) + logger.debug(_("单个视频数据:{0}").format(aweme_data)) await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) async def fetch_one_video(self, aweme_id: str) -> dict: @@ -193,14 +193,14 @@ async def fetch_one_video(self, aweme_id: str) -> dict: video_data: dict: 视频数据字典,包含视频ID、视频文案、作者昵称 """ - logger.debug(_("开始爬取视频: {0}").format(aweme_id)) + 
logger.debug(_("开始爬取视频:{0}").format(aweme_id)) async with DouyinCrawler(self.kwargs) as crawler: params = PostDetail(aweme_id=aweme_id) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -261,14 +261,14 @@ async def fetch_user_post_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 发布的视频").format(sec_user_id)) + logger.debug(_("开始爬取用户:{0} 发布的视频").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -284,17 +284,17 @@ async def fetch_user_post_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + logger.debug(_("{0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + logger.debug(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = video.max_cursor continue - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -366,14 +366,14 @@ async def fetch_user_like_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 喜欢的视频").format(sec_user_id)) + logger.debug(_("开始爬取用户:{0} 喜欢的视频").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + 
_("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -389,17 +389,17 @@ async def fetch_user_like_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + logger.debug(_("{0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + logger.debug(_("用户:{0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = video.max_cursor continue - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -469,7 +469,7 @@ async def fetch_user_music_collection( logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -482,9 +482,9 @@ async def fetch_user_music_collection( response = await crawler.fetch_user_music_collection(params) music = UserMusicCollectionFilter(response) - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("音乐ID: {0} 音乐标题: {1} 作者: {2}").format( + _("音乐ID:{0} 音乐标题:{1} 作者:{2}").format( music.music_id, music.title, music.author ) ) @@ -659,13 +659,11 @@ async def select_user_collects( rich_console.print(_("0: [bold]全部下载[/bold]")) for i in range(len(collects.collects_id)): rich_console.print( - _( - "{0}: {1} (包含 {2} 个作品,收藏夹ID {3})".format( - i + 1, - collects.collects_name[i], - collects.total_number[i], - collects.collects_id[i], - ) + _("{0}: {1} (包含 {2} 个作品,收藏夹ID {3})").format( + i + 1, + collects.collects_name[i], + collects.total_number[i], + collects.collects_id[i], ) ) @@ -809,7 +807,7 @@ async def fetch_user_collects_videos( videos_collected += len(aweme_data_list) max_cursor = video.max_cursor else: - 
logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + logger.debug(_("{0} 页没有找到作品").format(max_cursor)) if not video.has_more: logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) break @@ -1091,9 +1089,9 @@ async def fetch_user_feed_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + logger.debug(_("{0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + logger.debug(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = video.max_cursor diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 8d63af6..2e927d9 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -73,7 +73,7 @@ def gen_real_msToken(cls) -> str: msToken = str(httpx.Cookies(response.cookies).get("msToken")) if len(msToken) not in [120, 128]: - raise APIResponseError(_("{0} 内容不符合要求".format("msToken"))) + raise APIResponseError(_("{0} 内容不符合要求").format("msToken")) return msToken @@ -90,19 +90,16 @@ def gen_real_msToken(cls) -> str: if e.response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( - "msToken", "douyin" - ) - ) + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("msToken", "douyin") ) + elif e.response.status_code == 404: - raise APINotFoundError(_("{0} 无法找到API端点".format("msToken"))) + raise APINotFoundError(_("{0} 无法找到API端点").format("msToken")) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -148,19 +145,16 @@ def gen_ttwid(cls) -> str: if e.response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( - "ttwid", "douyin" - ) - ) + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("ttwid", "douyin") ) + elif e.response.status_code == 404: raise 
APINotFoundError(_("ttwid无法找到API端点")) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -256,7 +250,7 @@ async def get_sec_user_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) pattern = ( @@ -279,30 +273,26 @@ async def get_sec_user_id(cls, url: str) -> str: else: raise APIResponseError( _( - "未在响应的地址中找到sec_user_id,检查链接是否为用户主页类名:{0}".format( - cls.__name__ - ) - ) + "未在响应的地址中找到sec_user_id,检查链接是否为用户主页类名:{0}" + ).format(cls.__name__) ) elif response.status_code == 401: raise APIUnauthorizedError( - _("未授权的请求。类名:{0}".format(cls.__name__)) + _("未授权的请求。类名:{0}").format(cls.__name__) ) elif response.status_code == 404: raise APINotFoundError( - _("未找到API端点。类名:{0}".format(cls.__name__)) + _("未找到API端点。类名:{0}").format(cls.__name__) ) elif response.status_code == 503: raise APIUnavailableError( - _("API服务不可用。类名:{0}".format(cls.__name__)) + _("API服务不可用。类名:{0}").format(cls.__name__) ) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - response.url, response.status_code, response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + response.url, response.status_code, response.text ) ) @@ -334,7 +324,7 @@ async def get_all_sec_user_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -367,7 +357,7 @@ async def get_aweme_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) # 重定向到完整链接 @@ -405,10 +395,8 @@ async def get_aweme_id(cls, url: str) -> str: except httpx.HTTPStatusError as e: raise APIResponseError( - _( - 
"链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -433,7 +421,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -477,7 +465,7 @@ async def get_webcast_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) try: # 重定向到完整链接 @@ -521,10 +509,8 @@ async def get_webcast_id(cls, url: str) -> str: except httpx.HTTPStatusError as e: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -549,7 +535,7 @@ async def get_all_webcast_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -608,7 +594,7 @@ def format_file_name( try: return naming_template.format(**fields) except KeyError as e: - raise KeyError(_("文件名模板字段 {0} 不存在,请检查".format(e))) + raise KeyError(_("文件名模板字段 {0} 不存在,请检查").format(e)) def create_user_folder(kwargs: dict, nickname: Union[str, int]) -> Path: diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index c4e9d59..5a01384 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -75,7 +75,7 @@ def handler_auto_cookie( logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) ctx.abort() except Exception as e: - logger.error(_("自动获取Cookie失败:{0}".format(str(e)))) + logger.error(_("自动获取Cookie失败:{0}").format(str(e))) ctx.abort() @@ -131,10 +131,8 @@ def handler_naming( if invalid_patterns: raise click.BadParameter( - _( - "`{0}` 中的 `{1}` 不符合命名模式".format( - 
value, "".join(invalid_patterns) - ) + _("`{0}` 中的 `{1}` 不符合命名模式").format( + value, "".join(invalid_patterns) ) ) @@ -300,9 +298,7 @@ def handler_naming( "--auto-cookie", type=click.Choice(f2.BROWSER_LIST), # default="none", - help=_( - "自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器" - ), + help=_("自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器"), callback=handler_auto_cookie, ) @click.option( @@ -390,11 +386,11 @@ def tiktok( # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}".format(main_conf_path))) - logger.info(_("自定义配置路径: {0}".format(Path.cwd() / config))) - logger.debug(_("主配置参数:{0}".format(main_conf))) - logger.debug(_("自定义配置参数:{0}".format(custom_conf))) - logger.debug(_("CLI参数:{0}".format(kwargs))) + logger.info(_("主配置路径: {0}").format(main_conf_path)) + logger.info(_("自定义配置路径: {0}").format(Path.cwd() / config)) + logger.debug(_("主配置参数:{0}").format(main_conf)) + logger.debug(_("自定义配置参数:{0}").format(custom_conf)) + logger.debug(_("CLI参数:{0}").format(kwargs)) # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): diff --git a/f2/apps/tiktok/crawler.py b/f2/apps/tiktok/crawler.py index 87d1b5a..ae92374 100644 --- a/f2/apps/tiktok/crawler.py +++ b/f2/apps/tiktok/crawler.py @@ -45,63 +45,63 @@ async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_DETAIL, params.dict() ) # fmt: off - logger.debug(_("用户信息接口地址:" + endpoint)) + logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_POST, params.dict() ) # fmt: off - logger.debug(_("主页作品接口地址:" + endpoint)) + logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_LIKE, params.dict() ) # fmt: off - logger.debug(_("喜欢作品接口地址:" 
+ endpoint)) + logger.debug(_("喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collect(self, params: UserCollect): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_COLLECT, params.dict() ) - logger.debug(_("收藏作品接口地址:" + endpoint)) + logger.debug(_("收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_play_list(self, params: UserPlayList): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_PLAY_LIST, params.dict() ) - logger.debug(_("合辑列表接口地址:" + endpoint)) + logger.debug(_("合辑列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( tkendpoint.USER_MIX, params.dict() ) # fmt: off - logger.debug(_("合辑作品接口地址:" + endpoint)) + logger.debug(_("合辑作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( tkendpoint.AWEME_DETAIL, params.dict() ) - logger.debug(_("作品详情接口地址:" + endpoint)) + logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostComment): endpoint = XBogusManager.model_2_endpoint( tkendpoint.POST_COMMENT, params.dict() ) - logger.debug(_("作品评论接口地址:" + endpoint)) + logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_recommend(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( tkendpoint.HOME_RECOMMEND, params.dict() ) - logger.debug(_("首页推荐接口地址:" + endpoint)) + logger.debug(_("首页推荐接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def __aenter__(self): diff --git a/f2/apps/tiktok/dl.py b/f2/apps/tiktok/dl.py index e95b207..04cc11b 100644 --- a/f2/apps/tiktok/dl.py +++ b/f2/apps/tiktok/dl.py @@ -17,7 +17,7 @@ def __init__(self, kwargs: dict 
= {}): if kwargs["cookie"] is None: raise ValueError( _( - "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 `--auto-cookie edge`" + "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取。如 `--auto-cookie edge`" ) ) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index f256bab..55e7d97 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -206,13 +206,11 @@ async def select_playlist( for i in range(len(playlists.mixId)): rich_console.print( - _( - "{0}: {1} (包含 {2} 个作品,收藏夹ID {3})".format( - i + 1, - playlists.mixName[i], - playlists.videoCount[i], - playlists.mixId[i], - ) + _("{0}: {1} (包含 {2} 个作品,收藏夹ID {3})").format( + i + 1, + playlists.mixName[i], + playlists.videoCount[i], + playlists.mixId[i], ) ) @@ -252,7 +250,7 @@ async def handler_one_video(self): async with AsyncVideoDB("tiktok_videos.db") as vdb: await self.get_or_add_video_data(aweme_data, vdb) - logger.debug(_("单个视频数据:{0}".format(aweme_data))) + logger.debug(_("单个视频数据:{0}").format(aweme_data)) # 创建下载任务 await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) @@ -349,9 +347,9 @@ async def fetch_user_post_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(cursor))) + logger.debug(_("{0} 页没有找到作品").format(cursor)) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户:{0} 所有作品采集完毕".format(secUid))) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: cursor = video.cursor diff --git a/f2/apps/tiktok/utils.py b/f2/apps/tiktok/utils.py index f7fc080..6eb8a4e 100644 --- a/f2/apps/tiktok/utils.py +++ b/f2/apps/tiktok/utils.py @@ -72,7 +72,7 @@ def gen_real_msToken(cls) -> str: msToken = str(httpx.Cookies(response.cookies).get("msToken")) if len(msToken) not in [148]: - raise APIResponseError(_("{0} 内容不符合要求".format("msToken"))) + raise APIResponseError(_("{0} 内容不符合要求").format("msToken")) return msToken @@ -89,19 +89,16 @@ def gen_real_msToken(cls) -> str: if response.status_code == 401: 
raise APIUnauthorizedError( _( - "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( - "msToken", "tiktok" - ) - ) + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("msToken", "tiktok") ) + elif response.status_code == 404: - raise APINotFoundError(_("{0} 无法找到API端点".format("msToken"))) + raise APINotFoundError(_("{0} 无法找到API端点").format("msToken")) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -156,19 +153,16 @@ def gen_ttwid(cls) -> str: if response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( - "ttwid", "tiktok" - ) - ) + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("ttwid", "tiktok") ) + elif response.status_code == 404: - raise APINotFoundError(_("{0} 无法找到API端点".format("ttwid"))) + raise APINotFoundError(_("{0} 无法找到API端点").format("ttwid")) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -186,7 +180,7 @@ def gen_odin_tt(cls): odin_tt = httpx.Cookies(response.cookies).get("odin_tt") if odin_tt is None: - raise APIResponseError(_("{0} 内容不符合要求".format("odin_tt"))) + raise APIResponseError(_("{0} 内容不符合要求").format("odin_tt")) return odin_tt @@ -203,19 +197,16 @@ def gen_odin_tt(cls): if response.status_code == 401: raise APIUnauthorizedError( _( - "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则".format( - "odin_tt", "tiktok" - ) - ) + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("odin_tt", "tiktok") ) + elif response.status_code == 404: - raise APINotFoundError(_("{0} 无法找到API端点".format("odin_tt"))) + raise APINotFoundError(_("{0} 无法找到API端点").format("odin_tt")) else: raise APIResponseError( - _( - "链接:{0},状态码 {1}:{2} ".format( - e.response.url, 
e.response.status_code, e.response.text - ) + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -278,7 +269,7 @@ async def get_secuid(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -292,19 +283,16 @@ async def get_secuid(cls, url: str) -> str: if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): raise APINotFoundError( _( - "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( - cls.__name__ - ) - ) + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) ) + match = cls._TIKTOK_SECUID_PARREN.search(str(response.text)) if not match: raise APIResponseError( _( - "未在响应中找到 {0},检查链接是否为用户主页。类名: {1}".format( - "sec_uid", cls.__name__ - ) - ) + "未在响应中找到 {0},检查链接是否为用户主页。类名: {1}" + ).format("sec_uid", cls.__name__) ) # 提取SIGI_STATE对象中的sec_uid @@ -316,7 +304,7 @@ async def get_secuid(cls, url: str) -> str: if sec_uid is None: raise RuntimeError( - _("获取 {0} 失败,{1}".format(sec_uid, user_info)) + _("获取 {0} 失败,{1}").format(sec_uid, user_info) ) return sec_uid @@ -352,7 +340,7 @@ async def get_all_secuid(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -378,7 +366,7 @@ async def get_uniqueid(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -392,22 +380,21 @@ async def get_uniqueid(cls, url: str) -> str: if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): raise APINotFoundError( _( - "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( - cls.__name__ - ) - ) + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) ) + match = 
cls._TIKTOK_UNIQUEID_PARREN.search(str(response.url)) if not match: raise APIResponseError( - _("未在响应中找到 {0}".format("unique_id")) + _("未在响应中找到 {0}").format("unique_id") ) unique_id = match.group(1) if unique_id is None: raise RuntimeError( - _("获取 {0} 失败,{1}".format("unique_id", response.url)) + _("获取 {0} 失败,{1}").format("unique_id", response.url) ) return unique_id @@ -444,7 +431,7 @@ async def get_all_uniqueid(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -479,7 +466,7 @@ async def get_aweme_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -493,22 +480,21 @@ async def get_aweme_id(cls, url: str) -> str: if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): raise APINotFoundError( _( - "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( - cls.__name__ - ) - ) + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) ) + match = cls._TIKTOK_AWEMEID_PARREN.search(str(response.url)) if not match: raise APIResponseError( - _("未在响应中找到 {0}".format("aweme_id")) + _("未在响应中找到 {0}").format("aweme_id") ) aweme_id = match.group(1) if aweme_id is None: raise RuntimeError( - _("获取 {0} 失败,{1}".format("aweme_id", response.url)) + _("获取 {0} 失败,{1}").format("aweme_id", response.url) ) return aweme_id @@ -546,7 +532,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -605,7 +591,7 @@ def format_file_name( try: return naming_template.format(**fields) except KeyError as e: - raise KeyError(_("文件名模板字段 {0} 不存在,请检查".format(e))) + raise KeyError(_("文件名模板字段 {0} 不存在,请检查").format(e)) def create_user_folder(kwargs: dict, nickname: Union[str, 
int]) -> Path: From c6771d75eac5034b54727ce3ef31ed1abe8abc6c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 17 Mar 2024 16:27:28 +0800 Subject: [PATCH 081/164] =?UTF-8?q?del:=20=E5=88=A0=E9=99=A4example?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 请参考docs下的代码片段 --- examples/douyin/VerifyFp.py | 4 -- examples/douyin/handler_user_post.py | 32 ------------ examples/douyin/msToken.py | 5 -- examples/douyin/ttwid.py | 4 -- examples/f2_progress.py | 77 ---------------------------- examples/tiktok/msToken.py | 5 -- examples/tiktok/odin_tt.py | 4 -- examples/tiktok/postDetail.py | 23 --------- examples/tiktok/secUid.py | 11 ---- examples/tiktok/ttwid.py | 4 -- examples/tiktok/userProfile.py | 25 --------- 11 files changed, 194 deletions(-) delete mode 100644 examples/douyin/VerifyFp.py delete mode 100644 examples/douyin/handler_user_post.py delete mode 100644 examples/douyin/msToken.py delete mode 100644 examples/douyin/ttwid.py delete mode 100644 examples/f2_progress.py delete mode 100644 examples/tiktok/msToken.py delete mode 100644 examples/tiktok/odin_tt.py delete mode 100644 examples/tiktok/postDetail.py delete mode 100644 examples/tiktok/secUid.py delete mode 100644 examples/tiktok/ttwid.py delete mode 100644 examples/tiktok/userProfile.py diff --git a/examples/douyin/VerifyFp.py b/examples/douyin/VerifyFp.py deleted file mode 100644 index 57ece4b..0000000 --- a/examples/douyin/VerifyFp.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.douyin.utils import VerifyFpManager - -if __name__ == "__main__": - print("verify_fp:", VerifyFpManager.gen_verify_fp()) diff --git a/examples/douyin/handler_user_post.py b/examples/douyin/handler_user_post.py deleted file mode 100644 index be69b3a..0000000 --- a/examples/douyin/handler_user_post.py +++ /dev/null @@ -1,32 +0,0 @@ -import asyncio -from f2.apps.douyin.model import UserPost -from f2.apps.douyin.filter import UserPostFilter -from 
f2.apps.douyin.crawler import DouyinCrawler - -kwargs = { - "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.douyin.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def test_user_post_fetcher(): - async with DouyinCrawler(kwargs) as crawler: - params = UserPost( - max_cursor=0, - count=5, - sec_user_id="MS4wLjABAAAAu8qwDm1-muGuMhZZ-tVzyPVWlUxIbQRNJN_9k83OhWU", - ) - response = await crawler.fetch_user_post(params) - - video = UserPostFilter(response) - print( - f"作者:{video.nickname[0]}, 所有作品id:{video.aweme_id}, 每个作品的码率{video.video_bit_rate}" - ) - - -if __name__ == "__main__": - asyncio.run(test_user_post_fetcher()) diff --git a/examples/douyin/msToken.py b/examples/douyin/msToken.py deleted file mode 100644 index aa60a24..0000000 --- a/examples/douyin/msToken.py +++ /dev/null @@ -1,5 +0,0 @@ -from f2.apps.douyin.utils import TokenManager - -if __name__ == '__main__': - print('douyin real msToken:', TokenManager.gen_real_msToken()) - print('douyin fake msToken:', TokenManager.gen_false_msToken()) \ No newline at end of file diff --git a/examples/douyin/ttwid.py b/examples/douyin/ttwid.py deleted file mode 100644 index 996118f..0000000 --- a/examples/douyin/ttwid.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.douyin.utils import TokenManager - -if __name__ == '__main__': - print('douyin ttwid:', TokenManager.gen_ttwid()) \ No newline at end of file diff --git a/examples/f2_progress.py b/examples/f2_progress.py deleted file mode 100644 index 7ecb8eb..0000000 --- a/examples/f2_progress.py +++ /dev/null @@ -1,77 +0,0 @@ -import time -from f2.cli.cli_console import RichConsoleManager, CustomSpinnerColumn, ProgressManager -from f2.cli.cli_console import TextColumn, BarColumn, TimeElapsedColumn - - -if __name__ == "__main__": - - def simulate_progress(progress_manager): - # 启动进度条 - 
progress_manager.start() - - # 添加一个任务 - task_id = progress_manager.add_task( - "Demo Task: waiting", total=200, state="waiting" - ) - for _ in range(20): - time.sleep(0.1) - if _ == 4: # 模拟开始下载 - progress_manager.update( - task_id, description="Demo Task: starting", state="starting" - ) - elif _ == 8: # 模拟下载中 - progress_manager.update( - task_id, description="Demo Task: downloading", state="downloading" - ) - elif _ == 12: # 模拟暂停 - progress_manager.update( - task_id, description="Demo Task: paused", state="paused" - ) - time.sleep(1) # 暂停一会儿 - progress_manager.update( - task_id, description="Demo Task: downloading", state="downloading" - ) - elif _ == 16: # 模拟出错 - progress_manager.update( - task_id, description="Demo Task: error", state="error" - ) - time.sleep(0.5) - progress_manager.update( - task_id, description="Demo Task: completed", state="completed" - ) - - progress_manager.update(task_id, advance=10) - - # 停止进度条 - progress_manager.stop() - - print("Showing default progress:") - progress_manager_default = ProgressManager() - simulate_progress(progress_manager_default) - - print("\nShowing custom progress:") - my_spinners = { - "waiting": "dots12", - "downloading": "earth", - } - custom_spinner_column = CustomSpinnerColumn(spinner_styles=my_spinners, speed=0.5) - progress_manager_custom = ProgressManager(spinner_column=custom_spinner_column) - simulate_progress(progress_manager_custom) - - print("\nShowing custom 2 progress:") - custom_columns = { - "description": TextColumn("{task.description}"), - "bar": BarColumn( - complete_style="bright_magenta black", finished_style="bright_white green" - ), - "custom_percentage": TextColumn( - "[progress.custom_percentage]{task.percentage:>2.0f}%", style="bright_cyan" - ), - "elapsed": TimeElapsedColumn(), - } - progress_manager_custom2 = ProgressManager( - spinner_column=custom_spinner_column, - custom_columns=custom_columns, - expand=False, - ) - simulate_progress(progress_manager_custom2) diff --git 
a/examples/tiktok/msToken.py b/examples/tiktok/msToken.py deleted file mode 100644 index 0d54d50..0000000 --- a/examples/tiktok/msToken.py +++ /dev/null @@ -1,5 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok real msToken:", TokenManager.gen_real_msToken()) - print("tiktok fake msToken:", TokenManager.gen_false_msToken()) diff --git a/examples/tiktok/odin_tt.py b/examples/tiktok/odin_tt.py deleted file mode 100644 index 0aa87ea..0000000 --- a/examples/tiktok/odin_tt.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok odin_tt:", TokenManager.gen_odin_tt()) diff --git a/examples/tiktok/postDetail.py b/examples/tiktok/postDetail.py deleted file mode 100644 index 945306c..0000000 --- a/examples/tiktok/postDetail.py +++ /dev/null @@ -1,23 +0,0 @@ -import asyncio -from f2.apps.tiktok.handler import TiktokHandler - -kwargs = { - "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.tiktok.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def main(): - post = await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605") - print(post) - print("-------------------") - post = await TiktokHandler(kwargs).fetch_one_video(itemId="7305827432509082913") - print(post) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tiktok/secUid.py b/examples/tiktok/secUid.py deleted file mode 100644 index a42f531..0000000 --- a/examples/tiktok/secUid.py +++ /dev/null @@ -1,11 +0,0 @@ -import asyncio -from f2.apps.tiktok.utils import SecUserIdFetcher - - -async def main(): - secUid = await SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print(secUid) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tiktok/ttwid.py 
b/examples/tiktok/ttwid.py deleted file mode 100644 index c2bd857..0000000 --- a/examples/tiktok/ttwid.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok ttwid:", TokenManager.gen_ttwid()) diff --git a/examples/tiktok/userProfile.py b/examples/tiktok/userProfile.py deleted file mode 100644 index f9d5ed4..0000000 --- a/examples/tiktok/userProfile.py +++ /dev/null @@ -1,25 +0,0 @@ -import asyncio -from f2.apps.tiktok.handler import TiktokHandler - -kwargs = { - "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.tiktok.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def main(): - user = await TiktokHandler(kwargs).handler_user_profile( - secUid="MS4wLjABAAAAQhcYf_TjRKUku-aF8oqngAfzrYksgGLRz8CKMciBFdfR54HQu3qGs-WoJ-KO7hO8" - ) - print(user) - print("-------------------") - user = await TiktokHandler(kwargs).handler_user_profile(uniqueId="sophia.ilysm") - print(user) - - -if __name__ == "__main__": - asyncio.run(main()) From 25c3a5353ec056a72abc163c9cbf2c5731154ee0 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 17 Mar 2024 16:41:40 +0800 Subject: [PATCH 082/164] =?UTF-8?q?refactor:=20=E5=B0=86=E6=8A=96=E9=9F=B3?= =?UTF-8?q?`--mode`=E7=9A=84=E4=BD=BF=E7=94=A8=E7=BB=9F=E4=B8=80=E7=9A=84c?= =?UTF-8?q?hoice=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/__init__.py | 11 +++++++++++ f2/apps/douyin/cli.py | 4 +--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/f2/__init__.py b/f2/__init__.py index dcc8236..4fefda0 100644 --- a/f2/__init__.py +++ b/f2/__init__.py @@ -21,3 +21,14 @@ "vivaldi", "librewolf", ] + +DOUYIN_MODE_LIST = [ + "one", + "post", + "like", + "collection", + "collects", + "music", + "mix", + "live", +] diff --git 
a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 20609f3..3a9f916 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -231,9 +231,7 @@ def handler_sso_login( @click.option( "--mode", "-M", - type=click.Choice( - ["one", "post", "like", "collection", "collects", "music", "mix", "live"] - ), + type=click.Choice(f2.DOUYIN_MODE_LIST), # default="post", # required=True, help=_( From bfe94a0b501787c185cc07af6944610d64d31c0c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 17 Mar 2024 16:49:41 +0800 Subject: [PATCH 083/164] =?UTF-8?q?refactor:=20=E5=B0=86tiktok`--mode`?= =?UTF-8?q?=E7=9A=84=E4=BD=BF=E7=94=A8=E7=BB=9F=E4=B8=80=E7=9A=84choice?= =?UTF-8?q?=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/__init__.py | 2 ++ f2/apps/tiktok/cli.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/f2/__init__.py b/f2/__init__.py index 4fefda0..2e8867f 100644 --- a/f2/__init__.py +++ b/f2/__init__.py @@ -32,3 +32,5 @@ "mix", "live", ] + +TIKTOK_MODE_LIST = ["one", "post", "like", "collect", "mix"] diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 5a01384..b55a55c 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -193,7 +193,7 @@ def handler_naming( @click.option( "--mode", "-M", - type=click.Choice(["one", "post", "like", "collect", "mix"]), + type=click.Choice(f2.TIKTOK_MODE_LIST), # default="post", # required=True, help=_( From 2cf3a21a3a0c518aeaecd515a9948a345e7d5fba Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Fri, 22 Mar 2024 17:24:39 +0800 Subject: [PATCH 084/164] =?UTF-8?q?style:=20=E6=B3=A8=E9=87=8A=E4=B8=8E?= =?UTF-8?q?=E9=83=A8=E5=88=86=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/cli.py | 6 +++--- f2/apps/tiktok/cli.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/f2/apps/douyin/cli.py 
b/f2/apps/douyin/cli.py index 3a9f916..688b71a 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -401,7 +401,7 @@ def douyin( # 如果没有初始化配置文件,但是更新配置文件,则需要提供配置文件路径 elif update_config and not config: raise click.UsageError( - _("要更新配置, 首先需要使用'-c'选项提供一个自定义配置文件路径") + _("要更新配置,首先需要使用'-c'选项提供一个自定义配置文件路径") ) # 读取自定义配置文件 @@ -427,8 +427,8 @@ def douyin( # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}").format(main_conf_path)) - logger.info(_("自定义配置路径: {0}").format(Path.cwd() / config)) + logger.info(_("主配置路径:{0}").format(main_conf_path)) + logger.info(_("自定义配置路径:{0}").format(Path.cwd() / config)) logger.debug(_("主配置参数:{0}").format(main_conf)) logger.debug(_("自定义配置参数:{0}").format(custom_conf)) logger.debug(_("CLI参数:{0}").format(kwargs)) diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index b55a55c..35b1ee3 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -386,8 +386,8 @@ def tiktok( # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}").format(main_conf_path)) - logger.info(_("自定义配置路径: {0}").format(Path.cwd() / config)) + logger.info(_("主配置路径:{0}").format(main_conf_path)) + logger.info(_("自定义配置路径:{0}").format(Path.cwd() / config)) logger.debug(_("主配置参数:{0}").format(main_conf)) logger.debug(_("自定义配置参数:{0}").format(custom_conf)) logger.debug(_("CLI参数:{0}").format(kwargs)) From 65137624f5c37a281be0e6ead8f9d0ced6cb03ca Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 00:22:59 +0800 Subject: [PATCH 085/164] =?UTF-8?q?style:=20=E6=9B=B4=E6=96=B0F2=E5=B8=AE?= =?UTF-8?q?=E5=8A=A9=E8=AF=B4=E6=98=8E=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/helps.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/f2/helps.py b/f2/helps.py index 16520d1..4563ca0 
100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -43,37 +43,24 @@ def main() -> None: console.print(f"[i]{f2.__description_en__}", justify="center") console.print(f"[i]GitHub {f2.__repourl__}\n", justify="center") - table = Table.grid(padding=1, pad_edge=True, expand=True) - table.add_column("Website", no_wrap=True, justify="left", style="bold") - table.add_column("Description", no_wrap=True, justify="left", style="bold") - - # 分割 - # console.rule("[b]已适配[/b]", align="center") - # table.add_row( - # _("抖音"), _(" 单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,原声。后续更新:推荐作品,朋友作品,好友作品,搜索作品") - # ) - # table.add_row( - # _("TikTok"), _(" 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,原声。后续更新:推荐作品,朋友作品,好友作品,搜索作品") - # ) - # # 待适配 - # console.print(table) - # 分割 - # console.rule() - # 使用方法 table = Table.grid(padding=1, pad_edge=True) table.add_column("Usage", no_wrap=True, justify="left", style="bold") table.add_row("[b]f2[/b] [magenta] [/magenta][cyan][COMMAND]") - table.add_row(_("例: f2 dy -h 来获取douyin的命令帮助")) + table.add_row(_("例:f2 dy -h/--help 获取douyin的命令帮助")) + table.add_row( + "[b]f2[/b] [magenta][Option] [/magenta][cyan][Args][/cyan] [magenta] [/magenta][cyan][COMMAND]" + ) + table.add_row(_("例:f2 -d DEBUG dy 日志级别为调试运行")) console.print( Panel(table, border_style="bold", title="使用方法 | Usage", title_align="left") ) - table = Table.grid(padding=1, pad_edge=True, expand=True) + # 应用列表 table = Table(show_header=True, header_style="bold magenta") - table.add_column("Parameter", no_wrap=True, justify="left", style="bold") - table.add_column("Description", no_wrap=True, style="bold") - table.add_column("Status", no_wrap=True, justify="left", style="bold") + table.add_column(_("参数"), no_wrap=True, justify="left", style="bold") + table.add_column(_("描述"), no_wrap=True, style="bold") + table.add_column(_("状态"), no_wrap=True, justify="left", style="bold") table.add_row(_("weibo 或 wb"), _("- 获取微博")) table.add_row( @@ -99,9 +86,9 @@ def main() -> None: table.add_row(_("little_red_book 或 lrb"), _("- 获取小红书的作品")) 
table.add_row("\n") table.add_row( - "f2 -d [magenta] [/magenta][cyan][COMMAND]", + "f2 -d DEBUG", _( - "- 记录app的debug到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]" + "- 记录app的调试日志到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]" ), _("⚠"), ) @@ -115,7 +102,7 @@ def main() -> None: Panel( table, border_style="bold", - title="", + title="应用 | apps", title_align="left", subtitle=_("欢迎提交PR适配更多网站"), ) From 17fff92ed086c6f697eee7b6fd88c989abe8134f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:08:47 +0800 Subject: [PATCH 086/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/model.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index 9a7c78d..e8df1a5 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -237,3 +237,18 @@ class LoginCheckQr(BaseLoginModel): verifyFp: str = "" fp: str = "" # msToken: str = TokenManager.gen_real_msToken() + + +class UserFollowing(BaseRequestModel): + user_id: str = "" + sec_user_id: str = "" + offset: int = 0 # 相当于cursor + min_time: int = 0 + max_time: int = 0 + count: int = 20 + # source_type = 1: 最近关注 需要指定max_time(s) 3: 最早关注 需要指定min_time(s) 4: 综合排序 + source_type: int = 4 + gps_access: int = 0 + address_book_access: int = 0 + is_top: int = 1 + From 860b88f12051e145760540acdfc1bbe6c1064a0f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:09:43 +0800 Subject: [PATCH 087/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index e8df1a5..965c5e8 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -252,3 +252,16 @@ class UserFollowing(BaseRequestModel): address_book_access: int = 0 is_top: int = 1 + +class UserFollower(BaseRequestModel): + user_id: str + sec_user_id: str + offset: int = 0 # 相当于cursor + min_time: int = 0 + max_time: int = 0 + count: int = 20 + # source_type = 1: 最近关注 需要指定max_time(s) 2: 综合关注(意义不明) + source_type: int = 4 + gps_access: int = 0 + address_book_access: int = 0 + is_top: int = 1 From df79e460797ff8db668333a66eafabae1a4058a1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:11:38 +0800 Subject: [PATCH 088/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了_to_raw()方法,保留接口原始内容 --- f2/apps/douyin/filter.py | 204 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index bd856f1..2463a9e 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -593,6 +593,210 @@ def __init__(self, data): super().__init__(data) +class UserFollowingFilter(JSONModel): + + @property + def status_code(self): # 1 正常,2096 用户隐私设置不允许查看 + return self._get_attr_value("$.status_code") + + @property + def status_msg(self): + return self._get_attr_value("$.status_msg") + + @property + def has_more(self): + return self._get_attr_value("$.has_more") + + @property + def total_following(self): + return self._get_attr_value("$.total") + + @property + def mix_count(self): + return self._get_attr_value("$.mix_count") + + @property + def offset(self): + return self._get_attr_value("$.offset") + + @property + def myself_user_id(self): + return self._get_attr_value("$.myself_user_id") + + @property + def max_time(self): 
+ return self._get_attr_value("$.max_time") + + @property + def min_time(self): + return self._get_attr_value("$.min_time") + + # following_list + @property + def avatar_larger(self): + return self._get_list_attr_value("$.followings[*].avatar_larger.url_list[0]") + + @property + def can_comment(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_comment") + + @property + def can_forward(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_forward") + + @property + def can_share(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_share") + + @property + def can_show_comment(self): + return self._get_list_attr_value( + "$.followings[*].aweme_control.can_show_comment" + ) + + @property + def aweme_count(self): + return self._get_list_attr_value("$.followings[*].aweme_count") + + @property + def back_cover(self): + return self._get_list_attr_value("$.followings[*].cover_url[0].url_list[0]") + + @property + def register_time(self): + return self._get_list_attr_value("$.followings[*].create_time") + + @property + def unwatched_aweme_count(self): + return self._get_list_attr_value( + "$.followings[*].followings_secondary_information_struct.secondary_information_text" + ) + + @property + def is_block(self): + return self._get_list_attr_value("$.followings[*].is_block") + + @property + def is_blocked(self): + return self._get_list_attr_value("$.followings[*].is_blocked") + + @property + def is_gov_media_vip(self): + return self._get_list_attr_value("$.followings[*].is_gov_media_vip") + + @property + def is_mix_user(self): + return self._get_list_attr_value("$.followings[*].is_mix_user") + + @property + def is_phone_binded(self): + return self._get_list_attr_value("$.followings[*].is_phone_binded") + + @property + def is_star(self): + return self._get_list_attr_value("$.followings[*].is_star") + + @property + def is_top(self): + # 超粉? 
+ return self._get_list_attr_value("$.followings[*].is_top") + + @property + def is_verified(self): + # 实名? + return self._get_list_attr_value("$.followings[*].is_verified") + + @property + def language(self): + return self._get_list_attr_value("$.followings[*].language") + + @property + def nickname(self): + return replaceT(self._get_list_attr_value("$.followings[*].nickname")) + + @property + def relation_label(self): + return self._get_list_attr_value("$.followings[*].relation_label") + + @property + def room_id(self): + return self._get_list_attr_value("$.followings[*].room_id") + + @property + def sec_uid(self): + return self._get_list_attr_value("$.followings[*].sec_uid") + + @property + def secret(self): + # 私密? + return self._get_list_attr_value("$.followings[*].secret") + + @property + def short_id(self): + return self._get_list_attr_value("$.followings[*].short_id") + + @property + def signature(self): + return replaceT(self._get_list_attr_value("$.followings[*].signature")) + + @property + def uid(self): + return self._get_list_attr_value("$.followings[*].uid") + + @property + def unique_id(self): + return self._get_list_attr_value("$.followings[*].unique_id") + + def _to_raw(self) -> dict: + return self._data + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + def _to_list(self): + exclude_list = [ + "has_more", + "total_following", + "mix_count", + "offset", + "myself_user_id", + "max_time", + "min_time", + ] + + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] + + following_entries = self._get_attr_value("$.followings") or [] + + list_dicts = [] + for entry in following_entries: + d = { + "has_more": self.has_more, + "total_following": self.total_following, + "mix_count": self.mix_count, + "offset": 
self.offset, + "myself_user_id": self.myself_user_id, + "max_time": self.max_time, + "min_time": self.min_time, + } + for key in keys: + attr_values = getattr(self, key) + index = following_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts + + class PostDetailFilter(JSONModel): @property From efe15671bec316a960c0bdcc198ab34c523f5169 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:12:15 +0800 Subject: [PATCH 089/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E7=88=AC?= =?UTF-8?q?=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index a9e47f0..80372fc 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -22,6 +22,7 @@ FollowUserLive, LoginGetQr, LoginCheckQr, + UserFollowing, ) from f2.apps.douyin.utils import XBogusManager @@ -200,6 +201,13 @@ async def fetch_check_login(self, parms: LoginCheckQr): logger.debug(_("SSO检查登录状态接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) + async def fetch_user_following(self, params: UserFollowing): + endpoint = XBogusManager.model_2_endpoint( + dyendpoint.USER_FOLLOWING, params.dict() + ) + logger.debug(_("用户关注列表接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + async def __aenter__(self): return self From 9cd1a01bbfc8679fd95bd56e6a2fa77ca7bfdfe9 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:12:58 +0800 Subject: [PATCH 090/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E5=9C=B0?= =?UTF-8?q?=E5=9D=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
f2/apps/douyin/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/f2/apps/douyin/api.py b/f2/apps/douyin/api.py index 7a6d596..617c5f9 100644 --- a/f2/apps/douyin/api.py +++ b/f2/apps/douyin/api.py @@ -54,6 +54,9 @@ class DouyinAPIEndpoints: # 用户喜欢B (User Like B) USER_FAVORITE_B = f"{IESDOUYIN_DOMAIN}/web/api/v2/aweme/like/" + # 关注用户(User Following) + USER_FOLLOWING = f"{DOUYIN_DOMAIN}/aweme/v1/web/user/following/list/" + # 合集作品 MIX_AWEME = f"{DOUYIN_DOMAIN}/aweme/v1/web/mix/aweme/" From 1e341f22581cb4a0e2e758034024cd1df9def77e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:13:10 +0800 Subject: [PATCH 091/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E5=9C=B0?= =?UTF-8?q?=E5=9D=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/f2/apps/douyin/api.py b/f2/apps/douyin/api.py index 617c5f9..b8a30f1 100644 --- a/f2/apps/douyin/api.py +++ b/f2/apps/douyin/api.py @@ -57,6 +57,9 @@ class DouyinAPIEndpoints: # 关注用户(User Following) USER_FOLLOWING = f"{DOUYIN_DOMAIN}/aweme/v1/web/user/following/list/" + # 粉丝用户 (User Follower) + USER_FOLLOWER = f"{DOUYIN_DOMAIN}/aweme/v1/web/user/follower/list/" + # 合集作品 MIX_AWEME = f"{DOUYIN_DOMAIN}/aweme/v1/web/mix/aweme/" From 595fc8e197c1db7244ceb4bef098757c95e27ffa Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:13:23 +0800 Subject: [PATCH 092/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E7=88=AC?= =?UTF-8?q?=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 80372fc..56c98e7 100644 --- a/f2/apps/douyin/crawler.py +++ 
b/f2/apps/douyin/crawler.py @@ -23,6 +23,7 @@ LoginGetQr, LoginCheckQr, UserFollowing, + UserFollower, ) from f2.apps.douyin.utils import XBogusManager @@ -208,6 +209,13 @@ async def fetch_user_following(self, params: UserFollowing): logger.debug(_("用户关注列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) + async def fetch_user_follower(self, params: UserFollower): + endpoint = XBogusManager.model_2_endpoint( + dyendpoint.USER_FOLLOWER, params.dict() + ) + logger.debug(_("用户粉丝列表接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + async def __aenter__(self): return self From dba82ad64d21218d34ddb3a967349f1b3da0d6e8 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:14:35 +0800 Subject: [PATCH 093/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 92 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 5e8cd79..ed9ba06 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -25,6 +25,7 @@ UserLive2, LoginGetQr, LoginCheckQr, + UserFollowing, ) from f2.apps.douyin.filter import ( UserPostFilter, @@ -38,6 +39,7 @@ UserLive2Filter, GetQrcodeFilter, CheckQrcodeFilter, + UserFollowingFilter, ) from f2.apps.douyin.utils import ( SecUserIdFetcher, @@ -1114,6 +1116,96 @@ async def fetch_user_feed_videos( logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + async def fetch_user_following( + self, + user_id: str = "", + sec_user_id: str = "", + offset: int = 0, + count: int = 20, + source_type: int = 4, + min_time: int = 0, + max_time: int = 0, + max_counts: float = float("inf"), + ) -> AsyncGenerator[UserFollowingFilter, Any]: + """ + 用于获取指定用户关注的用户的视频列表。 + + Args: + user_id: str: 用户ID + sec_user_id: 
str: 用户ID + offset: int: 起始页 + count: int: 每页关注用户数 + source_type: int: 排序类型 + min_time: int: 最小时间戳 + max_time: int: 最大时间戳 + Return: + following_data: dict: 关注用户数据字典,包含用户ID列表、用户昵称、用户头像、起始页 + """ + + if not user_id and not sec_user_id: + raise ValueError(_("至少提供 user_id 或 sec_user_id 中的一个参数")) + + max_counts = max_counts or float("inf") + users_collected = 0 + + logger.info(_("开始爬取用户: {0} 的关注用户").format(sec_user_id)) + + while users_collected < max_counts: + current_request_size = min(count, max_counts - users_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量: {0} 每次请求数量: {1}").format(count, current_request_size) + ) + logger.info(_("开始爬取第 {0} 个关注用户").format(offset)) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserFollowing( + offset=offset, + count=current_request_size, + user_id=user_id, + sec_user_id=sec_user_id, + source_type=source_type, + min_time=min_time, + max_time=max_time, + ) + response = await crawler.fetch_user_following(params) + following = UserFollowingFilter(response) + + if following.status_code != 0: + logger.error( + _("错误代码:{0} 错误消息:{1}").format( + following.status_code, following.status_msg + ) + ) + break + + logger.info(_("当前请求的offset: {0}").format(offset)) + logger.debug( + _("用户ID: {0} 用户昵称: {1} 用户作品数: {2}").format( + following.sec_uid, + following.nickname, + following.aweme_count, + ) + ) + logger.debug("===================================") + + yield following + + if not following.has_more: + logger.info(_("用户: {0} 所有关注用户采集完毕").format(sec_user_id)) + break + + # 更新已经处理的用户数量 (Update the number of users processed) + users_collected += len(following.sec_uid) + offset = following.offset + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个用户").format(users_collected)) + async def handle_sso_login(): """ From d85a2310b84dd360bb6acf33768770a78b74e5a8 Mon Sep 17 
00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 16:20:43 +0800 Subject: [PATCH 094/164] =?UTF-8?q?feat:=20=E8=BF=87=E6=BB=A4=E5=99=A8?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=BF=94=E5=9B=9E=E5=8E=9F=E5=A7=8B=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 2463a9e..30db29c 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -111,6 +111,9 @@ def unique_id(self): def user_age(self): return self._get_attr_value("$.user.user_age") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -282,6 +285,9 @@ def max_cursor(self): def min_cursor(self): return self._get_attr_value("$.min_cursor") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -437,6 +443,9 @@ def nickname(self): def uid(self): return self._get_list_attr_value("$.collects_list[*].user_info.uid") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -543,6 +552,9 @@ def owner_id(self): def sec_uid(self): return self._get_list_attr_value("$.mc_list[*].sec_uid") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1121,6 +1133,9 @@ def video_play_addr(self): def images(self): return self._get_list_attr_value("$.aweme_detail.images[*].url_list[0]") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1261,6 +1276,9 @@ def DiggAuth(self): def ShareAuth(self): return self._get_attr_value("$.data.data[0].room_auth.Share") + def _to_raw(self) -> dict: + return self._data + def 
_to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1389,6 +1407,9 @@ def follower_count(self): def sec_uid(self): return self._get_attr_value("$.data.room.owner.sec_uid") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1454,6 +1475,9 @@ def error_code(self): def message(self): return self._get_attr_value("$.message") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1491,6 +1515,9 @@ def message(self): def verify_ticket(self): return self._get_attr_value("$.verify_ticket") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) From 97c0f6917b008a88c072743eb622e8ed816dbff7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 19:26:48 +0800 Subject: [PATCH 095/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E5=99=A8=E5=AD=97=E6=AE=B5=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、添加次要消息类型 2、添加次要消息(包括多少个作品未看,以及是否正在直播) --- f2/apps/douyin/filter.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 30db29c..063db2f 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -679,9 +679,20 @@ def register_time(self): return self._get_list_attr_value("$.followings[*].create_time") @property - def unwatched_aweme_count(self): + def secondary_priority(self): + # secondary_priority 6 代表未看过的作品数量 1 代表正在直播 7 代表简介内容 return self._get_list_attr_value( - "$.followings[*].followings_secondary_information_struct.secondary_information_text" + "$.followings[*].following_list_secondary_information_struct.secondary_information_priority" + ) + + @property + def 
secondary_text(self): + return replaceT( + self._get_list_attr_value( + "$.followings[*].following_list_secondary_information_struct.secondary_information_text" + ) + ) + ) @property From 800d913a2be5c50b44e7a1de88e6eed5efb1122b Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 19:29:44 +0800 Subject: [PATCH 096/164] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Ddouyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E5=99=A8=5Fto=5Flist=E6=96=B9=E6=B3=95=E7=9A=84?= =?UTF-8?q?=E6=8E=92=E9=99=A4=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 因为长度检查只能对序列类型(如列表、元组、字符串等)执行 --- f2/apps/douyin/filter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 063db2f..d06a8bf 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -782,6 +782,8 @@ def _to_dict(self) -> dict: def _to_list(self): exclude_list = [ + "status_code", + "status_msg", "has_more", "total_following", "mix_count", From 487abeef0b59ecaef60566abff430926cca96ec1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 23:02:37 +0800 Subject: [PATCH 097/164] =?UTF-8?q?refactor:=20=E6=9B=B4=E6=96=B0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E6=8E=A5=E5=8F=A3=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、修改字段参数和注释 --- f2/apps/douyin/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index 965c5e8..6d66b76 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -256,12 +256,12 @@ class UserFollowing(BaseRequestModel): class UserFollower(BaseRequestModel): user_id: str sec_user_id: str - offset: int = 0 # 相当于cursor + offset: int = 0 # 相当于cursor 但只对source_type: = 2 有效,其他情况为 0 即可 min_time: int = 0 max_time: int = 0 count: int = 20 # 
source_type = 1: 最近关注 需要指定max_time(s) 2: 综合关注(意义不明) - source_type: int = 4 + source_type: int = 1 gps_access: int = 0 address_book_access: int = 0 is_top: int = 1 From e38143f519be090ed3560b6fdbaa4591cec8c812 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 23:04:03 +0800 Subject: [PATCH 098/164] =?UTF-8?q?style:=20=E6=97=A5=E5=BF=97=E8=BE=93?= =?UTF-8?q?=E5=87=BA=E6=94=B9=E7=94=A8=E4=B8=AD=E6=96=87=E5=86=92=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index ed9ba06..1f91e91 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -1148,14 +1148,14 @@ async def fetch_user_following( max_counts = max_counts or float("inf") users_collected = 0 - logger.info(_("开始爬取用户: {0} 的关注用户").format(sec_user_id)) + logger.info(_("开始爬取用户:{0} 的关注用户").format(sec_user_id)) while users_collected < max_counts: current_request_size = min(count, max_counts - users_collected) logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format(count, current_request_size) + _("最大数量:{0} 每次请求数量:{1}").format(count, current_request_size) ) logger.info(_("开始爬取第 {0} 个关注用户").format(offset)) @@ -1193,7 +1193,7 @@ async def fetch_user_following( yield following if not following.has_more: - logger.info(_("用户: {0} 所有关注用户采集完毕").format(sec_user_id)) + logger.info(_("用户:{0} 所有关注用户采集完毕").format(sec_user_id)) break # 更新已经处理的用户数量 (Update the number of users processed) From 90dcf2a948dc4aca5d7f6613d01dbc9408bf9ac1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 23:12:33 +0800 Subject: [PATCH 099/164] =?UTF-8?q?perf:=20=E7=BB=9F=E4=B8=80=E4=BA=86douy?= =?UTF-8?q?in=E5=85=B3=E6=B3=A8=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E7=9A=84total=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit 1、防止粉丝类继承关注类造成该字段无法读取 --- f2/apps/douyin/filter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index d06a8bf..1d9ace9 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -620,7 +620,7 @@ def has_more(self): return self._get_attr_value("$.has_more") @property - def total_following(self): + def total(self): return self._get_attr_value("$.total") @property @@ -785,7 +785,7 @@ def _to_list(self): "status_code", "status_msg", "has_more", - "total_following", + "total", "mix_count", "offset", "myself_user_id", @@ -807,7 +807,7 @@ def _to_list(self): for entry in following_entries: d = { "has_more": self.has_more, - "total_following": self.total_following, + "total": self.total, "mix_count": self.mix_count, "offset": self.offset, "myself_user_id": self.myself_user_id, From b61dfc73c34b0c214ce17b68e793df03e5e20f9a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 23 Mar 2024 23:13:36 +0800 Subject: [PATCH 100/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 162 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 1d9ace9..00ad901 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -822,6 +822,168 @@ def _to_list(self): return list_dicts +class UserFollowerFilter(UserFollowingFilter): + def __init__(self, data): + super().__init__(data) + + @property + def total(self): + return self._get_attr_value("$.total") + + # followers + @property + def avatar_larger(self): + return self._get_list_attr_value("$.followers[*].avatar_larger.url_list[0]") + + @property + def can_comment(self): + return 
self._get_list_attr_value("$.followers[*].aweme_control.can_comment") + + @property + def can_forward(self): + return self._get_list_attr_value("$.followers[*].aweme_control.can_forward") + + @property + def can_share(self): + return self._get_list_attr_value( + "$.followersfollowers[*].aweme_control.can_share" + ) + + @property + def can_show_comment(self): + return self._get_list_attr_value( + "$.followers[*].aweme_control.can_show_comment" + ) + + @property + def aweme_count(self): + return self._get_list_attr_value("$.followers[*].aweme_count") + + @property + def back_cover(self): + return self._get_list_attr_value("$.followers[*].cover_url[0].url_list[0]") + + @property + def register_time(self): + return self._get_list_attr_value("$.followers[*].create_time") + + @property + def is_block(self): + return self._get_list_attr_value("$.followers[*].is_block") + + @property + def is_blocked(self): + return self._get_list_attr_value("$.followers[*].is_blocked") + + @property + def is_gov_media_vip(self): + return self._get_list_attr_value("$.followers[*].is_gov_media_vip") + + @property + def is_mix_user(self): + return self._get_list_attr_value("$.followers[*].is_mix_user") + + @property + def is_phone_binded(self): + return self._get_list_attr_value("$.followers[*].is_phone_binded") + + @property + def is_star(self): + return self._get_list_attr_value("$.followers[*].is_star") + + @property + def is_top(self): + # 超粉? + return self._get_list_attr_value("$.followers[*].is_top") + + @property + def is_verified(self): + # 实名? 
+ return self._get_list_attr_value("$.followers[*].is_verified") + + @property + def language(self): + return self._get_list_attr_value("$.followers[*].language") + + @property + def nickname(self): + return replaceT(self._get_list_attr_value("$.followers[*].nickname")) + + @property + def relation_label(self): + return self._get_list_attr_value("$.followers[*].relation_label") + + @property + def room_id(self): + return self._get_list_attr_value("$.followers[*].room_id") + + @property + def sec_uid(self): + return self._get_list_attr_value("$.followers[*].sec_uid") + + @property + def secret(self): + # 私密? + return self._get_list_attr_value("$.followers[*].secret") + + @property + def short_id(self): + return self._get_list_attr_value("$.followers[*].short_id") + + @property + def signature(self): + return replaceT(self._get_list_attr_value("$.followers[*].signature")) + + @property + def uid(self): + return self._get_list_attr_value("$.followers[*].uid") + + @property + def unique_id(self): + return self._get_list_attr_value("$.followers[*].unique_id") + + def _to_list(self): + exclude_list = [ + "status_code", + "status_msg", + "has_more", + "total", + "mix_count", + "offset", + "myself_user_id", + "max_time", + "min_time", + ] + + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] + + following_entries = self._get_attr_value("$.followers") or [] + + list_dicts = [] + for entry in following_entries: + d = { + "has_more": self.has_more, + "total": self.total, + "mix_count": self.mix_count, + "offset": self.offset, + "myself_user_id": self.myself_user_id, + "max_time": self.max_time, + "min_time": self.min_time, + } + for key in keys: + attr_values = getattr(self, key) + index = following_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts + + class 
PostDetailFilter(JSONModel): @property From 5e32d5fd5c5741a1a63de1e3425cc18a339e06a9 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 01:25:36 +0800 Subject: [PATCH 101/164] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E7=B2=89=E4=B8=9D=E7=94=A8=E6=88=B7=E7=9A=84=E5=A4=84=E7=90=86?= =?UTF-8?q?(#59)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 93 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 1f91e91..3ae4b7a 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -1206,6 +1206,99 @@ async def fetch_user_following( logger.info(_("爬取结束,共爬取 {0} 个用户").format(users_collected)) + async def fetch_user_follower( + self, + user_id: str = "", + sec_user_id: str = "", + offset: int = 0, + count: int = 20, + source_type: int = 1, + min_time: int = 0, + max_time: int = 0, + max_counts: float = float("inf"), + ) -> AsyncGenerator[UserFollowerFilter, Any]: + """ + 用于获取指定用户的粉丝列表。 + + Args: + user_id: str: 用户ID + sec_user_id: str: 用户ID + offset: int: 起始页 + count: int: 每页粉丝数 + source_type: int: 排序类型 + min_time: int: 最小时间戳 + max_time: int: 最大时间戳 + Return: + follower_data: dict: 粉丝数据字典,包含用户ID列表、用户昵称、用户头像、起始页 + """ + + if not user_id and not sec_user_id: + raise ValueError(_("至少提供 user_id 或 sec_user_id 中的一个参数")) + + max_counts = max_counts or float("inf") + users_collected = 0 + + logger.info(_("开始爬取用户:{0} 的粉丝").format(sec_user_id)) + + while users_collected < max_counts: + current_request_size = min(count, max_counts - users_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量:{0} 每次请求数量:{1}").format(count, current_request_size) + ) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserFollower( + offset=offset, + count=current_request_size, + user_id=user_id, + sec_user_id=sec_user_id, + 
source_type=source_type, + min_time=min_time, + max_time=max_time, + ) + response = await crawler.fetch_user_follower(params) + follower = UserFollowerFilter(response) + + if follower.status_code != 0: + logger.error( + _("错误代码:{0} 错误消息:{1}").format( + follower.status_code, follower.status_msg + ) + ) + break + + logger.info( + _("当前请求的offset:{0} max_time:{1}").format(offset, max_time) + ) + logger.info(_("爬取了 {0} 个粉丝用户").format(users_collected + 1)) + logger.debug( + _("用户ID:{0} 用户昵称:{1} 用户作品数:{2}").format( + follower.sec_uid, follower.nickname, follower.aweme_count + ) + ) + logger.debug("===================================") + + yield follower + + if not follower.has_more: + logger.info(_("用户:{0} 所有粉丝采集完毕").format(sec_user_id)) + break + + # 更新已经处理的用户数量 (Update the number of users processed) + users_collected += len(follower.sec_uid) + offset = follower.offset + + # 更新最大(最早)时间戳,避免重复返回相同的用户 + max_time = follower.min_time + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个用户").format(users_collected)) + async def handle_sso_login(): """ From ecaa9bd60e85cb70ae98a3db2901714fa263caab Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 01:32:33 +0800 Subject: [PATCH 102/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9douyin?= =?UTF-8?q?=E5=85=B3=E6=B3=A8=E7=94=A8=E6=88=B7=E5=A4=84=E7=90=86=E7=9A=84?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 3ae4b7a..cc99a90 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -1157,7 +1157,6 @@ async def fetch_user_following( logger.debug( _("最大数量:{0} 每次请求数量:{1}").format(count, current_request_size) ) - logger.info(_("开始爬取第 {0} 
个关注用户").format(offset)) async with DouyinCrawler(self.kwargs) as crawler: params = UserFollowing( @@ -1180,12 +1179,14 @@ async def fetch_user_following( ) break - logger.info(_("当前请求的offset: {0}").format(offset)) + logger.info(_("当前请求的offset:{0}").format(offset)) + logger.info(_("爬取了 {0} 个关注用户").format(offset + 1)) logger.debug( - _("用户ID: {0} 用户昵称: {1} 用户作品数: {2}").format( + _("用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}").format( following.sec_uid, following.nickname, following.aweme_count, + following.secondary_text, ) ) logger.debug("===================================") From 0179dbe5f64337c48155ab2c13bca73c623d6329 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 01:33:00 +0800 Subject: [PATCH 103/164] =?UTF-8?q?style:=20=E6=B7=BB=E5=8A=A0=E5=AF=BC?= =?UTF-8?q?=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index cc99a90..d73a5e5 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -26,6 +26,7 @@ LoginGetQr, LoginCheckQr, UserFollowing, + UserFollower, ) from f2.apps.douyin.filter import ( UserPostFilter, @@ -40,6 +41,7 @@ GetQrcodeFilter, CheckQrcodeFilter, UserFollowingFilter, + UserFollowerFilter, ) from f2.apps.douyin.utils import ( SecUserIdFetcher, From df522b2fd2b4c9cd427ede66e7b7c80f87c69e59 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 16:10:54 +0800 Subject: [PATCH 104/164] =?UTF-8?q?feat:=20=E7=BB=99douyin=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E8=BF=87=E6=BB=A4=E5=99=A8=E6=B7=BB=E5=8A=A0=E5=8E=9F?= =?UTF-8?q?=E5=A7=8B=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 保留未转义部分字符 --- f2/apps/douyin/filter.py | 118 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 
00ad901..4ecae7f 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -75,6 +75,10 @@ def mplatform_followers_count(self): def nickname(self): return replaceT(self._get_attr_value("$.user.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.user.nickname") + @property def room_id(self): return self._get_attr_value("$.user.room_id") @@ -95,6 +99,10 @@ def short_id(self): def signature(self): return replaceT(self._get_attr_value("$.user.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.user.signature") + @property def total_favorited(self): return self._get_attr_value("$.user.total_favorited") @@ -155,6 +163,10 @@ def create_time(self): def desc(self): return replaceT(self._get_list_attr_value("$.aweme_list[*].desc")) + @property + def desc_raw(self): + return self._get_list_attr_value("$.aweme_list[*].desc") + @property def uid(self): return self._get_list_attr_value("$.aweme_list[*].author.uid") @@ -167,6 +179,10 @@ def sec_user_id(self): def nickname(self): return replaceT(self._get_list_attr_value("$.aweme_list[*].author.nickname")) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.aweme_list[*].author.nickname") + @property def author_avatar_thumb(self): return self._get_list_attr_value( @@ -268,6 +284,10 @@ def music_status(self): def music_title(self): return replaceT(self._get_list_attr_value("$.aweme_list[*].music.title")) + @property + def music_title_raw(self): + return self._get_list_attr_value("$.aweme_list[*].music.title") + @property def music_play_url(self): url_list = self._get_list_attr_value("$.aweme_list[*].music.play_url.url_list") @@ -374,6 +394,10 @@ def collects_id(self): def collects_name(self): return replaceT(self._get_list_attr_value("$.collects_list[*].collects_name")) + @property + def collects_name_raw(self): + return self._get_list_attr_value("$.collects_list[*].collects_name") + @property def create_time(self): create_times = 
self._get_list_attr_value("$.collects_list[*].create_time") @@ -439,6 +463,10 @@ def nickname(self): self._get_list_attr_value("$.collects_list[*].user_info.nickname") ) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.collects_list[*].user_info.nickname") + @property def uid(self): return self._get_list_attr_value("$.collects_list[*].user_info.uid") @@ -488,6 +516,10 @@ def duration(self): def author(self): return replaceT(self._get_list_attr_value("$.mc_list[*].author")) + @property + def author_raw(self): + return self._get_list_attr_value("$.mc_list[*].author") + @property def collect_status(self): return self._get_list_attr_value("$.mc_list[*].collect_stat") @@ -536,6 +568,10 @@ def play_url(self): def title(self): return replaceT(self._get_list_attr_value("$.mc_list[*].title")) + @property + def title_raw(self): + return self._get_list_attr_value("$.mc_list[*].title") + @property def strong_beat_url(self): return self._get_list_attr_value("$.mc_list[*].strong_beat_url.url_list[0]") @@ -544,6 +580,10 @@ def strong_beat_url(self): def owner_nickname(self): return replaceT(self._get_list_attr_value("$.mc_list[*].owner_nickname")) + @property + def owner_nickname_raw(self): + return self._get_list_attr_value("$.mc_list[*].owner_nickname") + @property def owner_id(self): return self._get_list_attr_value("$.mc_list[*].owner_id") @@ -693,6 +733,10 @@ def secondary_text(self): ) ) + @property + def secondary_text_raw(self): + return self._get_list_attr_value( + "$.followings[*].following_list_secondary_information_struct.secondary_information_text" ) @property @@ -737,6 +781,10 @@ def language(self): def nickname(self): return replaceT(self._get_list_attr_value("$.followings[*].nickname")) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.followings[*].nickname") + @property def relation_label(self): return self._get_list_attr_value("$.followings[*].relation_label") @@ -762,6 +810,10 @@ def short_id(self): def 
signature(self): return replaceT(self._get_list_attr_value("$.followings[*].signature")) + @property + def signature_raw(self): + return self._get_list_attr_value("$.followings[*].signature") + @property def uid(self): return self._get_list_attr_value("$.followings[*].uid") @@ -909,6 +961,10 @@ def language(self): def nickname(self): return replaceT(self._get_list_attr_value("$.followers[*].nickname")) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.followers[*].nickname") + @property def relation_label(self): return self._get_list_attr_value("$.followers[*].relation_label") @@ -934,6 +990,10 @@ def short_id(self): def signature(self): return replaceT(self._get_list_attr_value("$.followers[*].signature")) + @property + def signature_raw(self): + return self._get_list_attr_value("$.followers[*].signature") + @property def uid(self): return self._get_list_attr_value("$.followers[*].uid") @@ -1003,6 +1063,10 @@ def aweme_id(self): def nickname(self): return replaceT(self._get_attr_value("$.aweme_detail.author.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.aweme_detail.author.nickname") + @property def sec_user_id(self): return self._get_attr_value("$.aweme_detail.author.sec_uid") @@ -1049,6 +1113,10 @@ def create_time(self): def desc(self): return replaceT(self._get_attr_value("$.aweme_detail.desc")) + @property + def desc_raw(self): + return self._get_attr_value("$.aweme_detail.desc") + @property def duration(self): return self._get_attr_value("$.aweme_detail.duration") @@ -1096,6 +1164,10 @@ def media_type(self): def mix_desc(self): return replaceT(self._get_attr_value("$.aweme_detail.mix_info.mix_desc")) + @property + def mix_dec_raw(self): + return self._get_attr_value("$.aweme_detail.mix_info.mix_desc") + @property def mix_create_time(self): return timestamp_2_str( @@ -1149,6 +1221,10 @@ def is_pgc(self): def music_author(self): return replaceT(self._get_attr_value("$.aweme_detail.music.author")) + 
@property + def music_author_raw(self): + return self._get_attr_value("$.aweme_detail.music.author") + @property def music_author_deleted(self): return self._get_attr_value("$.aweme_detail.music.author_deleted") @@ -1171,6 +1247,10 @@ def pgc_author(self): self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author") ) + @property + def pgc_author_raw(self): + return self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author") + @property def pgc_author_title(self): return replaceT( @@ -1179,6 +1259,12 @@ def pgc_author_title(self): ) ) + @property + def pgc_author_title_raw(self): + return self._get_attr_value( + "$.aweme_detail.music.matched_pgc_sound.pgc_author_title" + ) + @property def pgc_music_type(self): return self._get_attr_value( @@ -1193,6 +1279,10 @@ def music_status(self): def music_owner_handle(self): return replaceT(self._get_attr_value("$.aweme_detail.music.owner_handle")) + @property + def music_owner_handle_raw(self): + return self._get_attr_value("$.aweme_detail.music.owner_handle") + @property def music_owner_id(self): return self._get_attr_value("$.aweme_detail.music.owner_id") @@ -1201,6 +1291,10 @@ def music_owner_id(self): def music_owner_nickname(self): return replaceT(self._get_attr_value("$.aweme_detail.music.owner_nickname")) + @property + def music_owner_nickname_raw(self): + return self._get_attr_value("$.aweme_detail.music.owner_nickname") + @property def music_play_url(self): return self._get_attr_value("$.aweme_detail.music.play_url.url_list[0]") @@ -1366,6 +1460,10 @@ def live_status(self): def live_title(self): return replaceT(self._get_attr_value("$.data.data[0].title")) + @property + def live_title_raw(self): + return self._get_attr_value("$.data.data[0].title") + @property def cover(self): return self._get_attr_value("$.data.data[0].cover.url_list[0]") @@ -1403,6 +1501,10 @@ def sec_user_id(self): def nickname(self): return replaceT(self._get_attr_value("$.data.data[0].owner.nickname")) + @property + 
def nickname_raw(self): + return self._get_attr_value("$.data.data[0].owner.nickname") + @property def avatar_thumb(self): return self._get_attr_value("$.data.data[0].owner.avatar_thumb.url_list[0]") @@ -1513,6 +1615,10 @@ def live_status(self): def live_title(self): return replaceT(self._get_attr_value("$.data.room.title")) + @property + def live_title_raw(self): + return self._get_attr_value("$.data.room.title") + @property def user_count(self): return self._get_attr_value("$.data.room.user_count") @@ -1550,14 +1656,26 @@ def hls_pull_url(self): def nickname(self): return replaceT(self._get_attr_value("$.data.room.owner.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.data.room.owner.nickname") + @property def gender(self): return replaceT(self._get_attr_value("$.data.room.owner.gender")) + @property + def gender_raw(self): + return self._get_attr_value("$.data.room.owner.gender") + @property def signature(self): return replaceT(self._get_attr_value("$.data.room.owner.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.data.room.owner.signature") + @property def avatar_large(self): return self._get_attr_value("$.data.room.owner.avatar_large.url_list[0]") From 4b1d162611d9b4d56a9044cd269b6a9267a37c16 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:10:07 +0800 Subject: [PATCH 105/164] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Ddouyin?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=BF=87=E6=BB=A4=E5=99=A8=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mix_dec_raw -> mix_desc_raw --- f2/apps/douyin/filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 4ecae7f..3a47901 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1165,7 +1165,7 @@ def mix_desc(self): return 
replaceT(self._get_attr_value("$.aweme_detail.mix_info.mix_desc")) @property - def mix_dec_raw(self): + def mix_desc_raw(self): return self._get_attr_value("$.aweme_detail.mix_info.mix_desc") @property From bd78c7f76d37314213c0fbcc8daccf04aa88647f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:27:53 +0800 Subject: [PATCH 106/164] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Ddouyin?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=BF=87=E6=BB=A4=E5=99=A8=E6=97=B6=E9=97=B4?= =?UTF-8?q?=E6=88=B3=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 统一转换为str方便处理 --- f2/apps/douyin/filter.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 3a47901..b6c8539 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -154,9 +154,9 @@ def aweme_type(self): def create_time(self): create_times = self._get_list_attr_value("$.aweme_list[*].create_time") return ( - [timestamp_2_str(ct) for ct in create_times] + [timestamp_2_str(str(ct)) for ct in create_times] if isinstance(create_times, list) - else timestamp_2_str(create_times) + else timestamp_2_str(str(create_times)) ) @property @@ -402,9 +402,9 @@ def collects_name_raw(self): def create_time(self): create_times = self._get_list_attr_value("$.collects_list[*].create_time") return ( - [timestamp_2_str(ct) for ct in create_times] + [timestamp_2_str(str(ct)) for ct in create_times] if isinstance(create_times, list) - else timestamp_2_str(create_times) + else timestamp_2_str(str(create_times)) ) @property @@ -427,9 +427,9 @@ def item_type(self): def last_collect_time(self): create_times = self._get_list_attr_value("$.collects_list[*].last_collect_time") return ( - [timestamp_2_str(ct) for ct in create_times] + [timestamp_2_str(str(ct)) for ct in create_times] if isinstance(create_times, list) - else timestamp_2_str(create_times) + else timestamp_2_str(str(create_times)) ) 
@property @@ -1625,11 +1625,11 @@ def user_count(self): @property def create_time(self): - return timestamp_2_str(self._get_attr_value("$.data.room.create_time")) + return timestamp_2_str(str(self._get_attr_value("$.data.room.create_time"))) @property def finish_time(self): - return timestamp_2_str(self._get_attr_value("$.data.room.finish_time")) + return timestamp_2_str(str(self._get_attr_value("$.data.room.finish_time"))) @property def cover(self): From 174d15b52cd09125ead435720805b70e073dc109 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:30:01 +0800 Subject: [PATCH 107/164] =?UTF-8?q?feat:=20timestamp=5F2=5Fstr=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E5=AF=B930=E4=BD=8D=E6=97=B6=E9=97=B4=E6=88=B3?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E6=A0=BC=E5=BC=8F=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/utils/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/f2/utils/utils.py b/f2/utils/utils.py index 11cf639..25f37c7 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -78,6 +78,10 @@ def timestamp_2_str( if timestamp is None or timestamp == "None": return "" + if isinstance(timestamp, str): + if len(timestamp) == 30: + return datetime.datetime.strptime(timestamp, "%a %b %d %H:%M:%S %z %Y") + return datetime.datetime.fromtimestamp(float(timestamp)).strftime(format) From c069651b82f1d4f31f9b61e569ba0bacb6be2ac5 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:32:59 +0800 Subject: [PATCH 108/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9=E4=BA=86?= =?UTF-8?q?=E7=BF=BB=E8=AF=91=E5=87=BD=E6=95=B0=E4=B8=8E=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/dl/base_downloader.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index 734ac4a..368e3da 100644 --- 
a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -26,7 +26,7 @@ class BaseDownloader(BaseCrawler): """基础下载器 (Base Downloader Class)""" - def __init__(self, kwargs: dict = {}): + def __init__(self, kwargs: dict = ...): proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) proxies = { "http://": proxies_conf.get("http", None), @@ -76,9 +76,9 @@ async def _download_chunks( task_id, advance=len(chunk), total=int(content_length) ) except httpx.ReadTimeout as e: - logger.warning(_("文件区块下载超时: {0}".format(e))) + logger.warning(_("文件区块下载超时:{0}").format(e)) except Exception as e: - logger.error(_("文件区块下载失败: {0}".format(e))) + logger.error(_("文件区块下载失败:{0}").format(e)) async def download_file( self, task_id: TaskID, url: str, full_path: Union[str, Path] @@ -122,10 +122,11 @@ async def download_file( logger.debug( _( "找到了未下载完的文件 {0}, 大小为 {1} 字节".format( + logger.debug( + _("找到了未下载完的文件 {0}, 大小为 {1} 字节").format( tmp_path, start_byte ) ) - ) if start_byte in [0, content_length]: if start_byte: @@ -165,7 +166,7 @@ async def download_file( await self.progress.update( task_id, - description=_("[ 失败 ]:"), + description=_("[ 完成 ]:"), filename=trim_filename(full_path.name, 45), state="completed", ) @@ -208,7 +209,7 @@ async def save_file( filename=trim_filename(full_path.name, 45), state="completed", ) - logger.debug(_("下载完成, 文件已保存为 {0}".format(full_path))) + logger.debug(_("下载完成, 文件已保存为 {0}").format(full_path)) async def download_m3u8_stream( self, task_id: TaskID, url: str, full_path: Union[str, Path] @@ -283,9 +284,9 @@ async def download_m3u8_stream( ) except httpx.ReadTimeout as e: - logger.warning(_("TS文件下载超时: {0}".format(e))) + logger.warning(_("TS文件下载超时: {0}").format(e)) except Exception as e: - logger.error(_("TS文件下载失败: {0}".format(e))) + logger.error(_("TS文件下载失败: {0}").format(e)) logger.error(traceback.format_exc()) finally: await ts_response.aclose() @@ -303,7 +304,7 @@ async def download_m3u8_stream( ) return else: - logger.error(_("HTTP错误: 
{0}".format(e))) + logger.error(_("HTTP错误: {0}").format(e)) await self.progress.update( task_id, description=_("[ 失败 ]:"), @@ -313,7 +314,7 @@ async def download_m3u8_stream( return except Exception as e: - logger.error(_("m3u8文件解析失败: {0}".format(e))) + logger.error(_("m3u8文件解析失败: {0}").format(e)) logger.error(traceback.format_exc()) await self.progress.update( task_id, @@ -360,7 +361,7 @@ async def initiate_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -407,7 +408,7 @@ async def initiate_static_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -453,7 +454,7 @@ async def initiate_m3u8_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -466,7 +467,7 @@ async def initiate_m3u8_download( async def execute_tasks(self): """执行所有下载任务 (Execute all download tasks)""" logger.debug( - _("开始执行下载任务,本次共有 {0} 个任务".format(len(self.download_tasks))) + _("开始执行下载任务,本次共有 {0} 个任务").format(len(self.download_tasks)) ) await asyncio.gather(*self.download_tasks) self.download_tasks.clear() From c9c634a0bd15e441a60c3aa989829378e0966dab Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:33:42 +0800 Subject: [PATCH 109/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E5=8F=82=E6=95=B0=E7=B1=BB=E5=9E=8B=E4=B8=8E=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- f2/dl/base_downloader.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index 368e3da..5407c29 100644 --- a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -7,7 +7,7 @@ import traceback from pathlib import Path from rich.progress import TaskID -from typing import Union, Optional, Any +from typing import Union, Optional, Any, List from f2.log.logger import logger from f2.i18n.translator import _ @@ -81,15 +81,23 @@ async def _download_chunks( logger.error(_("文件区块下载失败:{0}").format(e)) async def download_file( - self, task_id: TaskID, url: str, full_path: Union[str, Path] + self, + task_id: TaskID, + urls: Union[str, List[str]], + full_path: Union[str, Path], ) -> None: """ 下载文件 (Download file) Args: task_id (TaskID): 任务ID (Task ID) - url (str): 文件URL (File URL) + urls (Union[str, List[str]]): 文件URL (File URL) full_path (Union[str, Path]): 保存路径 (Save path) + + Note: + url仅代表一个文件的链接,当url为列表时,表示该文件的多个链接 + (url represents only a link to a file, when url is a list, + it represents multiple links to the file) """ async with self.semaphore: # 确保目标路径存在 (Ensure target path exists) @@ -180,7 +188,10 @@ async def download_file( logger.debug(_("下载完成, 文件已保存为 {0}".format(full_path))) async def save_file( - self, task_id: TaskID, content: Any, full_path: Union[str, Path] + self, + task_id: TaskID, + content: Any, + full_path: Union[str, Path], ): """ 保存文件 (Save file) @@ -212,7 +223,10 @@ async def save_file( logger.debug(_("下载完成, 文件已保存为 {0}").format(full_path)) async def download_m3u8_stream( - self, task_id: TaskID, url: str, full_path: Union[str, Path] + self, + task_id: TaskID, + url: str, + full_path: Union[str, Path], ) -> None: """ 下载m3u8流视频 (Download m3u8 stream video) @@ -327,7 +341,7 @@ async def download_m3u8_stream( async def initiate_download( self, file_type: str, - file_url: str, + file_url: Union[str, List[str]], 
base_path: Union[str, Path], file_name: str, file_suffix: Optional[str], @@ -339,10 +353,15 @@ async def initiate_download( Args: file_type (str): 文件类型描述 (File type description) - file_url (str): 文件URL (File URL) + file_url (Union[str, List[str]]): 文件URL (File URL) file_name (str): 文件名称 (File name) base_path (Union[str, Path]): 基础路径 (Base path) file_suffix (Optional[str]): 文件后缀 (File suffix) + + Note: + file_url仅代表一个文件的链接,当file_url为列表时,表示该文件的多个链接 + (file_url represents only a link to a file, when file_url is a list, + it represents multiple links to the file) """ # 文件路径 From dd2d7299f1949ccd508d22c167e21722f8818388 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:39:04 +0800 Subject: [PATCH 110/164] =?UTF-8?q?perf:=20=E4=BF=AE=E6=94=B9=E4=B8=8B?= =?UTF-8?q?=E8=BD=BD=E9=80=BB=E8=BE=91=E4=BB=A5=E6=8F=90=E9=AB=98=E6=80=A7?= =?UTF-8?q?=E8=83=BD(#29,=20#41,=20#45)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、增加了对丢失链接的重试逻辑 https://github.com/Johnserf-Seed/TikTokDownload/issues/679 https://github.com/Johnserf-Seed/TikTokDownload/issues/678 https://github.com/Johnserf-Seed/TikTokDownload/issues/668 --- f2/apps/douyin/dl.py | 2 +- f2/apps/douyin/filter.py | 2 +- f2/dl/base_downloader.py | 143 ++++++++++++++++++++++----------------- 3 files changed, 84 insertions(+), 63 deletions(-) diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index 6ed9410..3cfb48f 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -244,7 +244,7 @@ async def handler_download( ) + "_video" ) - + # video_play_addr 现在为一个list,第一个链接下载失败,则下载第二个链接 video_url = aweme_data_dict.get("video_play_addr") if video_url != None: await self.initiate_download( diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index b6c8539..87ed6a5 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1395,7 +1395,7 @@ def video_bit_rate(self): @property def video_play_addr(self): - return 
self._get_attr_value("$.aweme_detail.video.play_addr.url_list[0]") + return self._get_attr_value("$.aweme_detail.video.play_addr.url_list") # images @property diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index 5407c29..ec12d48 100644 --- a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -100,77 +100,92 @@ async def download_file( it represents multiple links to the file) """ async with self.semaphore: + # 如果urls是单个链接,则转换为列表以便统一处理 + if isinstance(urls, str): + urls = [urls] + # 确保目标路径存在 (Ensure target path exists) full_path = self._ensure_path(full_path) - # 获取文件内容大小 (Get the size of the file content) - content_length = await get_content_length(url, self.headers, self.proxies) - logger.debug( - _("{0}在服务器上的总内容长度为:{1} 字节".format(url, content_length)) - ) + # 遍历所有链接 (Iterate over all links) + for link in urls: + # 获取文件内容大小 (Get the size of the file content) + content_length = await get_content_length( + link, self.headers, self.proxies + ) - # 如果文件内容大小为0, 则不下载 (If file content size is 0, skip download) - if content_length == 0: - logger.warning(_("内容长度为0,跳过下载")) - await self.progress.update( - task_id, - description=_("[ 丢失 ]:"), - filename=trim_filename(full_path.name, 45), - state="completed", + logger.debug( + _("{0} 在服务器上的总内容长度为:{1} 字节").format( + link, content_length + ) ) - return - # 确保目标路径存在 (Ensure target path exists) - full_path.parent.mkdir(parents=True, exist_ok=True) - # 寻找未下载完的临时文件 (Find unfinished temporary files) - tmp_path = full_path.with_suffix(".tmp") - # 获取临时文件的大小 (Get the size of the temporary file) - start_byte = 0 if not tmp_path.exists() else tmp_path.stat().st_size - - logger.debug( - _( - "找到了未下载完的文件 {0}, 大小为 {1} 字节".format( + # 如果文件内容大小为0, 则尝试下一个链接 (If file content size is 0, try the next link) + if content_length == 0: + logger.warning( + _("链接 {0} 内容长度为0,尝试下一个链接是否可用").format(link) + ) + continue + + # 确保目标路径存在 (Ensure target path exists) + full_path.parent.mkdir(parents=True, exist_ok=True) + # 寻找未下载完的临时文件 
(Find unfinished temporary files) + tmp_path = full_path.with_suffix(".tmp") + # 获取临时文件的大小 (Get the size of the temporary file) + start_byte = 0 if not tmp_path.exists() else tmp_path.stat().st_size + logger.debug( _("找到了未下载完的文件 {0}, 大小为 {1} 字节").format( tmp_path, start_byte ) ) - if start_byte in [0, content_length]: - if start_byte: - tmp_path.rename(full_path) - logger.debug(_("临时文件已完全下载")) - return + if start_byte in [0, content_length]: + if start_byte: + tmp_path.rename(full_path) + logger.debug(_("临时文件已完全下载")) + return - # 构建range请求头 (Build range request header) - range_headers = ( - {"Range": "bytes={}-".format(start_byte)} if start_byte else {} - ) - range_headers.update(self.headers) - range_request = self.aclient.build_request( - "GET", url, headers=range_headers - ) - async with aiofiles.open(tmp_path, "ab" if start_byte else "wb") as file: - await self._download_chunks( - self.aclient, range_request, file, content_length, task_id + # 构建range请求头 (Build range request header) + range_headers = ( + {"Range": "bytes={}-".format(start_byte)} if start_byte else {} ) - - # 下载完成后重命名文件 (Rename file after download is complete) - try: - tmp_path.rename(full_path) - except FileExistsError: - logger.warning(_("{0} 已存在,将覆盖".format(full_path))) - tmp_path.replace(full_path) - except PermissionError: - logger.error( - _("另一个程序正在使用此文件或受异步调度影响,该任务需要重新下载") + range_headers.update(self.headers) + range_request = self.aclient.build_request( + "GET", link, headers=range_headers ) - # 尝试删除临时文件 (Try to delete the temporary file) + async with aiofiles.open( + tmp_path, "ab" if start_byte else "wb" + ) as file: + await self._download_chunks( + self.aclient, range_request, file, content_length, task_id + ) + + # 下载完成后重命名文件 (Rename file after download is complete) try: - tmp_path.unlink() tmp_path.rename(full_path) - except Exception as e: - logger.error(_("尝试删除临时文件失败: {0}".format(e))) + except FileExistsError: + logger.warning(_("{0} 已存在,将覆盖").format(full_path)) + 
tmp_path.replace(full_path) + except PermissionError: + logger.error( + _( + "另一个程序正在使用此文件或受异步调度影响,该任务需要重新下载" + ) + ) + # 尝试删除临时文件 (Try to delete the temporary file) + try: + tmp_path.unlink() + tmp_path.rename(full_path) + except Exception as e: + logger.error(_("尝试删除临时文件失败:{0}").format(e)) + + await self.progress.update( + task_id, + description=_("[ 失败 ]:"), + filename=trim_filename(full_path.name, 45), + state="error", + ) await self.progress.update( task_id, @@ -178,14 +193,20 @@ async def download_file( filename=trim_filename(full_path.name, 45), state="completed", ) + logger.debug(_("下载完成, 文件已保存为 {0}").format(full_path)) - await self.progress.update( - task_id, - description=_("[ 完成 ]:"), - filename=trim_filename(full_path.name, 45), - state="completed", - ) - logger.debug(_("下载完成, 文件已保存为 {0}".format(full_path))) + # 如果下载成功,则跳出循环 (If download is successful, break the loop) + break + + else: + # 如果遍历完所有链接仍然无法成功下载,则记录警告 + logger.warning("所有链接都无法下载") + await self.progress.update( + task_id, + description=_("[ 丢失 ]:所有链接都无法下载"), + filename=trim_filename(full_path.name, 45), + state="error", + ) async def save_file( self, From 9f4dde759f2ec034b6ab50ddefe4e035fa05bb0c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:41:11 +0800 Subject: [PATCH 111/164] =?UTF-8?q?feat:=20=E7=BB=99tiktok=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E8=BF=87=E6=BB=A4=E5=99=A8=E6=B7=BB=E5=8A=A0=E5=8E=9F?= =?UTF-8?q?=E5=A7=8B=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 保留未转义部分字符 --- f2/apps/tiktok/filter.py | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/f2/apps/tiktok/filter.py b/f2/apps/tiktok/filter.py index 58192a7..9b9593a 100644 --- a/f2/apps/tiktok/filter.py +++ b/f2/apps/tiktok/filter.py @@ -45,6 +45,10 @@ def uid(self): def nickname(self): return replaceT(self._get_attr_value("$.userInfo.user.nickname")) + @property + def nickname_raw(self): + return 
self._get_attr_value("$.userInfo.user.nickname") + @property def secUid(self): return self._get_attr_value("$.userInfo.user.secUid") @@ -81,6 +85,10 @@ def relation(self) -> bool: # follow 1, no follow 0 def signature(self): return replaceT(self._get_attr_value("$.userInfo.user.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.userInfo.user.signature") + @property def ttSeller(self) -> bool: return bool(self._get_attr_value("$.userInfo.user.ttSeller")) @@ -132,6 +140,10 @@ def createTime(self): def desc(self): return replaceT(self._get_list_attr_value("$.itemList[*].desc")) + @property + def desc_raw(self): + return self._get_list_attr_value("$.itemList[*].desc") + @property def textExtra(self): return self._get_list_attr_value("$.itemList[*].textExtra") @@ -142,6 +154,10 @@ def textExtra(self): def nickname(self): return replaceT(self._get_list_attr_value("$.itemList[*].author.nickname")) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.itemList[*].author.nickname") + @property def uid(self): return self._get_list_attr_value("$.itemList[*].author.id") @@ -221,6 +237,10 @@ def music_album(self): def music_authorName(self): return replaceT(self._get_list_attr_value("$.itemList[*].music.authorName")) + @property + def music_authorName_raw(self): + return self._get_list_attr_value("$.itemList[*].music.authorName") + @property def music_coverLarge(self): return self._get_list_attr_value("$.itemList[*].music.coverLarge") @@ -245,6 +265,10 @@ def music_playUrl(self): def music_title(self): return replaceT(self._get_list_attr_value("$.itemList[*].music.title")) + @property + def music_title_raw(self): + return self._get_list_attr_value("$.itemList[*].music.title") + # video @property def video_bitrate(self): @@ -417,6 +441,10 @@ def uid(self): def nickname(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.author.nickname")) + @property + def nickname_raw(self): + return 
self._get_attr_value("$.itemInfo.itemStruct.author.nickname") + @property def secUid(self): return self._get_attr_value("$.itemInfo.itemStruct.author.secUid") @@ -429,6 +457,10 @@ def uniqueId(self): def signature(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.author.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.author.signature") + @property def openFavorite(self): return self._get_attr_value("$.itemInfo.itemStruct.author.openFavorite") @@ -461,6 +493,10 @@ def createTime(self): def desc(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.desc")) + @property + def desc_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.desc") + @property def textExtra(self): return self._get_attr_value("$.itemInfo.itemStruct.textExtra") @@ -531,6 +567,10 @@ def videoSuggestWordsList(self): def music_authorName(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.music.authorName")) + @property + def music_authorName_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.music.authorName") + @property def music_coverLarge(self): return self._get_attr_value("$.itemInfo.itemStruct.music.coverLarge") @@ -555,6 +595,10 @@ def music_playUrl(self): def music_title(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.music.title")) + @property + def music_title_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.music.title") + # video @property def video_bitrate(self): From 4e8e19946a448658f2f43b263689cc5b58564f10 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:43:13 +0800 Subject: [PATCH 112/164] =?UTF-8?q?style:=20=E5=88=A0=E9=99=A4=E6=97=A0?= =?UTF-8?q?=E7=94=A8=E5=AF=BC=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/filter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/f2/apps/tiktok/filter.py b/f2/apps/tiktok/filter.py index 
9b9593a..55a1bc0 100644 --- a/f2/apps/tiktok/filter.py +++ b/f2/apps/tiktok/filter.py @@ -1,7 +1,5 @@ # path: f2/apps/tiktok/filter.py -from typing import List, Union - from f2.utils.json_filter import JSONModel from f2.utils.utils import _get_first_item_from_list, timestamp_2_str, replaceT From dc83a61a5c6206066bbbc1519880238a6fc38c64 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 17:46:08 +0800 Subject: [PATCH 113/164] =?UTF-8?q?feat:=20douyin=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E5=AD=97=E6=AE=B5=E9=87=8D=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了原始内容字段,需要重新生成数据库 --- f2/apps/douyin/db.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/f2/apps/douyin/db.py b/f2/apps/douyin/db.py index d7231fc..4569f37 100644 --- a/f2/apps/douyin/db.py +++ b/f2/apps/douyin/db.py @@ -32,10 +32,12 @@ async def _create_table(self) -> None: "mix_count INTEGER", "mplatform_followers_count INTEGER", "nickname TEXT", + "nickname_raw TEXT", "room_id TEXT", "school_name TEXT", "short_id TEXT", "signature TEXT", + "signature_raw TEXT", "total_favorited INTEGER", "uid TEXT", "unique_id TEXT", @@ -147,6 +149,7 @@ async def _create_table(self) -> None: "aweme_id TEXT PRIMARY KEY", "aweme_type TEXT", "nickname TEXT", + "nickname_raw TEXT", "sec_user_id TEXT", "short_id TEXT", "uid TEXT", @@ -158,6 +161,7 @@ async def _create_table(self) -> None: "comment_gid TEXT", "create_time TEXT", "desc TEXT", + "desc_raw TEXT", "duration TEXT", "is_ads TEXT", "is_story TEXT", @@ -174,6 +178,7 @@ async def _create_table(self) -> None: "is_long_video TEXT", "media_type TEXT", "mix_desc TEXT", + "mix_desc_raw TEXT", "mix_create_time TEXT", "mix_id TEXT", "mix_name TEXT", @@ -186,17 +191,22 @@ async def _create_table(self) -> None: "is_original_sound TEXT", "is_pgc TEXT", "music_author TEXT", + "music_author_raw TEXT", "music_author_deleted TEXT", "music_duration TEXT", "music_id TEXT", "music_mid TEXT", 
"pgc_author TEXT", + "pgc_author_raw TEXT", "pgc_author_title TEXT", + "pgc_author_title_raw TEXT", "pgc_music_type TEXT", "music_status TEXT", "music_owner_handle TEXT", + "music_owner_handle_raw TEXT", "music_owner_id TEXT", "music_owner_nickname TEXT", + "music_owner_nickname_raw TEXT", "music_play_url TEXT", "position TEXT", "region TEXT", From d9939f1b4e24e35f0f7762e113b3cbc66e248bb0 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 24 Mar 2024 19:02:33 +0800 Subject: [PATCH 114/164] =?UTF-8?q?perf:=20=E5=8D=87=E7=BA=A7web=20douyin?= =?UTF-8?q?=2019.5.0=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 包括浏览器版本 --- f2/apps/douyin/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index 6d66b76..8714a99 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -12,18 +12,18 @@ class BaseRequestModel(BaseModel): aid: str = "6383" channel: str = "channel_pc_web" pc_client_type: int = 1 - version_code: str = "170400" - version_name: str = "17.4.0" + version_code: str = "190500" + version_name: str = "19.5.0" cookie_enabled: str = "true" screen_width: int = 1920 screen_height: int = 1080 browser_language: str = "zh-CN" browser_platform: str = "Win32" browser_name: str = "Edge" - browser_version: str = "117.0.2045.47" + browser_version: str = "122.0.0.0" browser_online: str = "true" engine_name: str = "Blink" - engine_version: str = "117.0.0.0" + engine_version: str = "122.0.0.0" os_name: str = "Windows" os_version: str = "10" cpu_core_num: int = 12 From 48b3227554d2f97278cd466a8327997ac86038f0 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 20:51:44 +0800 Subject: [PATCH 115/164] =?UTF-8?q?style:=20=E4=BC=98=E5=8C=96douyin=20`ha?= =?UTF-8?q?ndler`=E6=A8=A1=E5=9D=97=E6=B3=A8=E9=87=8A=E8=A1=A8=E8=BE=BE?= =?UTF-8?q?=E4=B8=8E=E6=96=B9=E6=B3=95=E5=8F=82=E6=95=B0=E6=A0=BC=E5=BC=8F?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、统称为`作品` 2、添加所有函数的返回类型与注释 --- f2/apps/douyin/handler.py | 194 ++++++++++++++++++++++---------------- 1 file changed, 112 insertions(+), 82 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index d73a5e5..bc0d8c6 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -62,11 +62,14 @@ class DouyinHandler: # 需要忽略的字段(需过滤掉有时效性的字段) ignore_fields = ["video_play_addr", "images", "video_bit_rate", "cover"] - def __init__(self, kwargs) -> None: + def __init__(self, kwargs: dict = ...) -> None: self.kwargs = kwargs self.downloader = DouyinDownloader(kwargs) - async def handler_user_profile(self, sec_user_id: str) -> UserProfileFilter: + async def handler_user_profile( + self, + sec_user_id: str, + ) -> UserProfileFilter: """ 用于获取指定用户的个人信息 (Used to get personal info of specified users) @@ -83,7 +86,11 @@ async def handler_user_profile(self, sec_user_id: str) -> UserProfileFilter: response = await crawler.fetch_user_profile(params) return UserProfileFilter(response) - async def get_user_nickname(self, sec_user_id: str, db: AsyncUserDB) -> str: + async def get_user_nickname( + self, + sec_user_id: str, + db: AsyncUserDB, + ) -> str: """ 获取指定用户的昵称,如果不存在,则从服务器获取并存储到数据库中 (Used to get personal info of specified users) @@ -103,7 +110,10 @@ async def get_user_nickname(self, sec_user_id: str, db: AsyncUserDB) -> str: return user_dict.get("nickname") async def get_or_add_user_data( - self, kwargs: dict, sec_user_id: str, db: AsyncUserDB + self, + kwargs: dict, + sec_user_id: str, + db: AsyncUserDB, ) -> Path: """ 获取或创建用户数据同时创建用户目录 @@ -140,7 +150,10 @@ async def get_or_add_user_data( @classmethod async def get_or_add_video_data( - cls, aweme_data: dict, db: AsyncVideoDB, ignore_fields: list = None + cls, + aweme_data: dict, + db: AsyncVideoDB, + ignore_fields: list = None, ): """ 获取或创建作品数据库数据 @@ -164,7 +177,7 @@ async def get_or_add_video_data( 
@mode_handler("one") async def handle_one_video(self): """ - 用于处理单个视频。 + 用于处理单个作品。 (Used to process a single video.) Args: @@ -186,25 +199,28 @@ async def handle_one_video(self): logger.debug(_("单个视频数据:{0}").format(aweme_data)) await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) - async def fetch_one_video(self, aweme_id: str) -> dict: + async def fetch_one_video( + self, + aweme_id: str, + ) -> PostDetailFilter: """ - 用于获取单个视频。 + 用于获取单个作品。 Args: - aweme_id: str: 视频ID + aweme_id: str: 作品ID Return: - video_data: dict: 视频数据字典,包含视频ID、视频文案、作者昵称 + video: PostDetailFilter: 单个作品数据过滤器 """ - logger.debug(_("开始爬取视频:{0}").format(aweme_id)) + logger.debug(_("开始爬取作品:{0}").format(aweme_id)) async with DouyinCrawler(self.kwargs) as crawler: params = PostDetail(aweme_id=aweme_id) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -214,7 +230,7 @@ async def fetch_one_video(self, aweme_id: str) -> dict: @mode_handler("post") async def handle_user_post(self): """ - 用于处理用户发布的视频。 + 用于处理用户发布的作品。 (Used to process videos published by users.) 
Args: @@ -238,7 +254,7 @@ async def handle_user_post(self): self.kwargs, aweme_data_list, user_path ) - # # 一次性批量插入视频数据到数据库 + # # 一次性批量插入作品数据到数据库 # async with AsyncVideoDB("douyin_videos.db") as db: # await db.batch_insert_videos(aweme_data_list, ignore_fields) @@ -248,24 +264,24 @@ async def fetch_user_post_videos( max_cursor: int, page_counts: int, max_counts: int, - ): + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户发布的视频列表。 + 用于获取指定用户发布的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 发布的视频").format(sec_user_id)) + logger.debug(_("开始爬取用户:{0} 发布的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -298,7 +314,7 @@ async def fetch_user_post_videos( logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -307,16 +323,16 @@ async def fetch_user_post_videos( aweme_data_list = video._to_list() yield aweme_data_list - # 更新已经处理的视频数量 (Update the number of videos processed) + # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("like") async def handle_user_like(self): """ - 用于处理用户喜欢的视频 (Used to process videos liked by users) + 用于处理用户喜欢的作品 (Used to process videos liked by users) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -343,7 +359,7 @@ async def handle_user_like(self): # for 
aweme_data in aweme_data_list: # await get_or_add_video_data(aweme_data, db, ignore_fields) - # # 一次性批量插入视频数据到数据库 + # # 一次性批量插入作品数据到数据库 # async with AsyncVideoDB("douyin_videos.db") as db: # await db.batch_insert_videos(aweme_data_list, ignore_fields) @@ -353,24 +369,24 @@ async def fetch_user_like_videos( max_cursor: int, page_counts: int, max_counts: int, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户喜欢的视频列表。 + 用于获取指定用户喜欢的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 喜欢的视频").format(sec_user_id)) + logger.debug(_("开始爬取用户:{0} 喜欢的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -416,7 +432,7 @@ async def fetch_user_like_videos( videos_collected += len(aweme_data_list) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) @mode_handler("music") async def handle_user_music_collection(self): @@ -449,18 +465,21 @@ async def handle_user_music_collection(self): ) async def fetch_user_music_collection( - self, max_cursor: int, page_counts: int, max_counts: int - ) -> AsyncGenerator[List[Dict[str, Any]], Any]: + self, + max_cursor: int, + page_counts: int, + max_counts: int, + ) -> AsyncGenerator[UserMusicCollectionFilter, Any]: """ 用于获取指定用户收藏的音乐作品列表。 Args: max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: AsyncGenerator[List[Dict[str, Any]], None]: 音乐作品数据 + music: 
AsyncGenerator[UserMusicCollectionFilter, Any]: 音乐数据过滤器,包含音乐数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") @@ -504,10 +523,12 @@ async def fetch_user_music_collection( music_collected += len(music.music_id) max_cursor = music.max_cursor + logger.debug(_("爬取结束,共爬取 {0} 个音乐作品").format(music_collected)) + @mode_handler("collection") async def handle_user_collection(self): """ - 用于处理用户收藏的视频 (Used to process videos collected by users) + 用于处理用户收藏的作品 (Used to process videos collected by users) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -533,21 +554,22 @@ async def handle_user_collection(self): ) async def fetch_user_collection_videos( - self, max_cursor: int = 0, page_counts: int = 20, max_counts: int = None - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + self, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, + ) -> AsyncGenerator[UserCollectionFilter, Any]: """ - 用于获取指定用户收藏的视频列表。 + 用于获取指定用户收藏的作品列表。 (Used to get the list of videos collected by the specified user.) 
Args: max_cursor: int: 起始页 (Start page) - page_counts: int: 每页视频数 (Number of videos per page) - max_counts: int: 最大视频数 (Maximum number of videos) + page_counts: int: 每页作品数 (Number of videos per page) + max_counts: int: 最大作品数 (Maximum number of videos) Return: - aweme_data: dict: 视频数据字典, 包含视频ID列表、视频文案、作者昵称、起始页 - (Video data dictionary, including video ID list, video description, - author nickname, start page) + collection: AsyncGenerator[UserCollectionFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 Note: 该接口需要用POST且只靠cookie来获取数据。 @@ -557,7 +579,7 @@ async def fetch_user_collection_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户收藏的视频")) + logger.debug(_("开始爬取用户收藏的作品")) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -597,7 +619,7 @@ async def fetch_user_collection_videos( @mode_handler("collects") async def handle_user_collects(self): """ - 用于处理用户收藏夹的视频 (Used to process videos in user collections) + 用于处理用户收藏夹的作品 (Used to process videos in user collections) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -686,8 +708,11 @@ async def select_user_collects( return str(collects.collects_id[selected_index - 1]) async def fetch_user_collects( - self, max_cursor: int, page_counts: int, max_counts: int - ) -> AsyncGenerator[UserCollectsFilter, None]: + self, + max_cursor: int, + page_counts: int, + max_counts: int, + ) -> AsyncGenerator[UserCollectsFilter, Any]: """ 用于获取指定用户收藏夹。 (Used to get the list of videos in the specified user's collection.) 
@@ -698,7 +723,7 @@ async def fetch_user_collects( max_counts: int: 最大收藏夹数 (Max counts) Return: - collects: AsyncGenerator[UserCollectsFilter, None]: 收藏夹列表过滤器 (Collection list Filter) + collects: AsyncGenerator[UserCollectsFilter, Any]: 收藏夹数据过滤器,包含收藏夹数据的_to_raw、_to_dict、_to_list方法) """ max_counts = max_counts or float("inf") @@ -735,7 +760,7 @@ async def fetch_user_collects( collected += len(collects.collects_id) max_cursor = collects.max_cursor - logger.debug(_("用户收藏夹爬取结束")) + logger.debug(_("爬取结束,共爬取 {0} 个收藏夹").format(collected)) async def fetch_user_collects_videos( self, @@ -743,27 +768,25 @@ async def fetch_user_collects_videos( max_cursor: int, page_counts: int, max_counts: int, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserCollectionFilter, Any]: """ - 用于获取指定用户收藏夹的视频列表。 + 用于获取指定用户收藏夹的作品列表。 (Used to get the list of videos in the specified user's collection.) Args: collects_id: str: 收藏夹ID (Collection ID) max_cursor: int: 起始页 (Page cursor) - page_counts: int: 每页视频数 (Number of videos per page) - max_counts: int: 最大视频数 (Maximum number of videos) + page_counts: int: 每页作品数 (Number of videos per page) + max_counts: int: 最大作品数 (Maximum number of videos) Return: - aweme_data: dict: 视频数据字典, 包含视频ID列表、视频文案、作者昵称、起始页 - (Video data dictionary, including video ID list, video description, - author nickname, start page) + video: AsyncGenerator[UserCollectionFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取收藏夹: {0} 的视频").format(collects_id)) + logger.debug(_("开始爬取收藏夹: {0} 的作品").format(collects_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -798,7 +821,7 @@ async def fetch_user_collects_videos( else: logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( video.aweme_id, video.desc, 
video.nickname ) ) @@ -817,12 +840,12 @@ async def fetch_user_collects_videos( break max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("mix") async def handle_user_mix(self): """ - 用于处理用户合集的视频 (Used to process videos of users' collections) + 用于处理用户合集的作品 (Used to process videos of users' mix) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -858,24 +881,24 @@ async def fetch_user_mix_videos( max_cursor: int = 0, page_counts: int = 20, max_counts: int = None, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserMixFilter, Any]: """ - 用于获取指定用户合集的视频列表。 + 用于获取指定用户合集的作品列表。 Args: mix_id: str: 合集ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + mix: AsyncGenerator[UserMixFilter, Any]: 合集作品数据过滤器,包含合集作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取合集: {0} 的视频").format(mix_id)) + logger.debug(_("开始爬取合集: {0} 的作品").format(mix_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -914,7 +937,7 @@ async def fetch_user_mix_videos( logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) break - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个合集作品").format(videos_collected)) @mode_handler("live") async def handle_user_live(self): @@ -933,7 +956,7 @@ async def handle_user_live(self): live_status = webcast_data.get("live_status") # 是否正在直播 if live_status != 2: - logger.debug(_("直播已结束")) + logger.info(_("当前 {0} 直播已结束").format(webcast_id)) return sec_user_id = webcast_data.get("sec_user_id") @@ -942,6 +965,10 @@ async def handle_user_live(self): await self.downloader.create_stream_tasks(self.kwargs, webcast_data, user_path) async def 
fetch_user_live_videos(self, webcast_id: str): + async def fetch_user_live_videos( + self, + webcast_id: str, + ) -> UserLiveFilter: """ 用于获取指定用户直播列表。 (Used to get the list of videos collected by the specified user.) @@ -979,7 +1006,10 @@ async def fetch_user_live_videos(self, webcast_id: str): webcast_data = live._to_dict() return webcast_data - async def fetch_user_live_videos_by_room_id(self, room_id: str): + async def fetch_user_live_videos_by_room_id( + self, + room_id: str, + ) -> UserLive2Filter: """ 使用room_id获取指定用户直播列表。 (Used to get the list of videos collected by the specified user) @@ -1053,24 +1083,24 @@ async def fetch_user_feed_videos( max_cursor: int, page_counts: int, max_counts: int, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户feed的视频列表。 + 用于获取指定用户feed的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} feed的视频").format(sec_user_id)) + logger.debug(_("开始爬取用户: {0} feed的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -1116,7 +1146,7 @@ async def fetch_user_feed_videos( videos_collected += len(video.aweme_id) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) async def fetch_user_following( self, @@ -1130,7 +1160,7 @@ async def fetch_user_following( max_counts: float = float("inf"), ) -> AsyncGenerator[UserFollowingFilter, Any]: """ - 用于获取指定用户关注的用户的视频列表。 + 用于获取指定用户关注的用户的作品列表。 Args: user_id: str: 用户ID @@ -1141,7 +1171,7 @@ async def fetch_user_following( 
min_time: int: 最小时间戳 max_time: int: 最大时间戳 Return: - following_data: dict: 关注用户数据字典,包含用户ID列表、用户昵称、用户头像、起始页 + following: AsyncGenerator[UserFollowingFilter, Any]: 关注用户数据过滤器,包含关注用户数据的_to_raw、_to_dict、_to_list方法 """ if not user_id and not sec_user_id: @@ -1232,7 +1262,7 @@ async def fetch_user_follower( min_time: int: 最小时间戳 max_time: int: 最大时间戳 Return: - follower_data: dict: 粉丝数据字典,包含用户ID列表、用户昵称、用户头像、起始页 + follower: AsyncGenerator[UserFollowerFilter, Any]: 粉丝数据过滤器,包含用户ID列表、用户昵称、用户头像、起始页 """ if not user_id and not sec_user_id: From 5b1926c16be488b16b0bd6856f642b635f973e8b Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 20:54:34 +0800 Subject: [PATCH 116/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E4=BA=86?= =?UTF-8?q?=E6=89=80=E6=9C=89fetch=E6=96=B9=E6=B3=95=E7=9A=84=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 102 ++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index bc0d8c6..0d7974e 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -225,7 +225,7 @@ async def fetch_one_video( ) ) - return video._to_dict() + return video @mode_handler("post") async def handle_user_post(self): @@ -320,8 +320,7 @@ async def fetch_user_post_videos( ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield video # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) @@ -406,31 +405,30 @@ async def fetch_user_like_videos( sec_user_id=sec_user_id, ) response = await crawler.fetch_user_like(params) - video = UserPostFilter(response) + like = UserPostFilter(response) - if not video.has_aweme: + if not like.has_aweme: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) - if not video.has_more: + if not like.has_more: 
logger.debug(_("用户:{0} 所有作品采集完毕").format(sec_user_id)) break - max_cursor = video.max_cursor + max_cursor = like.max_cursor continue logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + like.aweme_id, like.desc, like.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield like - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = video.max_cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) + max_cursor = like.max_cursor logger.debug(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) @@ -513,7 +511,7 @@ async def fetch_user_music_collection( ) logger.debug("===================================") - yield music._to_list() + yield music if not music.has_more: logger.debug(_("用户收藏的音乐作品采集完毕")) @@ -595,26 +593,27 @@ async def fetch_user_collection_videos( async with DouyinCrawler(self.kwargs) as crawler: params = UserCollection(cursor=max_cursor, count=current_request_size) response = await crawler.fetch_user_collection(params) - video = UserCollectionFilter(response) + collection = UserCollectionFilter(response) logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + collection.aweme_id, collection.desc, collection.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield collection - if not video.has_more: - logger.debug(_("用户收藏的视频采集完毕")) + if not collection.has_more: + logger.debug(_("用户收藏的作品采集完毕")) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = 
video.max_cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collection.aweme_id) + max_cursor = collection.max_cursor + + logger.debug(_("爬取结束,共爬取 {0} 个收藏作品").format(videos_collected)) @mode_handler("collects") async def handle_user_collects(self): @@ -816,7 +815,7 @@ async def fetch_user_collects_videos( if video.has_aweme: if not video.has_more: logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) - yield video._to_list() + yield video break else: logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) @@ -827,11 +826,10 @@ async def fetch_user_collects_videos( ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield video - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(video.aweme_id) max_cursor = video.max_cursor else: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) @@ -916,24 +914,23 @@ async def fetch_user_mix_videos( cursor=max_cursor, count=current_request_size, mix_id=mix_id ) response = await crawler.fetch_user_mix(params) - video = UserMixFilter(response) + mix = UserMixFilter(response) logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + mix.aweme_id, mix.desc, mix.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield mix - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = video.max_cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(mix.aweme_id) + max_cursor = mix.max_cursor - if not video.has_more: + if not mix.has_more: logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) break @@ 
-1003,8 +1000,7 @@ async def fetch_user_live_videos( logger.debug("===================================") logger.debug(_("直播信息爬取结束")) - webcast_data = live._to_dict() - return webcast_data + return live async def fetch_user_live_videos_by_room_id( self, @@ -1048,8 +1044,7 @@ async def fetch_user_live_videos_by_room_id( logger.debug("===================================") logger.debug(_("直播信息爬取结束")) - webcast_data = live._to_dict() - return webcast_data + return live @mode_handler("feed") async def handle_user_feed(self): @@ -1120,31 +1115,30 @@ async def fetch_user_feed_videos( sec_user_id=sec_user_id, ) response = await crawler.fetch_user_post(params) - video = UserPostFilter(response) + feed = UserPostFilter(response) - if not video.has_aweme: + if not feed.has_aweme: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) - if not video.has_more: + if not feed.has_more: logger.debug(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break - max_cursor = video.max_cursor + max_cursor = feed.max_cursor continue logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + feed.aweme_id, feed.desc, feed.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield feed - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - max_cursor = video.max_cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(feed.aweme_id) + max_cursor = feed.max_cursor logger.debug(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) From 5aafee59d2be214a0047aff0e7336a30ecbf5c45 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 20:55:16 +0800 Subject: [PATCH 117/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E4=BA=86?= =?UTF-8?q?=E6=89=80=E6=9C=89handle=E6=96=B9=E6=B3=95=E7=9A=84=E8=B0=83?= 
=?UTF-8?q?=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 42 +++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 0d7974e..5812f35 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -190,14 +190,18 @@ async def handle_one_video(self): async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data( - self.kwargs, aweme_data.get("sec_user_id"), db + self.kwargs, aweme_data.sec_user_id, db ) async with AsyncVideoDB("douyin_videos.db") as db: - await self.get_or_add_video_data(aweme_data, db, self.ignore_fields) + await self.get_or_add_video_data( + aweme_data._to_dict(), db, self.ignore_fields + ) - logger.debug(_("单个视频数据:{0}").format(aweme_data)) - await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) + logger.debug(_("单个作品数据:{0}").format(aweme_data._to_dict())) + await self.downloader.create_download_tasks( + self.kwargs, aweme_data._to_dict(), user_path + ) async def fetch_one_video( self, @@ -251,12 +255,12 @@ async def handle_user_post(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) # # 一次性批量插入作品数据到数据库 # async with AsyncVideoDB("douyin_videos.db") as db: - # await db.batch_insert_videos(aweme_data_list, ignore_fields) + # await db.batch_insert_videos(aweme_data_list._to_list(), ignore_fields) async def fetch_user_post_videos( self, @@ -351,7 +355,7 @@ async def handle_user_like(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) # async with AsyncVideoDB("douyin_videos.db") as db: @@ -459,7 +463,7 @@ async def handle_user_music_collection(self): ): # 创建下载任务 await 
self.downloader.create_music_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_music_collection( @@ -548,7 +552,7 @@ async def handle_user_collection(self): max_cursor, page_counts, max_counts ): await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_collection_videos( @@ -664,7 +668,7 @@ async def handle_user_collects(self): collects_id, max_cursor, page_counts, max_counts ): await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, tmp_user_path + self.kwargs, aweme_data_list._to_list(), tmp_user_path ) async def select_user_collects( @@ -866,11 +870,11 @@ async def handle_user_mix(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) # async with AsyncVideoDB("douyin_videos.db") as db: - # for aweme_data in aweme_data_list: + # for aweme_data in aweme_data_list._to_list(): # await get_or_add_video_data(aweme_data, db, ignore_fields) async def fetch_user_mix_videos( @@ -950,18 +954,22 @@ async def handle_user_live(self): # 然后下载直播推流 webcast_data = await self.fetch_user_live_videos(webcast_id) - live_status = webcast_data.get("live_status") + + live_status = webcast_data.live_status + sec_user_id = webcast_data.sec_user_id + # 是否正在直播 if live_status != 2: logger.info(_("当前 {0} 直播已结束").format(webcast_id)) return - sec_user_id = webcast_data.get("sec_user_id") async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) - await self.downloader.create_stream_tasks(self.kwargs, webcast_data, user_path) - async def fetch_user_live_videos(self, webcast_id: str): + await self.downloader.create_stream_tasks( + self.kwargs, webcast_data._to_dict(), user_path + ) + async def fetch_user_live_videos( 
self, webcast_id: str, @@ -1069,7 +1077,7 @@ async def handle_user_feed(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_feed_videos( From 7c5cf7d9da2f459c3e8ce76c92aea45fd9e2f4a5 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 22:17:42 +0800 Subject: [PATCH 118/164] =?UTF-8?q?test:=20=E6=B7=BB=E5=8A=A0=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E6=8A=96=E9=9F=B3=E5=8E=9F=E5=A3=B0=E6=AD=8C=E8=AF=8D?= =?UTF-8?q?=E8=BD=AC=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/test/test_lrc.py | 97 +++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 f2/apps/douyin/test/test_lrc.py diff --git a/f2/apps/douyin/test/test_lrc.py b/f2/apps/douyin/test/test_lrc.py new file mode 100644 index 0000000..3fa49c3 --- /dev/null +++ b/f2/apps/douyin/test/test_lrc.py @@ -0,0 +1,97 @@ +from f2.apps.douyin.utils import json_2_lrc + + +def test_gen_lrc_from_json(): + data = [ + {"text": "CB on the beat,ho", "timeId": "5.700"}, + {"text": "Wasted CTA lovees wasted", "timeId": "10.210"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "12.760"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "15.350"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "17.740"}, + {"text": "Wasted CTA lovees wasted", "timeId": "20.790"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "22.900"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "25.850"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "28.210"}, + {"text": "Wasted", "timeId": "30.830"}, + {"text": "Damn why is she so demonic", "timeId": "31.320"}, + {"text": "She medusa with a little pocahontas", "timeId": "33.510"}, + {"text": "She been lacin all my drugs or sosomethin", "timeId": "36.150"}, + { 
+ "text": "Cause every time that we're together I'm unconscious", + "timeId": "38.560", + }, + {"text": "Hold upuhlet me be honest", "timeId": "41.100"}, + {"text": "I know l saw her put the percs in my chronic", "timeId": "43.760"}, + {"text": "Smokintil my eyes roll back like the omen", "timeId": "46.370"}, + {"text": "Just another funeral for hergod damn", "timeId": "48.370"}, + {"text": "Wasted CTA lovees wasted", "timeId": "61.320"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "63.890"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "66.400"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "68.970"}, + {"text": "Wasted", "timeId": "71.160"}, + {"text": "She do cocaine in my basement", "timeId": "72.170"}, + {"text": "I'm a doctorsbut I'm runninout of patience", "timeId": "74.270"}, + {"text": "She told me that she tryna get closer to satan", "timeId": "76.760"}, + {"text": "She be talkin to him when she in the matrix", "timeId": "79.450"}, + { + "text": "Rockstarthat's our stylethere boys can't take it", + "timeId": "81.770", + }, + {"text": "Hatin but they're still tryna take our cadence", "timeId": "83.930"}, + {"text": "No basicbrand new rari when I'm racin", "timeId": "86.870"}, + { + "text": "Take itlet you roll my weedplease don't lace ityeah", + "timeId": "89.340", + }, + {"text": "That's a bum that you chasinayy", "timeId": "92.330"}, + {"text": "Foreign with meshe a dominatrix", "timeId": "95.220"}, + {"text": "I love that girls and I do like her body", "timeId": "97.270"}, + {"text": "I don't what the moneyI just want the molly", "timeId": "98.820"}, + { + "text": "That's what she say when she livesd in the valley", + "timeId": "100.160", + }, + {"text": "Lil boyI'm your fatherhakuna matata", "timeId": "101.380"}, + {"text": "I made that girl girls all of that top up", "timeId": "102.220"}, + { + "text": "Got dreadrs in my headused to pray for the lock up", + "timeId": "103.360", + }, + { 
+ "text": "I htit from the back and my legs start to lock up", + "timeId": "104.850", + }, + {"text": "Jacuzzi thar bootyI gave that girl flakka", "timeId": "106.540"}, + {"text": "I'm talkinblue caps that keep tweakinmy chakra", "timeId": "107.520"}, + {"text": "Rose on my chainthere's no hint like no copper", "timeId": "108.860"}, + {"text": "Take in the middle my head like I'm avatar", "timeId": "110.190"}, + {"text": "That's the reason that I ride on my appas", "timeId": "111.510"}, + {"text": "Wasted", "timeId": "112.710"}, + {"text": "WastedGTA lovees wasted", "timeId": "122.290"}, + {"text": "WastedI'm on these drugsI feel wasted", "timeId": "124.800"}, + {"text": "Wastedget her off my mind when I'm wasted", "timeId": "127.380"}, + {"text": "WastedI waste all my time when I'm wasted", "timeId": "130.120"}, + {"text": "My eyes closedhopinthis ain't makebelieve", "timeId": "132.850"}, + { + "text": "And she don't know hate all her demons like in me", + "timeId": "135.150", + }, + {"text": "L don't know l don't know", "timeId": "137.730"}, + {"text": "Don't know what she been onI don't know", "timeId": "143.870"}, + {"text": "All that lean l ain't have to let her in", "timeId": "146.470"}, + { + "text": "She ain't take my heart,but she took my medicine", + "timeId": "148.580", + }, + {"text": "Least somebody gon'take lthate to waste it", "timeId": "151.330"}, + {"text": "WastedGTA lovees wasted", "timeId": "152.980"}, + {"text": "WastedI'm on these drugsI feel wasted", "timeId": "155.610"}, + {"text": "Wastedget her off my mind when I'm wasted", "timeId": "158.070"}, + {"text": "WastedI waste all my time when I'm wasted", "timeId": "160.820"}, + ] + + print(json_2_lrc(data)) + + +if __name__ == "__main__": + test_gen_lrc_from_json() From 0dd61ea65f63de12e6a4008847f441f9de1baea6 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 22:46:10 +0800 Subject: [PATCH 119/164] =?UTF-8?q?test:=20=E6=B7=BB=E5=8A=A0=E8=8E=B7?= 
=?UTF-8?q?=E5=8F=96=E6=8A=96=E9=9F=B3=E7=94=A8=E6=88=B7=E5=85=B3=E6=B3=A8?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-following.py | 43 ++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 docs/snippets/douyin/user-following.py diff --git a/docs/snippets/douyin/user-following.py b/docs/snippets/douyin/user-following.py new file mode 100644 index 0000000..4a78387 --- /dev/null +++ b/docs/snippets/douyin/user-following.py @@ -0,0 +1,43 @@ +import asyncio +from f2.log.logger import logger +from f2.apps.douyin.handler import DouyinHandler + +kwargs = { + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", + "Referer": "https://www.douyin.com/", + }, + "proxies": { + "http": None, + "https": None, + }, + "timeout": 10, + "cookie": "YOUR_COOKIE_HERE", +} + + +async def main(): + sec_user_id = "" # 公开关注的账号 + # sec_user_id = "MS4wLjABAAAAGPm-wPeGQuziCu5z6KerQA7WmSTnS99c8lU8WLToB0BsN02mqbPxPuxwDjKf7udZ" # 隐私设置的账号 + async for following in DouyinHandler(kwargs).fetch_user_following( + sec_user_id=sec_user_id + ): + logger.info( + "用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}".format( + following.sec_uid, + following.nickname, + following.aweme_count, + following.secondary_text, + ) + ) + # print("=================_to_raw==================") + # print(following._to_raw()) + # print("=================_to_dict=================") + # print(following._to_dict()) + # print("=================_to_list===============") + # 数据量多的情况下_to_list这种数据结构比较慢 + # print(following._to_list()) + + +if __name__ == "__main__": + asyncio.run(main()) From bd61087ebb334eafad7ad37a429e4cbb3957baaa Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 22:46:17 +0800 Subject: [PATCH 120/164] =?UTF-8?q?test:=20=E6=B7=BB=E5=8A=A0=E8=8E=B7?= 
=?UTF-8?q?=E5=8F=96=E6=8A=96=E9=9F=B3=E7=94=A8=E6=88=B7=E7=B2=89=E4=B8=9D?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-follower.py | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 docs/snippets/douyin/user-follower.py diff --git a/docs/snippets/douyin/user-follower.py b/docs/snippets/douyin/user-follower.py new file mode 100644 index 0000000..a466d65 --- /dev/null +++ b/docs/snippets/douyin/user-follower.py @@ -0,0 +1,43 @@ +import asyncio +from f2.log.logger import logger +from f2.apps.douyin.handler import DouyinHandler + +kwargs = { + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", + "Referer": "https://www.douyin.com/", + }, + "proxies": { + "http": None, + "https": None, + }, + "timeout": 10, + "cookie": "YOUR_COOKIE_HERE", +} + + +async def main(): + sec_user_id = "" # 公开粉丝的账号 + # sec_user_id = "MS4wLjABAAAAGPm-wPeGQuziCu5z6KerQA7WmSTnS99c8lU8WLToB0BsN02mqbPxPuxwDjKf7udZ" # 隐私设置的账号 + # 根据max_time 和 min_time 区间获取用户粉丝列表 + async for follower in DouyinHandler(kwargs).fetch_user_follower( + sec_user_id=sec_user_id, + # max_time=1668606509, + # min_time=0, + ): + logger.info( + "用户ID:{0} 用户昵称:{1} 用户作品数:{2}".format( + follower.sec_uid, follower.nickname, follower.aweme_count + ) + ) + # print("=================_to_raw==================") + # print(follower._to_raw()) + # print("=================_to_dict=================") + # print(follower._to_dict()) + # print("=================_to_list===============") + # 数据量多的情况下_to_list这种数据结构比较慢 + # print(follower._to_list()) + + +if __name__ == "__main__": + asyncio.run(main()) From 650c67f8406c4c3396fbd8c38be3db5fd353bef7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 22:54:11 +0800 Subject: [PATCH 121/164] =?UTF-8?q?feat:=20=E5=90=AF=E7=94=A8timeout?= 
=?UTF-8?q?=E5=8F=82=E6=95=B0=EF=BC=8C=E9=81=BF=E5=85=8D=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E8=BF=87=E4=BA=8E=E9=A2=91=E7=B9=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 5812f35..25b868a 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -330,6 +330,10 @@ async def fetch_user_post_videos( videos_collected += len(video.aweme_id) max_cursor = video.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.info(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("like") @@ -434,6 +438,10 @@ async def fetch_user_like_videos( videos_collected += len(like.aweme_id) max_cursor = like.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) @mode_handler("music") @@ -525,6 +533,10 @@ async def fetch_user_music_collection( music_collected += len(music.music_id) max_cursor = music.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个音乐作品").format(music_collected)) @mode_handler("collection") @@ -617,6 +629,10 @@ async def fetch_user_collection_videos( videos_collected += len(collection.aweme_id) max_cursor = collection.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个收藏作品").format(videos_collected)) @mode_handler("collects") @@ -763,6 +779,10 @@ async def fetch_user_collects( collected += len(collects.collects_id) max_cursor = 
collects.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个收藏夹").format(collected)) async def fetch_user_collects_videos( @@ -842,6 +862,10 @@ async def fetch_user_collects_videos( break max_cursor = video.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("mix") @@ -937,6 +961,9 @@ async def fetch_user_mix_videos( if not mix.has_more: logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) break + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) logger.debug(_("爬取结束,共爬取 {0} 个合集作品").format(videos_collected)) @@ -1148,6 +1175,10 @@ async def fetch_user_feed_videos( videos_collected += len(feed.aweme_id) max_cursor = feed.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + logger.debug(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) async def fetch_user_following( From 86bb01774be3c5586bbd0f520022f3940bd90301 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 22:55:34 +0800 Subject: [PATCH 122/164] =?UTF-8?q?perf:=20=E8=B0=83=E6=95=B4mix=E6=B2=A1?= =?UTF-8?q?=E6=9C=89=E6=9B=B4=E5=A4=9A=E6=95=B0=E6=8D=AE=E6=97=B6=E6=8F=90?= =?UTF-8?q?=E5=89=8Dbreak?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 25b868a..c91abee 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -954,13 +954,14 @@ async def fetch_user_mix_videos( yield mix + if not mix.has_more: + 
logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) + break + # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(mix.aweme_id) max_cursor = mix.max_cursor - if not mix.has_more: - logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) - break # 避免请求过于频繁 logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) From 845192f6de1db2902abac5d2e105f021f50d6f8e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:00:00 +0800 Subject: [PATCH 123/164] =?UTF-8?q?style:=20=E8=B0=83=E6=95=B4=E6=B3=A8?= =?UTF-8?q?=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-follower.py | 4 ++-- docs/snippets/douyin/user-following.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/snippets/douyin/user-follower.py b/docs/snippets/douyin/user-follower.py index a466d65..017c683 100644 --- a/docs/snippets/douyin/user-follower.py +++ b/docs/snippets/douyin/user-follower.py @@ -30,9 +30,9 @@ async def main(): follower.sec_uid, follower.nickname, follower.aweme_count ) ) - # print("=================_to_raw==================") + # print("=================_to_raw================") # print(follower._to_raw()) - # print("=================_to_dict=================") + # print("=================_to_dict===============") # print(follower._to_dict()) # print("=================_to_list===============") # 数据量多的情况下_to_list这种数据结构比较慢 diff --git a/docs/snippets/douyin/user-following.py b/docs/snippets/douyin/user-following.py index 4a78387..cd0accf 100644 --- a/docs/snippets/douyin/user-following.py +++ b/docs/snippets/douyin/user-following.py @@ -30,9 +30,9 @@ async def main(): following.secondary_text, ) ) - # print("=================_to_raw==================") + # print("=================_to_raw================") # print(following._to_raw()) - # print("=================_to_dict=================") + # 
print("=================_to_dict===============") # print(following._to_dict()) # print("=================_to_list===============") # 数据量多的情况下_to_list这种数据结构比较慢 From 7a875e80f1173f048ca9cd5366096a620e9b2b12 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:17:19 +0800 Subject: [PATCH 124/164] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0douyin?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/one-video.py | 10 ++++-- docs/snippets/douyin/user-collection.py | 29 +++++++++-------- docs/snippets/douyin/user-like.py | 36 ++++++++++++--------- docs/snippets/douyin/user-live.py | 8 ++++- docs/snippets/douyin/user-mix.py | 43 +++++++++++++------------ docs/snippets/douyin/user-post.py | 38 +++++++++++++--------- docs/snippets/douyin/user-profile.py | 6 +++- 7 files changed, 100 insertions(+), 70 deletions(-) diff --git a/docs/snippets/douyin/one-video.py b/docs/snippets/douyin/one-video.py index 6e664d2..8bddee6 100644 --- a/docs/snippets/douyin/one-video.py +++ b/docs/snippets/douyin/one-video.py @@ -12,9 +12,13 @@ async def main(): - print(await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294994585925848359")) - print("-------------------") - print(await DouyinHandler(kwargs).fetch_one_video(aweme_id="7305827432509082913")) + video = await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294994585925848359") + print("=================_to_raw================") + print(video._to_raw()) + # print("=================_to_dict================") + # print(video._to_dict()) + # print("=================_to_list================") + # print(video._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-collection.py b/docs/snippets/douyin/user-collection.py index c9966bb..4658bc7 100644 --- a/docs/snippets/douyin/user-collection.py +++ b/docs/snippets/douyin/user-collection.py @@ -8,23 +8,26 @@ }, "proxies": {"http": None, "https": 
None}, "cookie": "YOUR_COOKIE_HERE", + "timeout": 10, } async def main(): - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos() - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( - 0, 10, 20 - ) - ] - print(results) + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos(): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + + # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( + # 0, 10, 20 + # ): + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + + # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( + # 0, 10, 20 + # ): + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-like.py b/docs/snippets/douyin/user-like.py index 9716a3d..51af131 100644 --- a/docs/snippets/douyin/user-like.py +++ b/docs/snippets/douyin/user-like.py @@ -8,26 +8,30 @@ }, "proxies": {"http": None, "https": None}, "cookie": "YOUR_COOKIE_HERE", + "timeout": 10, } async def main(): - user_sec_id = "YOUR_HOME_PAGE" # 替换开放喜欢列表的sec_user_id - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( - user_sec_id - ) - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( - user_sec_id, 0, 10, 20 - ) - ] - print(results) + sec_user_id = "MS4wLjABAAAA5sofqwkCjeZqwtTMs00E5HAg8udRR-warVgfPykwwgk" # 替换开放喜欢列表的sec_user_id + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( + sec_user_id, 0, 10, 20 + ): + print("=================_to_raw================") + 
print(aweme_data_list._to_raw()) + + # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( + # sec_user_id, 0, 10, 20 + # ): + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + + # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( + # sec_user_id, 0, 10, 20 + # ): + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-live.py b/docs/snippets/douyin/user-live.py index 837e8db..d4dc9fd 100644 --- a/docs/snippets/douyin/user-live.py +++ b/docs/snippets/douyin/user-live.py @@ -12,7 +12,13 @@ async def main(): - print(await DouyinHandler(kwargs).fetch_user_live_videos(webcast_id="775841227732")) + live = await DouyinHandler(kwargs).fetch_user_live_videos(webcast_id="775841227732") + print("=================_to_raw================") + print(live._to_raw()) + # print("=================_to_dict===============") + # print(live._to_dict()) + # print("=================_to_list===============") + # print(live._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-mix.py b/docs/snippets/douyin/user-mix.py index f6715f7..1a2a48a 100644 --- a/docs/snippets/douyin/user-mix.py +++ b/docs/snippets/douyin/user-mix.py @@ -7,31 +7,34 @@ "Referer": "https://www.douyin.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): - mix_id = ( - await DouyinHandler(kwargs) - .fetch_one_video(aweme_id="7294914031133969705") - .get("mix_id") - ) - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( - mix_id - ) - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( - mix_id, 0, 10, 20 - ) - ] - print(results) + mix_id = await 
DouyinHandler(kwargs).fetch_one_video(aweme_id="7294914031133969705") + # mix_id 为PostDetailFilter对象 + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( + mix_id.mix_id + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( + mix_id.mix_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-post.py b/docs/snippets/douyin/user-post.py index c83ba07..6dccb55 100644 --- a/docs/snippets/douyin/user-post.py +++ b/docs/snippets/douyin/user-post.py @@ -7,27 +7,33 @@ "Referer": "https://www.douyin.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): - user_sec_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE" - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - user_sec_id, 0, 10, 20 - ) - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - user_sec_id, 0, 10, 20 - ) - ] - print(results) + sec_user_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE" + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( + sec_user_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # 
print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( + sec_user_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-profile.py b/docs/snippets/douyin/user-profile.py index c6da736..5bfe045 100644 --- a/docs/snippets/douyin/user-profile.py +++ b/docs/snippets/douyin/user-profile.py @@ -13,7 +13,11 @@ async def main(): sec_user_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE" - print(await DouyinHandler(kwargs).handler_user_profile(sec_user_id=sec_user_id)) + user = await DouyinHandler(kwargs).handler_user_profile(sec_user_id=sec_user_id) + print("=================_to_raw================") + print(user._to_raw()) + # print("=================_to_dict===============") + # print(user._to_dict()) if __name__ == "__main__": From d68bb133e54486d7983fd799bb96daaa0fa3c8f7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:25:17 +0800 Subject: [PATCH 125/164] =?UTF-8?q?feat:=20tiktok=E8=BF=87=E6=BB=A4?= =?UTF-8?q?=E5=99=A8=E6=B7=BB=E5=8A=A0=E8=BF=94=E5=9B=9E=E5=8E=9F=E5=A7=8B?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/filter.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/f2/apps/tiktok/filter.py b/f2/apps/tiktok/filter.py index 55a1bc0..22199af 100644 --- a/f2/apps/tiktok/filter.py +++ b/f2/apps/tiktok/filter.py @@ -95,6 +95,9 @@ def ttSeller(self) -> bool: def verified(self) -> bool: return bool(self._get_attr_value("$.userInfo.user.verified")) + def 
_to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -328,6 +331,9 @@ def video_height(self): def video_width(self): return self._get_list_attr_value("$.itemList[*].video.width") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -405,6 +411,9 @@ def mixName(self): def videoCount(self): return self._get_attr_value("$.playList[*].videoCount") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -641,6 +650,9 @@ def video_height(self): def video_width(self): return self._get_attr_value("$.itemInfo.itemStruct.video.width") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) From de509f71784a51c52c77b3e6d9e400bac771ffea Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:25:34 +0800 Subject: [PATCH 126/164] =?UTF-8?q?style:=20black=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/filter.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/f2/apps/tiktok/filter.py b/f2/apps/tiktok/filter.py index 22199af..1f68492 100644 --- a/f2/apps/tiktok/filter.py +++ b/f2/apps/tiktok/filter.py @@ -291,11 +291,17 @@ def video_bitrate(self): def video_bitrateInfo(self): bit_rate_data = self._get_list_attr_value("$.itemList[*].video.bitrateInfo") return [ - [aweme.get("Bitrate", "")] # 使用 get 方法以处理字典中没有 "Bitrate" 键的情况 - if isinstance(aweme, dict) - else [aweme[0].get("Bitrate", "")] - if len(aweme) == 1 - else [item.get("Bitrate", "") for item in aweme] + ( + [ + aweme.get("Bitrate", "") + ] # 使用 get 方法以处理字典中没有 "Bitrate" 键的情况 + if isinstance(aweme, dict) + else ( + [aweme[0].get("Bitrate", "")] + if len(aweme) == 1 + else 
[item.get("Bitrate", "") for item in aweme] + ) + ) for aweme in bit_rate_data ] From 19e7cae14f9b559ab4f89a96f1f3570f8c41779f Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:31:36 +0800 Subject: [PATCH 127/164] =?UTF-8?q?perf:=20=E5=88=A0=E9=99=A4apps=E4=B8=AD?= =?UTF-8?q?db=E6=A8=A1=E5=9D=97=E7=9A=84aiosqlite=E5=AF=BC=E5=85=A5?= =?UTF-8?q?=E4=B8=8E=E9=94=99=E8=AF=AF=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/db.py | 36 ++++++++++++++++-------------------- f2/apps/tiktok/db.py | 1 - 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/f2/apps/douyin/db.py b/f2/apps/douyin/db.py index 4569f37..b42c70c 100644 --- a/f2/apps/douyin/db.py +++ b/f2/apps/douyin/db.py @@ -1,6 +1,5 @@ # path: f2/apps/douyin/db.py -import aiosqlite from f2.db.base_db import BaseDB @@ -265,29 +264,26 @@ async def batch_insert_videos( video_data_list (list): 视频信息列表 ignore_fields (list): 要忽略的字段列表,例如 ["field1", "field2"] """ - try: - # 如果 ignore_fields 未提供或者为 None,将其设置为空列表 - ignore_fields = ignore_fields or [] + # 如果 ignore_fields 未提供或者为 None,将其设置为空列表 + ignore_fields = ignore_fields or [] - # 删除要忽略的字段 - for field in ignore_fields: - for video_data in video_data_list: - if field in video_data: - del video_data[field] + # 删除要忽略的字段 + for field in ignore_fields: + for video_data in video_data_list: + if field in video_data: + del video_data[field] - keys = ", ".join(video_data_list[0].keys()) - placeholders = ", ".join(["?" for _ in range(len(video_data_list[0]))]) + keys = ", ".join(video_data_list[0].keys()) + placeholders = ", ".join(["?" 
for _ in range(len(video_data_list[0]))]) - # 构建插入数据的元组列表 - values = [tuple(video_data.values()) for video_data in video_data_list] + # 构建插入数据的元组列表 + values = [tuple(video_data.values()) for video_data in video_data_list] - await self.execute( - f"INSERT OR REPLACE INTO {self.TABLE_NAME} ({keys}) VALUES ({placeholders})", - values, - ) - await self.commit() - except aiosqlite.Error as e: - print(f"Error batch inserting videos: {e}") + await self.execute( + f"INSERT OR REPLACE INTO {self.TABLE_NAME} ({keys}) VALUES ({placeholders})", + values, + ) + await self.commit() async def get_video_info(self, aweme_id: str) -> dict: """ diff --git a/f2/apps/tiktok/db.py b/f2/apps/tiktok/db.py index 3d28679..b08af65 100644 --- a/f2/apps/tiktok/db.py +++ b/f2/apps/tiktok/db.py @@ -1,6 +1,5 @@ # path: f2/apps/tiktok/db.py -import aiosqlite from f2.db.base_db import BaseDB From f96aa77ad23c9f9e3de28965131587bc84dd7342 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:32:08 +0800 Subject: [PATCH 128/164] =?UTF-8?q?feat:=20tiktok=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E5=AD=97=E6=AE=B5=E9=87=8D=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了原始内容字段,需要重新生成数据库 --- f2/apps/tiktok/db.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/f2/apps/tiktok/db.py b/f2/apps/tiktok/db.py index b08af65..89690b3 100644 --- a/f2/apps/tiktok/db.py +++ b/f2/apps/tiktok/db.py @@ -23,6 +23,7 @@ async def _create_table(self) -> None: "videoCount INTEGER", "uid TEXT", "nickname TEXT", + "nickname_raw TEXT", "uniqueId TEXT", "commentSetting BOOLEAN", "followingVisibility BOOLEAN", @@ -31,6 +32,7 @@ async def _create_table(self) -> None: "showPlayListTab BOOLEAN", "relation BOOLEAN", "signature TEXT", + "signature_raw TEXT", "ttSeller BOOLEAN", "verified BOOLEAN", "last_aweme_id TEXT", From 0dbbf98e03df847875a635f898ad586afa524dd2 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Mon, 25 Mar 2024 23:40:03 +0800 Subject: [PATCH 
129/164] =?UTF-8?q?perf:=20tiktok=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E5=8E=BB=E9=99=A4=E5=9C=B0?= =?UTF-8?q?=E5=8C=BA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 该参数并不重要,如果需要只需自行调用接口模型传入即可 --- f2/apps/tiktok/handler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 55e7d97..6296108 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -48,7 +48,9 @@ def __init__(self, kwargs) -> None: self.downloader = TiktokDownloader(kwargs) async def handler_user_profile( - self, secUid: str = "", uniqueId: str = "" + self, + secUid: str = "", + uniqueId: str = "", ) -> UserProfileFilter: """ 用于获取指定用户的个人信息 @@ -66,7 +68,7 @@ async def handler_user_profile( raise ValueError(_("至少提供 secUid 或 uniqueId 中的一个参数")) async with TiktokCrawler(self.kwargs) as crawler: - params = UserProfile(region="SG", secUid=secUid, uniqueId=uniqueId) + params = UserProfile(secUid=secUid, uniqueId=uniqueId) response = await crawler.fetch_user_profile(params) return UserProfileFilter(response) From af18e6b98919e390f7bf3ccc782c45cffc7922e5 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:00:11 +0800 Subject: [PATCH 130/164] =?UTF-8?q?style:=20=E4=BC=98=E5=8C=96tiktok=20`ha?= =?UTF-8?q?ndler`=E6=A8=A1=E5=9D=97=E6=B3=A8=E9=87=8A=E8=A1=A8=E8=BE=BE?= =?UTF-8?q?=E4=B8=8E=E6=96=B9=E6=B3=95=E5=8F=82=E6=95=B0=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1、统称为`作品` 2、添加所有函数的返回类型与注释 --- f2/apps/tiktok/handler.py | 112 +++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 49 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 6296108..ee3340a 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -3,7 +3,7 @@ import sys from pathlib import Path -from typing import 
AsyncGenerator, Union, List +from typing import AsyncGenerator, Union, List, Any from f2.i18n.translator import _ from f2.log.logger import logger @@ -43,7 +43,7 @@ class TiktokHandler: # 需要忽略的字段(需过滤掉有时效性的字段) ignore_fields = ["video_play_addr", "images", "video_bit_rate", "cover"] - def __init__(self, kwargs) -> None: + def __init__(self, kwargs: dict = ...) -> None: self.kwargs = kwargs self.downloader = TiktokDownloader(kwargs) @@ -72,13 +72,18 @@ async def handler_user_profile( response = await crawler.fetch_user_profile(params) return UserProfileFilter(response) - async def get_user_nickname(self, secUid: str, db: AsyncUserDB) -> str: + async def get_user_nickname( + self, + secUid: str, + db: AsyncUserDB, + ) -> str: """ 用于获取指定用户的昵称 (Used to get nickname of specified users) Args: secUid: str: 用户ID (User ID) + db: AsyncUserDB: 用户数据库 (User database) Return: nick_name: str: 用户昵称 (User nickname) @@ -90,7 +95,11 @@ async def get_user_nickname(self, secUid: str, db: AsyncUserDB) -> str: await db.add_user_info(**user_dict._to_dict()) return user_dict.get("nickname", "") - async def get_or_add_user_data(self, secUid: str, db: AsyncUserDB) -> Path: + async def get_or_add_user_data( + self, + secUid: str, + db: AsyncUserDB, + ) -> Path: """ 获取或创建用户数据同时创建用户目录 (Get or create user data and create user directory) @@ -126,7 +135,10 @@ async def get_or_add_user_data(self, secUid: str, db: AsyncUserDB) -> Path: @classmethod async def get_or_add_video_data( - cls, aweme_data: dict, db: AsyncVideoDB, ignore_fields: list = [] + cls, + aweme_data: dict, + db: AsyncVideoDB, + ignore_fields: list = None, ): """ 获取或创建作品数据同时创建用户目录 @@ -152,9 +164,9 @@ async def fetch_play_list( secUid: str, cursor: int, page_counts: int, - ) -> Union[dict, UserPlayListFilter]: + ) -> UserPlayListFilter: """ - 用于获取指定用户的视频合集列表 + 用于获取指定用户的作品合集列表 (Used to get video mix list of specified user) Args: @@ -163,10 +175,10 @@ async def fetch_play_list( page_counts: int: 分页数量 (Page counts) Return: - playlist: 
Union[dict, UserPlayListFilter]: 视频合集列表 (Video mix list) + playlist: UserPlayListFilter: 作品合集列表 (Video mix list) """ - logger.debug(_("开始爬取用户:{0} 的视频合集列表").format(secUid)) + logger.debug(_("开始爬取用户:{0} 的作品合集列表").format(secUid)) async with TiktokCrawler(self.kwargs) as crawler: params = UserPlayList(secUid=secUid, cursor=cursor, count=page_counts) @@ -174,12 +186,12 @@ async def fetch_play_list( playlist = UserPlayListFilter(response) if not playlist.hasPlayList: - logger.info(_("用户:{0} 没有视频合集").format(secUid)) + logger.info(_("用户:{0} 没有作品合集").format(secUid)) return {} logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频合集ID:{0} 视频合集标题:{1}").format( + _("作品合集ID:{0} 作品合集标题:{1}").format( playlist.mixId, playlist.mixName ) ) @@ -190,18 +202,18 @@ async def select_playlist( self, playlists: Union[dict, UserPlayListFilter] ) -> Union[str, List[str]]: """ - 用于选择要下载的视频合辑 + 用于选择要下载的作品合辑 (Used to select the video mix to download) Args: - playlists: Union[dict, UserPlayListFilter]: 视频合辑列表 (Video mix list) + playlists: Union[dict, UserPlayListFilter]: 作品合辑列表 (Video mix list) Return: - selected_index: Union[str, List[str]]: 选择的视频合辑序号 (Selected video mix index) + selected_index: Union[str, List[str]]: 选择的作品合辑序号 (Selected video mix index) """ if playlists == {}: - sys.exit(_("用户没有视频合辑")) + sys.exit(_("用户没有作品合辑")) rich_console.print("[bold]请选择要下载的合辑:[/bold]") rich_console.print("0: [bold]全部下载[/bold]") @@ -233,7 +245,7 @@ async def select_playlist( @mode_handler("one") async def handler_one_video(self): """ - 用于获取指定视频的信息 + 用于获取指定作品的信息 (Used to get video info of specified video) Args: @@ -252,31 +264,33 @@ async def handler_one_video(self): async with AsyncVideoDB("tiktok_videos.db") as vdb: await self.get_or_add_video_data(aweme_data, vdb) - logger.debug(_("单个视频数据:{0}").format(aweme_data)) + logger.debug(_("单个作品数据:{0}").format(aweme_data)) # 创建下载任务 await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) - async def fetch_one_video(self, 
itemId: str) -> dict: + async def fetch_one_video( + self, itemId: str + ) -> AsyncGenerator[PostDetailFilter, Any]: """ - 用于获取指定视频的详细信息 + 用于获取指定作品的详细信息 (Used to get detailed information of specified video) Args: - itemId: str: 视频ID (Video ID) + itemId: str: 作品ID (Video ID) Return: - post: dict: 视频信息 (Video info) + video: AsyncGenerator[PostDetailFilter, Any]: 作品信息过滤器 (Video info filter) """ - logger.debug(_("开始爬取视频:{0}").format(itemId)) + logger.debug(_("开始爬取作品:{0}").format(itemId)) async with TiktokCrawler(self.kwargs) as crawler: params = PostDetail(itemId=itemId) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -286,7 +300,7 @@ async def fetch_one_video(self, itemId: str) -> dict: @mode_handler("post") async def handler_user_post(self): """ - 用于获取指定用户的视频信息 + 用于获取指定用户的作品信息 (Used to get video info of specified user) Args: @@ -312,9 +326,9 @@ async def handler_user_post(self): async def fetch_user_post_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户发布的视频列表 + 用于获取指定用户发布的作品列表 (Used to get video list of specified user) Args: @@ -324,13 +338,13 @@ async def fetch_user_post_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + video: AsyncGenerator[UserPostFilter, Any]: 用户发布作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 发布的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 发布的作品").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -359,7 +373,7 @@ async def fetch_user_post_videos( logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + 
_("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) @@ -367,16 +381,16 @@ async def fetch_user_post_videos( yield video._to_list() - # 更新已经处理的视频数量 (Update the number of videos processed) + # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("like") async def handler_user_like(self): """ - 用于获取指定用户的点赞视频信息 + 用于获取指定用户的点赞作品信息 (Used to get liked video info of specified user) Args: @@ -402,9 +416,9 @@ async def handler_user_like(self): async def fetch_user_like_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户点赞的视频列表 + 用于获取指定用户点赞的作品列表 (Used to get liked video list of specified user) Args: @@ -414,13 +428,13 @@ async def fetch_user_like_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + like: AsyncGenerator[UserPostFilter, Any]: 用户点赞作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 点赞的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 点赞的作品").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -468,12 +482,12 @@ async def fetch_user_like_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("collect") async def handler_user_collect(self): """ - 用于获取指定用户的收藏视频信息 + 用于获取指定用户的收藏作品信息 (Used to get collected video info of specified user) Args: @@ -499,9 +513,9 @@ async def handler_user_collect(self): async def fetch_user_collect_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: 
float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户收藏的视频列表 + 用于获取指定用户收藏的作品列表 (Used to get collected video list of specified user) Args: @@ -511,13 +525,13 @@ async def fetch_user_collect_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + collect: AsyncGenerator[UserPostFilter, Any]: 收藏作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 收藏的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 收藏的作品").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -565,12 +579,12 @@ async def fetch_user_collect_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("mix") async def handler_user_mix(self): """ - 用于获取指定用户的合集视频信息 + 用于获取指定用户的合集作品信息 (Used to get mix video info of specified user) Args: @@ -602,9 +616,9 @@ async def handler_user_mix(self): async def fetch_user_mix_videos( self, mixId: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserMixFilter, Any]: """ - 用于获取指定用户合集的视频列表 + 用于获取指定用户合集的作品列表 (Used to get mix video list of specified user) Args: @@ -614,13 +628,13 @@ async def fetch_user_mix_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + mix: AsyncGenerator[UserMixFilter, Any]: 合集作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 合集的视频").format(mixId)) + logger.debug(_("开始爬取用户: {0} 合集的作品").format(mixId)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -668,7 +682,7 @@ async def fetch_user_mix_videos( videos_collected += len(video.aweme_id) cursor = video.cursor - 
logger.debug(_("爬取结束,共爬取 {0} 个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) async def main(kwargs): From 67ecc29005e3026423e5e9a951d83f41b7d6b296 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:02:12 +0800 Subject: [PATCH 131/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E4=BA=86?= =?UTF-8?q?tiktok=E6=89=80=E6=9C=89fetch=E6=96=B9=E6=B3=95=E7=9A=84?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/handler.py | 79 +++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index ee3340a..2505c96 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -295,7 +295,7 @@ async def fetch_one_video( ) ) - return video._to_dict() + return video @mode_handler("post") async def handler_user_post(self): @@ -379,7 +379,7 @@ async def fetch_user_post_videos( ) logger.debug("===================================") - yield video._to_list() + yield video # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) @@ -450,37 +450,36 @@ async def fetch_user_like_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserLike(secUid=secUid, cursor=cursor, count=page_counts) response = await crawler.fetch_user_like(params) - video = UserPostFilter(response) + like = UserPostFilter(response) - if video.has_aweme: + if like.has_aweme: logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + like.aweme_id, like.desc, like.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield like - # 更新已经处理的视频数量 (Update the number of videos processed) - 
videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": + if not like.hasMore and str(like.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: logger.debug(_("{0} 页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": + if not like.hasMore and str(like.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) + cursor = like.cursor logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @@ -547,37 +546,36 @@ async def fetch_user_collect_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserCollect(secUid=secUid, cursor=cursor, count=page_counts) response = await crawler.fetch_user_collect(params) - video = UserPostFilter(response) + collect = UserPostFilter(response) - if video.has_aweme: + if collect.has_aweme: logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID:{0} 视频文案:{1} 作者:{2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + collect.aweme_id, collect.desc, collect.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield collect - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collect.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": + if not collect.hasMore and str(collect.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: logger.debug(_("{0} 
页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": + if not collect.hasMore and str(collect.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collect.aweme_id) + cursor = collect.cursor logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @@ -650,37 +648,36 @@ async def fetch_user_mix_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserMix(mixId=mixId, cursor=cursor, count=page_counts) response = await crawler.fetch_user_mix(params) - video = UserMixFilter(response) + mix = UserMixFilter(response) - if video.has_aweme: + if mix.has_aweme: logger.debug(_("当前请求的cursor: {0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + mix.aweme_id, mix.desc, mix.nickname ) ) logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield mix - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(mix.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": + if not mix.hasMore and str(mix.api_status_code) == "0": logger.debug(_("合辑: {0} 所有作品采集完毕").format(mixId)) break else: logger.debug(_("{0} 页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": + if not mix.hasMore and str(mix.api_status_code) == "0": logger.debug(_("合辑: {0} 所有作品采集完毕").format(mixId)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + 
videos_collected += len(mix.aweme_id) + cursor = mix.cursor logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) From 5ab31c8e6390fc1b2717c9ec3a39d5a292da5aeb Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:08:11 +0800 Subject: [PATCH 132/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E4=BA=86?= =?UTF-8?q?=E6=89=80=E6=9C=89handle=E6=96=B9=E6=B3=95=E7=9A=84=E8=B0=83?= =?UTF-8?q?=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/handler.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 2505c96..b1682cd 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -257,17 +257,19 @@ async def handler_one_video(self): aweme_data = await self.fetch_one_video(aweme_id) async with AsyncUserDB("tiktok_users.db") as udb: - user_path = await self.get_or_add_user_data( - str(aweme_data.get("secUid")), udb - ) + user_path = await self.get_or_add_user_data(aweme_data.secUid, udb) async with AsyncVideoDB("tiktok_videos.db") as vdb: - await self.get_or_add_video_data(aweme_data, vdb) + await self.get_or_add_video_data( + aweme_data._to_dict(), vdb, self.ignore_fields + ) - logger.debug(_("单个作品数据:{0}").format(aweme_data)) + logger.debug(_("单个作品数据:{0}").format(aweme_data._to_dict())) # 创建下载任务 - await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) + await self.downloader.create_download_tasks( + self.kwargs, aweme_data._to_dict(), user_path + ) async def fetch_one_video( self, itemId: str @@ -321,7 +323,7 @@ async def handler_user_post(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_post_videos( @@ -411,7 +413,7 @@ async def handler_user_like(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - 
self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_like_videos( @@ -507,7 +509,7 @@ async def handler_user_collect(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_collect_videos( @@ -609,7 +611,7 @@ async def handler_user_mix(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_mix_videos( From d0773d65abebdbea0856ff0c0af3a81f2a026efb Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:08:45 +0800 Subject: [PATCH 133/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9=E4=BA=86?= =?UTF-8?q?=E9=83=A8=E5=88=86=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 2 ++ f2/apps/tiktok/handler.py | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index c91abee..a69c7d0 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -199,6 +199,8 @@ async def handle_one_video(self): ) logger.debug(_("单个作品数据:{0}").format(aweme_data._to_dict())) + + # 创建下载任务 await self.downloader.create_download_tasks( self.kwargs, aweme_data._to_dict(), user_path ) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index b1682cd..f9ab81b 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -271,9 +271,7 @@ async def handler_one_video(self): self.kwargs, aweme_data._to_dict(), user_path ) - async def fetch_one_video( - self, itemId: str - ) -> AsyncGenerator[PostDetailFilter, Any]: + async def fetch_one_video(self, itemId: str) -> PostDetailFilter: """ 用于获取指定作品的详细信息 (Used to get detailed information of specified video) @@ -282,7 +280,7 @@ async def 
fetch_one_video( itemId: str: 作品ID (Video ID) Return: - video: AsyncGenerator[PostDetailFilter, Any]: 作品信息过滤器 (Video info filter) + video: PostDetailFilter: 作品信息过滤器 (Video info filter) """ logger.debug(_("开始爬取作品:{0}").format(itemId)) From b0e2a19645994f0615e8e9ca9b313dd3e3b87d1d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:20:24 +0800 Subject: [PATCH 134/164] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0tiktok?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/tiktok/one-video.py | 10 +++++++--- docs/snippets/tiktok/user-collect.py | 19 +++++++++++-------- docs/snippets/tiktok/user-like.py | 19 +++++++++++-------- docs/snippets/tiktok/user-mix.py | 21 +++++++++++++++------ docs/snippets/tiktok/user-playlist.py | 6 +++++- docs/snippets/tiktok/user-post.py | 19 +++++++++++-------- docs/snippets/tiktok/user-profile.py | 14 +++++++++++--- 7 files changed, 71 insertions(+), 37 deletions(-) diff --git a/docs/snippets/tiktok/one-video.py b/docs/snippets/tiktok/one-video.py index 0f93a9a..fb264d2 100644 --- a/docs/snippets/tiktok/one-video.py +++ b/docs/snippets/tiktok/one-video.py @@ -12,9 +12,13 @@ async def main(): - print(await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605")) - print("-------------------") - print(await TiktokHandler(kwargs).fetch_one_video(itemId="7305827432509082913")) + video = await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605") + print("=================_to_raw================") + print(video._to_raw()) + # print("=================_to_dict================") + # print(video._to_dict()) + # print("=================_to_list================") + # print(video._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-collect.py b/docs/snippets/tiktok/user-collect.py index e23814c..effeac2 100644 --- a/docs/snippets/tiktok/user-collect.py +++ 
b/docs/snippets/tiktok/user-collect.py @@ -8,6 +8,7 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -16,14 +17,16 @@ async def main(): secUid = await SecUserIdFetcher.get_secuid( "YOUR_HOME_PAGE" ) # 替换开放收藏列表的用户主页 - print( - [ - aweme_data_list - async for aweme_data_list in TiktokHandler( - kwargs - ).fetch_user_collect_videos(secUid) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_collect_videos( + secUid, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-like.py b/docs/snippets/tiktok/user-like.py index 8e9bd2e..4517fb6 100644 --- a/docs/snippets/tiktok/user-like.py +++ b/docs/snippets/tiktok/user-like.py @@ -8,6 +8,7 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -16,14 +17,16 @@ async def main(): secUid = await SecUserIdFetcher.get_secuid( "YOUR_HOME_PAGE" ) # 替换开放喜欢列表的用户主页 - print( - [ - aweme_data_list - async for aweme_data_list in TiktokHandler(kwargs).fetch_user_like_videos( - secUid - ) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_like_videos( + secUid, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-mix.py b/docs/snippets/tiktok/user-mix.py index fd5a7f2..bf486de 100644 --- a/docs/snippets/tiktok/user-mix.py +++ 
b/docs/snippets/tiktok/user-mix.py @@ -10,6 +10,7 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -19,9 +20,13 @@ async def main(): playlist = await TiktokHandler(kwargs).fetch_play_list(secUid) for mixId in playlist.get("mixId", []): - print([ - aweme_data_list async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId) - ]) + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": asyncio.run(main()) @@ -39,6 +44,7 @@ async def main(): "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -51,9 +57,12 @@ async def main(): if selected_index != 0: # [!code focus] mixId = playlist.get("mixId", [])[selected_index - 1] # [!code focus] - print([ - aweme_data_list async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId) - ]) + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId): + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/snippets/tiktok/user-playlist.py b/docs/snippets/tiktok/user-playlist.py index cae900f..8418e7c 100644 --- a/docs/snippets/tiktok/user-playlist.py +++ b/docs/snippets/tiktok/user-playlist.py @@ -14,7 +14,11 @@ async def main(): secUid = await SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print(await 
TiktokHandler(kwargs).fetch_play_list(secUid, 0, 30)) + playlist = await TiktokHandler(kwargs).fetch_play_list(secUid, 0, 30) + print("=================_to_raw================") + print(playlist._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-post.py b/docs/snippets/tiktok/user-post.py index 3ef4a7e..38cb3dc 100644 --- a/docs/snippets/tiktok/user-post.py +++ b/docs/snippets/tiktok/user-post.py @@ -8,20 +8,23 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): secUid = await SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print( - [ - aweme_data_list - async for aweme_data_list in TiktokHandler(kwargs).fetch_user_post_videos( - secUid, 0, 30, 0 - ) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_post_videos( + secUid, 0, 30, None + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-profile.py b/docs/snippets/tiktok/user-profile.py index d98b6d7..5819a3e 100644 --- a/docs/snippets/tiktok/user-profile.py +++ b/docs/snippets/tiktok/user-profile.py @@ -16,9 +16,17 @@ async def main(): "MS4wLjABAAAAQhcYf_TjRKUku-aF8oqngAfzrYksgGLRz8CKMciBFdfR54HQu3qGs-WoJ-KO7hO8" ) uniqueId = "vantoan___" - print(await TiktokHandler(kwargs).handler_user_profile(secUid=secUid)) - print("-------------------") - print(await TiktokHandler(kwargs).handler_user_profile(uniqueId=uniqueId)) + user = await TiktokHandler(kwargs).handler_user_profile(secUid=secUid) + print("=================_to_raw================") + print(user._to_raw()) + # 
print("=================_to_dict===============") + # print(user._to_dict()) + + user = await TiktokHandler(kwargs).handler_user_profile(uniqueId=uniqueId) + print("=================_to_raw================") + print(user._to_raw()) + # print("=================_to_dict===============") + # print(user._to_dict()) if __name__ == "__main__": From d15ef40c96a3786d9a3e4bb0809301054fcecb36 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:26:17 +0800 Subject: [PATCH 135/164] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0douyin?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/format-file-name.py | 11 ++++++++--- docs/snippets/douyin/user-collection.py | 16 ++++------------ docs/snippets/douyin/user-like.py | 16 ++++------------ docs/snippets/douyin/user-live-room-id.py | 10 ++++++---- docs/snippets/douyin/user-live.py | 2 -- docs/snippets/douyin/user-mix.py | 10 ---------- docs/snippets/douyin/user-post.py | 10 ---------- docs/snippets/douyin/video-get-add.py | 2 +- 8 files changed, 23 insertions(+), 54 deletions(-) diff --git a/docs/snippets/douyin/format-file-name.py b/docs/snippets/douyin/format-file-name.py index 309be02..510cf80 100644 --- a/docs/snippets/douyin/format-file-name.py +++ b/docs/snippets/douyin/format-file-name.py @@ -16,7 +16,7 @@ async def main(): # 单作品的数据 aweme_data = await DouyinHandler(kwargs).fetch_one_video("7218193198328433954") # 格式化后的文件名 - print(format_file_name(kwargs.get("naming"), aweme_data) + "_video") + print(format_file_name(kwargs.get("naming"), aweme_data._to_dict()) + "_video") # 文件名模板 kwargs = { @@ -31,10 +31,15 @@ async def main(): # 用户自定义字段 custom_fields = {"location": "New York"} # 格式化后的自定义文件名 - print(format_file_name(kwargs.get("naming"), aweme_data, custom_fields) + "_video") + print( + format_file_name(kwargs.get("naming"), aweme_data._to_dict(), custom_fields) + + "_video" + ) # 格式化后的自定义文件名,长度限制在100 
print( - format_file_name(kwargs.get("naming"), aweme_data, custom_fields, 100) + format_file_name( + kwargs.get("naming"), aweme_data._to_dict(), custom_fields, 100 + ) + "_video" ) diff --git a/docs/snippets/douyin/user-collection.py b/docs/snippets/douyin/user-collection.py index 4658bc7..a296c22 100644 --- a/docs/snippets/douyin/user-collection.py +++ b/docs/snippets/douyin/user-collection.py @@ -16,18 +16,10 @@ async def main(): async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos(): print("=================_to_raw================") print(aweme_data_list._to_raw()) - - # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( - # 0, 10, 20 - # ): - # print("=================_to_dict===============") - # print(aweme_data_list._to_dict()) - - # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos( - # 0, 10, 20 - # ): - # print("=================_to_list===============") - # print(aweme_data_list._to_list()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-like.py b/docs/snippets/douyin/user-like.py index 51af131..f0e66e9 100644 --- a/docs/snippets/douyin/user-like.py +++ b/docs/snippets/douyin/user-like.py @@ -20,18 +20,10 @@ async def main(): ): print("=================_to_raw================") print(aweme_data_list._to_raw()) - - # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( - # sec_user_id, 0, 10, 20 - # ): - # print("=================_to_dict===============") - # print(aweme_data_list._to_dict()) - - # async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( - # sec_user_id, 0, 10, 20 - # ): - # print("=================_to_list===============") - # print(aweme_data_list._to_list()) + # print("=================_to_dict===============") 
+ # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-live-room-id.py b/docs/snippets/douyin/user-live-room-id.py index 24ae1fb..5c427dc 100644 --- a/docs/snippets/douyin/user-live-room-id.py +++ b/docs/snippets/douyin/user-live-room-id.py @@ -12,11 +12,13 @@ async def main(): - print( - await DouyinHandler(kwargs).fetch_user_live_videos_by_room_id( - room_id="7318296342189919011" - ) + live = await DouyinHandler(kwargs).fetch_user_live_videos_by_room_id( + room_id="7318296342189919011" ) + print("=================_to_raw================") + print(live._to_raw()) + # print("=================_to_dict===============") + # print(live._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-live.py b/docs/snippets/douyin/user-live.py index d4dc9fd..dba3acf 100644 --- a/docs/snippets/douyin/user-live.py +++ b/docs/snippets/douyin/user-live.py @@ -17,8 +17,6 @@ async def main(): print(live._to_raw()) # print("=================_to_dict===============") # print(live._to_dict()) - # print("=================_to_list===============") - # print(live._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-mix.py b/docs/snippets/douyin/user-mix.py index 1a2a48a..8e25520 100644 --- a/docs/snippets/douyin/user-mix.py +++ b/docs/snippets/douyin/user-mix.py @@ -16,16 +16,6 @@ async def main(): mix_id = await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294914031133969705") # mix_id 为PostDetailFilter对象 - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( - mix_id.mix_id - ): - print("=================_to_raw================") - print(aweme_data_list._to_raw()) - # print("=================_to_dict===============") - # print(aweme_data_list._to_dict()) - # print("=================_to_list===============") - # print(aweme_data_list._to_list()) - async for aweme_data_list in 
DouyinHandler(kwargs).fetch_user_mix_videos( mix_id.mix_id, 0, 10, 20 ): diff --git a/docs/snippets/douyin/user-post.py b/docs/snippets/douyin/user-post.py index 6dccb55..f4130e5 100644 --- a/docs/snippets/douyin/user-post.py +++ b/docs/snippets/douyin/user-post.py @@ -25,16 +25,6 @@ async def main(): # print("=================_to_list===============") # print(aweme_data_list._to_list()) - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - sec_user_id, 0, 10, 20 - ): - print("=================_to_raw================") - print(aweme_data_list._to_raw()) - # print("=================_to_dict===============") - # print(aweme_data_list._to_dict()) - # print("=================_to_list===============") - # print(aweme_data_list._to_list()) - if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/snippets/douyin/video-get-add.py b/docs/snippets/douyin/video-get-add.py index 5cae5fd..1829cbe 100644 --- a/docs/snippets/douyin/video-get-add.py +++ b/docs/snippets/douyin/video-get-add.py @@ -21,7 +21,7 @@ async def main(): ) async with AsyncVideoDB("douyin_videos.db") as avdb: await DouyinHandler(kwargs).get_or_add_video_data( - aweme_data, avdb, ignore_fields + aweme_data._to_dict(), avdb, ignore_fields ) From 9a0a3e903f880e75be0cb289172441d0ee5dfc29 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:26:55 +0800 Subject: [PATCH 136/164] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0tiktok?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/tiktok/format-file-name.py | 8 +++++--- docs/snippets/tiktok/video-get-add.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/snippets/tiktok/format-file-name.py b/docs/snippets/tiktok/format-file-name.py index a55fb5b..7ad5620 100644 --- a/docs/snippets/tiktok/format-file-name.py +++ b/docs/snippets/tiktok/format-file-name.py @@ -17,15 +17,17 @@ async def main(): # 
单作品的数据 aweme_data = await TiktokHandler(kwargs).fetch_one_video("7316948869764484384") # 格式化后的文件名 - print(format_file_name(kwargs.get("naming"), aweme_data) + "_video") + print(format_file_name(kwargs.get("naming"), aweme_data._to_dict()) + "_video") # 用户自定义字段 custom_fields = {"location": "New York"} # 格式化后的自定义文件名 - print((kwargs.get("naming"), aweme_data, custom_fields) + "_video") + print((kwargs.get("naming"), aweme_data._to_dict(), custom_fields) + "_video") # 格式化后的自定义文件名,长度限制在100 print( - format_file_name(kwargs.get("naming"), aweme_data, custom_fields, 100) + format_file_name( + kwargs.get("naming"), aweme_data._to_dict(), custom_fields, 100 + ) + "_video" ) diff --git a/docs/snippets/tiktok/video-get-add.py b/docs/snippets/tiktok/video-get-add.py index abf1b5b..34c6d33 100644 --- a/docs/snippets/tiktok/video-get-add.py +++ b/docs/snippets/tiktok/video-get-add.py @@ -21,7 +21,7 @@ async def main(): ) async with AsyncVideoDB("tiktok_videos.db") as avdb: await TiktokHandler(kwargs).get_or_add_video_data( - aweme_data, avdb, ignore_fields + aweme_data._to_dict(), avdb, ignore_fields ) From 6c071c1a1c338cc67d4faa833281dc6b83fd1b64 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 26 Mar 2024 00:27:42 +0800 Subject: [PATCH 137/164] =?UTF-8?q?perf:=20=E5=BC=83=E7=94=A8douyin=20`fil?= =?UTF-8?q?ter`=E7=9A=84UserLiveFilter=E6=97=A0=E7=94=A8=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 87ed6a5..1ec6a68 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1563,35 +1563,6 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } - def _to_list(self): - # 不需要的属性列表 - exclude_list = [] - # 生成属性名称列表,然后过滤掉不需要的属性 - keys = [ - prop_name - for prop_name in dir(self) - if not 
prop_name.startswith("__") - and not prop_name.startswith("_") - and prop_name not in exclude_list - ] - - aweme_entries = self._get_attr_value("$.aweme_list") or [] - - list_dicts = [] - # 遍历每个条目并创建一个字典 - # (Iterate through each entry and create a dict) - for entry in aweme_entries: - d = {} - for key in keys: - attr_values = getattr(self, key) - # 当前aweme_entry在属性列表中的索引 - index = aweme_entries.index(entry) - # 如果属性值的长度足够则赋值,否则赋None - # (Assign value if the length of the attribute value is sufficient, otherwise assign None) - d[key] = attr_values[index] if index < len(attr_values) else None - list_dicts.append(d) - return list_dicts - class UserLive2Filter(JSONModel): # live From c62a5450640d103a589138a8132fdc47cdb0376e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sat, 30 Mar 2024 19:12:51 +0800 Subject: [PATCH 138/164] =?UTF-8?q?refactor:=20=E5=8E=BB=E9=99=A4PostDetai?= =?UTF-8?q?lFilter=E7=9A=84=E6=97=A0=E7=94=A8=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _to_list方法只针对多组数据 --- docs/snippets/douyin/one-video.py | 2 -- f2/apps/douyin/filter.py | 30 ------------------------------ 2 files changed, 32 deletions(-) diff --git a/docs/snippets/douyin/one-video.py b/docs/snippets/douyin/one-video.py index 8bddee6..a608f41 100644 --- a/docs/snippets/douyin/one-video.py +++ b/docs/snippets/douyin/one-video.py @@ -17,8 +17,6 @@ async def main(): print(video._to_raw()) # print("=================_to_dict================") # print(video._to_dict()) - # print("=================_to_list================") - # print(video._to_list()) if __name__ == "__main__": diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 1ec6a68..f34d57b 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1412,36 +1412,6 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } - def _to_list(self): - # 不需要的属性列表 - exclude_list = ["has_more", "max_cursor", 
"min_cursor"] - # 生成属性名称列表,然后过滤掉不需要的属性 - keys = [ - prop_name - for prop_name in dir(self) - if not prop_name.startswith("__") - and not prop_name.startswith("_") - and prop_name not in exclude_list - ] - - aweme_entries = self._get_attr_value("$.aweme_detail") or [] - - list_dicts = [] - # 遍历每个条目并创建一个字典 - # (Iterate through each entry and create a dict) - for entry in aweme_entries: - d = {} - for key in keys: - attr_values = getattr(self, key) - # 当前aweme_entry在属性列表中的索引 - index = aweme_entries.index(entry) - # 如果属性值的长度足够则赋值,否则赋None - # (Assign value if the length of the attribute value is sufficient, otherwise assign None) - d[key] = attr_values[index] if index < len(attr_values) else None - list_dicts.append(d) - return list_dicts - - class UserLiveFilter(JSONModel): # live @property From 33387d7447ae3fce68c24beb1fa30fef4fd2940a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:29:34 +0800 Subject: [PATCH 139/164] =?UTF-8?q?perf:=20=E5=9C=A8=E9=80=82=E5=BD=93?= =?UTF-8?q?=E7=9A=84=E4=BD=8D=E7=BD=AEyield=E4=BD=9C=E5=93=81=E6=95=B0?= =?UTF-8?q?=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 129 ++++++++++++++------------------------ 1 file changed, 46 insertions(+), 83 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index a69c7d0..ffeb7e4 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -308,6 +308,7 @@ async def fetch_user_post_videos( ) response = await crawler.fetch_user_post(params) video = UserPostFilter(response) + yield video if not video.has_aweme: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) @@ -326,8 +327,6 @@ async def fetch_user_post_videos( ) logger.debug("===================================") - yield video - # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) max_cursor = video.max_cursor @@ -416,6 +415,7 @@ async def fetch_user_like_videos( ) 
response = await crawler.fetch_user_like(params) like = UserPostFilter(response) + yield like if not like.has_aweme: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) @@ -434,8 +434,6 @@ async def fetch_user_like_videos( ) logger.debug("===================================") - yield like - # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(like.aweme_id) max_cursor = like.max_cursor @@ -516,21 +514,20 @@ async def fetch_user_music_collection( ) response = await crawler.fetch_user_music_collection(params) music = UserMusicCollectionFilter(response) - - logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) - logger.debug( - _("音乐ID:{0} 音乐标题:{1} 作者:{2}").format( - music.music_id, music.title, music.author - ) - ) - logger.debug("===================================") - - yield music + yield music if not music.has_more: logger.debug(_("用户收藏的音乐作品采集完毕")) break + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) + logger.debug( + _("音乐ID:{0} 音乐标题:{1} 作者:{2}").format( + music.music_id, music.title, music.author + ) + ) + logger.debug("===================================") + # 更新已经处理的音乐数量 (Update the number of music processed) music_collected += len(music.music_id) max_cursor = music.max_cursor @@ -612,6 +609,11 @@ async def fetch_user_collection_videos( params = UserCollection(cursor=max_cursor, count=current_request_size) response = await crawler.fetch_user_collection(params) collection = UserCollectionFilter(response) + yield collection + + if not collection.has_more: + logger.debug(_("用户收藏的作品采集完毕")) + break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( @@ -621,12 +623,6 @@ async def fetch_user_collection_videos( ) logger.debug("===================================") - yield collection - - if not collection.has_more: - logger.debug(_("用户收藏的作品采集完毕")) - break - # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(collection.aweme_id) max_cursor = collection.max_cursor @@ -763,6 +759,11 
@@ async def fetch_user_collects( params = UserCollects(cursor=max_cursor, count=page_counts) response = await crawler.fetch_user_collects(params) collects = UserCollectsFilter(response) + yield collects + + if not collects.has_more: + logger.info(_("所有收藏夹ID采集完毕")) + break logger.debug( _("收藏夹ID: {0} 收藏夹标题: {1}").format( @@ -771,12 +772,6 @@ async def fetch_user_collects( ) logger.debug("===================================") - yield collects - - if not collects.has_more: - logger.info(_("所有收藏夹ID采集完毕")) - break - # 更新已经处理的收藏夹数量 (Update the number of collections processed) collected += len(collects.collects_id) max_cursor = collects.max_cursor @@ -832,37 +827,25 @@ async def fetch_user_collects_videos( ) response = await crawler.fetch_user_collects_video(params) video = UserCollectionFilter(response) + yield video - logger.debug( - "是否有作品: {0} 是否有更多: {1}".format( - video.has_aweme, video.has_more - ) - ) - if video.has_aweme: + if not video.has_aweme: + logger.debug(_("第 {0} 页没有找到作品").format(max_cursor)) if not video.has_more: logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) - yield video break - else: - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) - logger.debug( - _("作品ID: {0} 作品文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname - ) - ) - logger.debug("===================================") - yield video + logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug( + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + video.aweme_id, video.desc, video.nickname + ) + ) + logger.debug("===================================") - # 更新已经处理的作品数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - max_cursor = video.max_cursor - else: - logger.debug(_("{0} 页没有找到作品").format(max_cursor)) - if not video.has_more: - logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) - break - max_cursor = video.max_cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += 
len(video.aweme_id) + max_cursor = video.max_cursor # 避免请求过于频繁 logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) @@ -945,6 +928,11 @@ async def fetch_user_mix_videos( ) response = await crawler.fetch_user_mix(params) mix = UserMixFilter(response) + yield mix + + if not mix.has_more: + logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) + break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( @@ -954,12 +942,6 @@ async def fetch_user_mix_videos( ) logger.debug("===================================") - yield mix - - if not mix.has_more: - logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) - break - # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(mix.aweme_id) max_cursor = mix.max_cursor @@ -1154,6 +1136,7 @@ async def fetch_user_feed_videos( ) response = await crawler.fetch_user_post(params) feed = UserPostFilter(response) + yield feed if not feed.has_aweme: logger.debug(_("{0} 页没有找到作品").format(max_cursor)) @@ -1172,8 +1155,6 @@ async def fetch_user_feed_videos( ) logger.debug("===================================") - yield feed - # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(feed.aweme_id) max_cursor = feed.max_cursor @@ -1238,13 +1219,10 @@ async def fetch_user_following( ) response = await crawler.fetch_user_following(params) following = UserFollowingFilter(response) + yield following - if following.status_code != 0: - logger.error( - _("错误代码:{0} 错误消息:{1}").format( - following.status_code, following.status_msg - ) - ) + if not following.has_more: + logger.info(_("用户:{0} 所有关注用户采集完毕").format(sec_user_id)) break logger.info(_("当前请求的offset:{0}").format(offset)) @@ -1259,12 +1237,6 @@ async def fetch_user_following( ) logger.debug("===================================") - yield following - - if not following.has_more: - logger.info(_("用户:{0} 所有关注用户采集完毕").format(sec_user_id)) - break - # 更新已经处理的用户数量 (Update the number of users processed) users_collected += 
len(following.sec_uid) offset = following.offset @@ -1329,13 +1301,10 @@ async def fetch_user_follower( ) response = await crawler.fetch_user_follower(params) follower = UserFollowerFilter(response) + yield follower - if follower.status_code != 0: - logger.error( - _("错误代码:{0} 错误消息:{1}").format( - follower.status_code, follower.status_msg - ) - ) + if not follower.has_more: + logger.info(_("用户:{0} 所有粉丝采集完毕").format(sec_user_id)) break logger.info( @@ -1349,12 +1318,6 @@ async def fetch_user_follower( ) logger.debug("===================================") - yield follower - - if not follower.has_more: - logger.info(_("用户:{0} 所有粉丝采集完毕").format(sec_user_id)) - break - # 更新已经处理的用户数量 (Update the number of users processed) users_collected += len(follower.sec_uid) offset = follower.offset From c6bb3db49b87718148ea3a2e0344312c909b327d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:32:15 +0800 Subject: [PATCH 140/164] =?UTF-8?q?perf:=20=E7=BB=99=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E6=8F=90=E4=BE=9B=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max_cursor: int = 0 page_counts: int = 20 max_counts: int = None --- f2/apps/douyin/handler.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index ffeb7e4..da0cac4 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -267,9 +267,9 @@ async def handle_user_post(self): async def fetch_user_post_videos( self, sec_user_id: str, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserPostFilter, Any]: """ 用于获取指定用户发布的作品列表。 @@ -374,9 +374,9 @@ async def handle_user_like(self): async def fetch_user_like_videos( self, sec_user_id: str, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + 
page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserPostFilter, Any]: """ 用于获取指定用户喜欢的作品列表。 @@ -476,9 +476,9 @@ async def handle_user_music_collection(self): async def fetch_user_music_collection( self, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserMusicCollectionFilter, Any]: """ 用于获取指定用户收藏的音乐作品列表。 @@ -726,9 +726,9 @@ async def select_user_collects( async def fetch_user_collects( self, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserCollectsFilter, Any]: """ 用于获取指定用户收藏夹。 @@ -785,9 +785,9 @@ async def fetch_user_collects( async def fetch_user_collects_videos( self, collects_id: str, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserCollectionFilter, Any]: """ 用于获取指定用户收藏夹的作品列表。 @@ -1095,9 +1095,9 @@ async def handle_user_feed(self): async def fetch_user_feed_videos( self, sec_user_id: str, - max_cursor: int, - page_counts: int, - max_counts: int, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, ) -> AsyncGenerator[UserPostFilter, Any]: """ 用于获取指定用户feed的作品列表。 From 66c749c9bd3aece0d21a9d0250b4a7210d6e8ee7 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:39:40 +0800 Subject: [PATCH 141/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=BE=93=E5=87=BA=E7=BA=A7=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 77 ++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index da0cac4..68f14b2 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -219,7 +219,7 @@ 
async def fetch_one_video( video: PostDetailFilter: 单个作品数据过滤器 """ - logger.debug(_("开始爬取作品:{0}").format(aweme_id)) + logger.info(_("开始爬取作品:{0}").format(aweme_id)) async with DouyinCrawler(self.kwargs) as crawler: params = PostDetail(aweme_id=aweme_id) response = await crawler.fetch_post_detail(params) @@ -287,7 +287,7 @@ async def fetch_user_post_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 发布的作品").format(sec_user_id)) + logger.info(_("开始爬取用户:{0} 发布的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -298,7 +298,7 @@ async def fetch_user_post_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserPost( @@ -311,9 +311,9 @@ async def fetch_user_post_videos( yield video if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品").format(max_cursor)) + logger.info(_("{0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) + logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = video.max_cursor @@ -394,7 +394,7 @@ async def fetch_user_like_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户:{0} 喜欢的作品").format(sec_user_id)) + logger.info(_("开始爬取用户:{0} 喜欢的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -405,7 +405,7 @@ async def fetch_user_like_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserLike( @@ -418,9 +418,9 @@ async def fetch_user_like_videos( yield like if not like.has_aweme: - logger.debug(_("{0} 
页没有找到作品").format(max_cursor)) + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) if not like.has_more: - logger.debug(_("用户:{0} 所有作品采集完毕").format(sec_user_id)) + logger.info(_("用户:{0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = like.max_cursor @@ -442,7 +442,7 @@ async def fetch_user_like_videos( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) @mode_handler("music") async def handle_user_music_collection(self): @@ -495,7 +495,7 @@ async def fetch_user_music_collection( max_counts = max_counts or float("inf") music_collected = 0 - logger.debug(_("开始爬取用户收藏的音乐作品")) + logger.info(_("开始爬取用户收藏的音乐作品")) while music_collected < max_counts: current_request_size = min(page_counts, max_counts - music_collected) @@ -506,7 +506,7 @@ async def fetch_user_music_collection( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserMusicCollection( @@ -517,7 +517,7 @@ async def fetch_user_music_collection( yield music if not music.has_more: - logger.debug(_("用户收藏的音乐作品采集完毕")) + logger.info(_("用户收藏的音乐作品采集完毕")) break logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) @@ -536,7 +536,7 @@ async def fetch_user_music_collection( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个音乐作品").format(music_collected)) + logger.info(_("爬取结束,共爬取 {0} 个音乐作品").format(music_collected)) @mode_handler("collection") async def handle_user_collection(self): @@ -592,7 +592,7 @@ async def fetch_user_collection_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户收藏的作品")) + logger.info(_("开始爬取用户收藏的作品")) while 
videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -603,7 +603,7 @@ async def fetch_user_collection_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserCollection(cursor=max_cursor, count=current_request_size) @@ -612,7 +612,7 @@ async def fetch_user_collection_videos( yield collection if not collection.has_more: - logger.debug(_("用户收藏的作品采集完毕")) + logger.info(_("用户收藏的作品采集完毕")) break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) @@ -631,7 +631,7 @@ async def fetch_user_collection_videos( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个收藏作品").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个收藏作品").format(videos_collected)) @mode_handler("collects") async def handle_user_collects(self): @@ -746,8 +746,9 @@ async def fetch_user_collects( max_counts = max_counts or float("inf") collected = 0 + logger.info(_("开始爬取用户收藏夹")) + while collected < max_counts: - logger.debug(_("开始爬取用户收藏夹")) logger.debug("===================================") logger.debug( _("当前请求的max_cursor: {0}, max_counts: {1}").format( @@ -780,7 +781,7 @@ async def fetch_user_collects( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个收藏夹").format(collected)) + logger.info(_("爬取结束,共爬取 {0} 个收藏夹").format(collected)) async def fetch_user_collects_videos( self, @@ -806,7 +807,7 @@ async def fetch_user_collects_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取收藏夹: {0} 的作品").format(collects_id)) + logger.info(_("开始爬取收藏夹: {0} 的作品").format(collects_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - 
videos_collected) @@ -817,7 +818,7 @@ async def fetch_user_collects_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserCollectsVideo( @@ -830,9 +831,9 @@ async def fetch_user_collects_videos( yield video if not video.has_aweme: - logger.debug(_("第 {0} 页没有找到作品").format(max_cursor)) + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) + logger.info(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) @@ -851,7 +852,7 @@ async def fetch_user_collects_videos( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("mix") async def handle_user_mix(self): @@ -909,7 +910,7 @@ async def fetch_user_mix_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取合集: {0} 的作品").format(mix_id)) + logger.info(_("开始爬取合集: {0} 的作品").format(mix_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -920,7 +921,7 @@ async def fetch_user_mix_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserMix( @@ -931,7 +932,7 @@ async def fetch_user_mix_videos( yield mix if not mix.has_more: - logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) + logger.info(_("合集: {0} 所有作品采集完毕").format(mix_id)) break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) @@ -950,7 +951,7 @@ async def fetch_user_mix_videos( logger.info(_("等待 {0} 
秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个合集作品").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个合集作品").format(videos_collected)) @mode_handler("live") async def handle_user_live(self): @@ -999,7 +1000,7 @@ async def fetch_user_live_videos( sub-partition, anchor nickname) """ - logger.debug(_("开始爬取直播: {0} 的数据").format(webcast_id)) + logger.info(_("开始爬取直播: {0} 的数据").format(webcast_id)) logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: @@ -1018,7 +1019,7 @@ async def fetch_user_live_videos( ) ) logger.debug("===================================") - logger.debug(_("直播信息爬取结束")) + logger.info(_("直播信息爬取结束")) return live @@ -1039,7 +1040,7 @@ async def fetch_user_live_videos_by_room_id( anchor nickname) """ - logger.debug(_("开始爬取房间号: {0} 的数据").format(room_id)) + logger.info(_("开始爬取房间号: {0} 的数据").format(room_id)) logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: @@ -1062,7 +1063,7 @@ async def fetch_user_live_videos_by_room_id( ) ) logger.debug("===================================") - logger.debug(_("直播信息爬取结束")) + logger.info(_("直播信息爬取结束")) return live @@ -1115,7 +1116,7 @@ async def fetch_user_feed_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} feed的作品").format(sec_user_id)) + logger.info(_("开始爬取用户: {0} feed的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) @@ -1126,7 +1127,7 @@ async def fetch_user_feed_videos( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserPost( @@ -1139,9 +1140,9 @@ async def fetch_user_feed_videos( yield feed if not feed.has_aweme: - logger.debug(_("{0} 
页没有找到作品").format(max_cursor)) + logger.info(_("{0} 页没有找到作品").format(max_cursor)) if not feed.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) + logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = feed.max_cursor @@ -1163,7 +1164,7 @@ async def fetch_user_feed_videos( logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) async def fetch_user_following( self, From 8a5bb940639db794b522ce8d27a384ac3202aeea Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:51:30 +0800 Subject: [PATCH 142/164] =?UTF-8?q?style:=20=E4=BF=AE=E6=94=B9=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=BE=93=E5=87=BA=E6=B6=88=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/dl.py | 2 +- f2/apps/douyin/handler.py | 4 ++-- f2/apps/tiktok/dl.py | 2 +- f2/apps/tiktok/handler.py | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index 3cfb48f..e2224dd 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -178,7 +178,7 @@ async def handler_download( # 检查作品是否被屏蔽 if aweme_prohibited: - logger.warning(_("{0} 该作品已被屏蔽,无法下载").format(aweme_id)) + logger.warning(_("该 {0} 作品已被屏蔽,无法下载").format(aweme_id)) return # 检查作品是否可见 diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 68f14b2..c8295bf 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -311,7 +311,7 @@ async def fetch_user_post_videos( yield video if not video.has_aweme: - logger.info(_("{0} 页没有找到作品").format(max_cursor)) + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) if not video.has_more: logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break @@ -1140,7 +1140,7 @@ async def fetch_user_feed_videos( yield feed if not 
feed.has_aweme: - logger.info(_("{0} 页没有找到作品").format(max_cursor)) + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) if not feed.has_more: logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break diff --git a/f2/apps/tiktok/dl.py b/f2/apps/tiktok/dl.py index 04cc11b..8555967 100644 --- a/f2/apps/tiktok/dl.py +++ b/f2/apps/tiktok/dl.py @@ -172,7 +172,7 @@ async def handler_download( # 检查作品是否被屏蔽 if aweme_privateItem: - logger.warning(_("{0} 该作品已被屏蔽,无法下载").format(aweme_id)) + logger.warning(_("该 {0} 作品已被屏蔽,无法下载").format(aweme_id)) return # 检查作品是否可见 diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index f9ab81b..3f1c358 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -363,7 +363,7 @@ async def fetch_user_post_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) if not video.hasMore and str(video.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break @@ -471,7 +471,7 @@ async def fetch_user_like_videos( break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) if not like.hasMore and str(like.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) @@ -567,7 +567,7 @@ async def fetch_user_collect_videos( break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) if not collect.hasMore and str(collect.api_status_code) == "0": logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) @@ -669,7 +669,7 @@ async def fetch_user_mix_videos( break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) if not mix.hasMore and str(mix.api_status_code) == "0": logger.debug(_("合辑: {0} 所有作品采集完毕").format(mixId)) From 734f8b7a692e5a66030c45eac9618c21eb061ced Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:54:36 +0800 
Subject: [PATCH 143/164] =?UTF-8?q?docs:=20=E4=BF=AE=E6=94=B9=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E4=BB=A3=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-follower.py | 14 ++++++++++---- docs/snippets/douyin/user-following.py | 20 +++++++++++++------- docs/snippets/douyin/user-like.py | 4 ++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/snippets/douyin/user-follower.py b/docs/snippets/douyin/user-follower.py index 017c683..f0e6a66 100644 --- a/docs/snippets/douyin/user-follower.py +++ b/docs/snippets/douyin/user-follower.py @@ -25,11 +25,17 @@ async def main(): # max_time=1668606509, # min_time=0, ): - logger.info( - "用户ID:{0} 用户昵称:{1} 用户作品数:{2}".format( - follower.sec_uid, follower.nickname, follower.aweme_count + if follower.status_code != 0: + logger.erro("错误代码:{0} 错误消息:{1}").format( + follower.status_code, follower.status_msg ) - ) + else: + logger.info( + "用户ID:{0} 用户昵称:{1} 用户作品数:{2}".format( + follower.sec_uid, follower.nickname, follower.aweme_count + ) + ) + # print("=================_to_raw================") # print(follower._to_raw()) # print("=================_to_dict===============") diff --git a/docs/snippets/douyin/user-following.py b/docs/snippets/douyin/user-following.py index cd0accf..9fa39f1 100644 --- a/docs/snippets/douyin/user-following.py +++ b/docs/snippets/douyin/user-following.py @@ -22,14 +22,20 @@ async def main(): async for following in DouyinHandler(kwargs).fetch_user_following( sec_user_id=sec_user_id ): - logger.info( - "用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}".format( - following.sec_uid, - following.nickname, - following.aweme_count, - following.secondary_text, + if following.status_code != 0: + logger.error_("错误代码:{0} 错误消息:{1}").format( + following.status_code, following.status_msg ) - ) + else: + logger.info( + "用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}".format( + following.sec_uid, + following.nickname, + 
following.aweme_count, + following.secondary_text, + ) + ) + # print("=================_to_raw================") # print(following._to_raw()) # print("=================_to_dict===============") diff --git a/docs/snippets/douyin/user-like.py b/docs/snippets/douyin/user-like.py index f0e66e9..e48775d 100644 --- a/docs/snippets/douyin/user-like.py +++ b/docs/snippets/douyin/user-like.py @@ -13,8 +13,8 @@ async def main(): - sec_user_id = "MS4wLjABAAAA5sofqwkCjeZqwtTMs00E5HAg8udRR-warVgfPykwwgk" # 替换开放喜欢列表的sec_user_id - + sec_user_id = "MS4wLjABAAAAW9FWcqS7RdQAWPd2AA5fL_ilmqsIFUCQ_Iym6Yh9_cUa6ZRqVLjVQSUjlHrfXY1Y" # 开放喜欢列表的sec_user_id + # sec_user_id = "MS4wLjABAAAAkA9Zsx7wNHUWse8xwUt9zzlAUfZ-7ZOBMbPzKhkDYEjUd-f4qS_DM6fNyxP_-9l2" # 未开放喜欢列表的sec_user_id async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos( sec_user_id, 0, 10, 20 ): From c189b5a754bcf164b40530e5897c4f9efdce5c0d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Sun, 31 Mar 2024 22:56:40 +0800 Subject: [PATCH 144/164] =?UTF-8?q?docs:=20=E6=B7=BB=E5=8A=A0=E6=8A=96?= =?UTF-8?q?=E9=9F=B3=E7=94=A8=E6=88=B7=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E7=89=87=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/snippets/douyin/user-collects.py | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 docs/snippets/douyin/user-collects.py diff --git a/docs/snippets/douyin/user-collects.py b/docs/snippets/douyin/user-collects.py new file mode 100644 index 0000000..fb609d1 --- /dev/null +++ b/docs/snippets/douyin/user-collects.py @@ -0,0 +1,29 @@ +import asyncio +from f2.apps.douyin.handler import DouyinHandler + +kwargs = { + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", + "Referer": "https://www.douyin.com/", + }, + "proxies": {"http": None, "https": None}, + "timeout": 10, + "cookie": 
"YOUR_COOKIE_HERE", +} + + +async def main(): + collects_id = "" # 收藏夹ID + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collects_videos( + collects_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) + + +if __name__ == "__main__": + asyncio.run(main()) From e67da44a2f293fffb4809bd1fdc735d9f9624292 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 2 Apr 2024 00:24:04 +0800 Subject: [PATCH 145/164] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E8=87=AA?= =?UTF-8?q?=E5=AE=9A=E4=B9=89UA=E7=94=9F=E6=88=90XBogus=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 并使用black格式化 --- f2/utils/xbogus.py | 102 +++++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/f2/utils/xbogus.py b/f2/utils/xbogus.py index 815c9a8..d23aac7 100644 --- a/f2/utils/xbogus.py +++ b/f2/utils/xbogus.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- -''' +""" @Description:xbogus.py @Date :2023/02/09 00:29:30 @Author :JohnserfSeed -@version :0.0.1 +@version :0.0.2 @License :Apache License 2.0 @Github :https://github.com/johnserf-seed @Mail :johnserf-seed@foxmail.com @@ -12,15 +12,18 @@ Change Log : 2023/02/09 00:29:30 - Create XBogus class 2023/06/07 17:26:02 - Refactor the XB algorithm using Python. 
+2024/04/01 00:32:30 - Black Code Style & Support custom ua ------------------------------------------------- -''' +""" import time +import base64 import hashlib -class XBogus: - def __init__(self) -> None: +class XBogus: + def __init__(self, user_agent: str = None) -> None: + # fmt: off self.Array = [ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, @@ -30,7 +33,13 @@ def __init__(self) -> None: None, None, None, None, None, None, None, None, None, None, None, None, 10, 11, 12, 13, 14, 15 ] self.character = "Dkdpgh4ZKsQB80/Mfvw36XI1R25-WUAlEi7NLboqYTOPuzmFjJnryx9HVGcaStCe=" - + # fmt: on + self.ua_key = b"\x00\x01\x0c" + self.user_agent = ( + user_agent + if user_agent is not None and user_agent != "" + else "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0" + ) def md5_str_to_array(self, md5_str): """ @@ -43,20 +52,23 @@ def md5_str_to_array(self, md5_str): array = [] idx = 0 while idx < len(md5_str): - array.append((self.Array[ord(md5_str[idx])] << 4) | self.Array[ord(md5_str[idx + 1])]) + array.append( + (self.Array[ord(md5_str[idx])] << 4) + | self.Array[ord(md5_str[idx + 1])] + ) idx += 2 return array - def md5_encrypt(self, url_path): """ 使用多轮md5哈希算法对URL路径进行加密。 Encrypt the URL path using multiple rounds of md5 hashing. 
""" - hashed_url_path = self.md5_str_to_array(self.md5(self.md5_str_to_array(self.md5(url_path)))) + hashed_url_path = self.md5_str_to_array( + self.md5(self.md5_str_to_array(self.md5(url_path))) + ) return hashed_url_path - def md5(self, input_data): """ 计算输入数据的md5哈希值。 @@ -73,8 +85,9 @@ def md5(self, input_data): md5_hash.update(bytes(array)) return md5_hash.hexdigest() - - def encoding_conversion(self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, h, p): + def encoding_conversion( + self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, h, p + ): """ 第一次编码转换。 Perform encoding conversion. @@ -82,10 +95,9 @@ def encoding_conversion(self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, y = [a] y.append(int(i)) y.extend([b, _, c, x, e, u, d, s, t, l, f, v, r, h, n, p, o]) - re = bytes(y).decode('ISO-8859-1') + re = bytes(y).decode("ISO-8859-1") return re - def encoding_conversion2(self, a, b, c): """ 第二次编码转换。 @@ -93,7 +105,6 @@ def encoding_conversion2(self, a, b, c): """ return chr(a) + chr(b) + c - def rc4_encrypt(self, key, data): """ 使用RC4算法对数据进行加密。 @@ -121,7 +132,6 @@ def rc4_encrypt(self, key, data): return encrypted_data - def calculation(self, a1, a2, a3): """ 对给定的输入值执行位运算计算,并返回结果。 @@ -130,18 +140,30 @@ def calculation(self, a1, a2, a3): x1 = (a1 & 255) << 16 x2 = (a2 & 255) << 8 x3 = x1 | x2 | a3 - return self.character[(x3 & 16515072) >> 18] + self.character[(x3 & 258048) >> 12] + self.character[(x3 & 4032) >> 6] + self.character[ - x3 & 63] - + return ( + self.character[(x3 & 16515072) >> 18] + + self.character[(x3 & 258048) >> 12] + + self.character[(x3 & 4032) >> 6] + + self.character[x3 & 63] + ) def getXBogus(self, url_path): """ 获取 X-Bogus 值。 Get the X-Bogus value. 
""" - array1 = self.md5_str_to_array("d88201c9344707acde7261b158656c0e") + + array1 = self.md5_str_to_array( + self.md5( + base64.b64encode( + self.rc4_encrypt(self.ua_key, self.user_agent.encode("ISO-8859-1")) + ).decode("ISO-8859-1") + ) + ) + array2 = self.md5_str_to_array( - self.md5(self.md5_str_to_array("d41d8cd98f00b204e9800998ecf8427e"))) + self.md5(self.md5_str_to_array("d41d8cd98f00b204e9800998ecf8427e")) + ) url_path_array = self.md5_encrypt(url_path) timer = int(time.time()) @@ -149,17 +171,16 @@ def getXBogus(self, url_path): array3 = [] array4 = [] xb_ = "" - + # fmt: off new_array = [ - 64, 0.00390625, 1, 8, + 64, 0.00390625, 1, 12, url_path_array[14], url_path_array[15], array2[14], array2[15], array1[14], array1[15], timer >> 24 & 255, timer >> 16 & 255, timer >> 8 & 255, timer & 255, ct >> 24 & 255, ct >> 16 & 255, ct >> 8 & 255, ct & 255 ] - + # fmt: on xor_result = new_array[0] for i in range(1, len(new_array)): - # a = xor_result b = new_array[i] if isinstance(b, float): b = int(b) @@ -179,21 +200,32 @@ def getXBogus(self, url_path): merge_array = array3 + array4 garbled_code = self.encoding_conversion2( - 2, 255, self.rc4_encrypt("ÿ".encode('ISO-8859-1'), self.encoding_conversion(*merge_array).encode('ISO-8859-1')).decode('ISO-8859-1')) + 2, + 255, + self.rc4_encrypt( + "ÿ".encode("ISO-8859-1"), + self.encoding_conversion(*merge_array).encode("ISO-8859-1"), + ).decode("ISO-8859-1"), + ) idx = 0 while idx < len(garbled_code): - xb_ += self.calculation(ord(garbled_code[idx]), ord( - garbled_code[idx + 1]), ord(garbled_code[idx + 2])) + xb_ += self.calculation( + ord(garbled_code[idx]), + ord(garbled_code[idx + 1]), + ord(garbled_code[idx + 2]), + ) idx += 3 - self.params = '%s&X-Bogus=%s' % (url_path, xb_) + self.params = "%s&X-Bogus=%s" % (url_path, xb_) self.xb = xb_ - return (self.params, self.xb) + return (self.params, self.xb, self.user_agent) + +if __name__ == "__main__": + url_path = 
"https://www.douyin.com/aweme/v1/web/aweme/post/?device_platform=webapp&aid=6383&channel=channel_pc_web&sec_user_id=MS4wLjABAAAAW9FWcqS7RdQAWPd2AA5fL_ilmqsIFUCQ_Iym6Yh9_cUa6ZRqVLjVQSUjlHrfXY1Y&max_cursor=0&locate_query=false&show_live_replay_strategy=1&need_time_list=1&time_list_query=0&whale_cut_token=&cut_version=1&count=18&publish_video_strategy_type=2&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Edge&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7335414539335222835&msToken=p9Y7fUBuq9DKvAuN27Peml6JbaMqG2ZcXfFiyDv1jcHrCN00uidYqUgSuLsKl1onC-E_n82m-aKKYE0QGEmxIWZx9iueQ6WLbvzPfqnMk4GBAlQIHcDzxb38FLXXQxAm" + # ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0" + ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" -if __name__ == '__main__': - url_path = "aweme_id=7196239141472980280&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333" - print("url:", url_path) - XB = XBogus() + XB = XBogus(user_agent=ua) xbogus = XB.getXBogus(url_path) - print("xbogus:", xbogus[1]) \ No newline at end of file + print(f"url: {xbogus[0]}, xbogus:{xbogus[1]}, ua: {xbogus[2]}") From f81343feaee74540e8f7c9b30e066f3103ca50c4 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 2 Apr 2024 00:25:27 +0800 Subject: [PATCH 146/164] =?UTF-8?q?perf:=20=E6=8A=96=E9=9F=B3XBogusManager?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0UA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git 
a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 2e927d9..fad2ed8 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -197,23 +197,32 @@ def gen_s_v_web_id(cls) -> str: class XBogusManager: @classmethod - def str_2_endpoint(cls, endpoint: str) -> str: + def str_2_endpoint( + cls, + user_agent: str, + endpoint: str, + ) -> str: try: - final_endpoint = XB().getXBogus(endpoint) + final_endpoint = XB(user_agent).getXBogus(endpoint) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) return final_endpoint[0] @classmethod - def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: + def model_2_endpoint( + cls, + user_agent: str, + base_endpoint: str, + params: dict, + ) -> str: if not isinstance(params, dict): raise TypeError(_("参数必须是字典类型")) param_str = "&".join([f"{k}={v}" for k, v in params.items()]) try: - xb_value = XB().getXBogus(param_str) + xb_value = XB(user_agent).getXBogus(param_str) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) From 3fc19000dd5a6f6e97a00395c314ee4da587471a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 2 Apr 2024 00:25:51 +0800 Subject: [PATCH 147/164] =?UTF-8?q?perf:=20=E6=8A=96=E9=9F=B3crwaler?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0UA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/crawler.py | 110 +++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 32 deletions(-) diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 56c98e7..da37917 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -51,106 +51,136 @@ def __init__( async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_DETAIL, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.USER_DETAIL, + params.dict(), + ) logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await 
self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_POST, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.USER_POST, + params.dict(), + ) logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_FAVORITE_A, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_FAVORITE_A, + params.dict(), ) logger.debug(_("主页喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collection(self, params: UserCollection): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_COLLECTION, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTION, + params.dict(), ) logger.debug(_("主页收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_post_json(endpoint, params.dict()) async def fetch_user_collects(self, params: UserCollects): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_COLLECTS, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTS, + params.dict(), ) logger.debug(_("收藏夹接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collects_video(self, params: UserCollectsVideo): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_COLLECTS_VIDEO, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTS_VIDEO, + params.dict(), ) logger.debug(_("收藏夹作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_music_collection(self, params: UserMusicCollection): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_MUSIC_COLLECTION, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_MUSIC_COLLECTION, + params.dict(), ) logger.debug(_("音乐收藏接口地址:{0}").format(endpoint)) return await 
self._fetch_get_json(endpoint) async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.MIX_AWEME, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.MIX_AWEME, + params.dict(), + ) logger.debug(_("合集作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_DETAIL, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.POST_DETAIL, + params.dict(), + ) logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_COMMENT, params.dict() + self.headers.get("User-Agent"), + dyendpoint.POST_COMMENT, + params.dict(), ) logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.TAB_FEED, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.TAB_FEED, + params.dict(), + ) logger.debug(_("首页推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_follow_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.FOLLOW_FEED, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.FOLLOW_FEED, + params.dict(), + ) logger.debug(_("关注作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_friend_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.FRIEND_FEED, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.FRIEND_FEED, + params.dict(), + ) logger.debug(_("朋友作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_related(self, 
params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_RELATED, params.dict() + self.headers.get("User-Agent"), + dyendpoint.POST_RELATED, + params.dict(), ) logger.debug(_("相关推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_live(self, params: UserLive): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LIVE_INFO, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.LIVE_INFO, + params.dict(), + ) logger.debug(_("直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) @@ -160,7 +190,9 @@ async def fetch_live_room_id(self, params: UserLive2): # 避免invalid session self.aclient.headers.update({"Cookie": ""}) endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LIVE_INFO_ROOM_ID, params.dict() + self.headers.get("User-Agent"), + dyendpoint.LIVE_INFO_ROOM_ID, + params.dict(), ) logger.debug(_("直播接口地址(room_id):{0}").format(endpoint)) return await self._fetch_get_json(endpoint) @@ -169,49 +201,63 @@ async def fetch_live_room_id(self, params: UserLive2): async def fetch_follow_live(self, params: FollowUserLive): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.FOLLOW_USER_LIVE, params.dict() + self.headers.get("User-Agent"), + dyendpoint.FOLLOW_USER_LIVE, + params.dict(), ) logger.debug(_("关注用户直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_locate_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LOCATE_POST, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + dyendpoint.LOCATE_POST, + params.dict(), + ) logger.debug(_("定位上一次作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_login_qrcode(self, parms: LoginGetQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_GET_QR, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_GET_QR, + parms.dict(), ) 
logger.debug(_("SSO获取二维码接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_check_qrcode(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_CHECK_QR, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_CHECK_QR, + parms.dict(), ) logger.debug(_("SSO检查扫码状态接口地址:{0}").format(endpoint)) return await self._fetch_response(endpoint) async def fetch_check_login(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_CHECK_LOGIN, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_CHECK_LOGIN, + parms.dict(), ) logger.debug(_("SSO检查登录状态接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_following(self, params: UserFollowing): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_FOLLOWING, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_FOLLOWING, + params.dict(), ) logger.debug(_("用户关注列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_follower(self, params: UserFollower): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_FOLLOWER, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_FOLLOWER, + params.dict(), ) logger.debug(_("用户粉丝列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) From a0075d9be95d666bfc00c9eb596a72838148c6be Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 2 Apr 2024 00:27:55 +0800 Subject: [PATCH 148/164] =?UTF-8?q?perf:=20TikTok=20XBogusManager=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0UA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/f2/apps/tiktok/utils.py b/f2/apps/tiktok/utils.py index 6eb8a4e..2688313 100644 --- a/f2/apps/tiktok/utils.py +++ b/f2/apps/tiktok/utils.py @@ 
-213,16 +213,25 @@ def gen_odin_tt(cls): class XBogusManager: @classmethod - def str_2_endpoint(cls, endpoint: str) -> str: + def str_2_endpoint( + cls, + user_agent: str, + endpoint: str, + ) -> str: try: - final_endpoint = XB().getXBogus(endpoint) + final_endpoint = XB(user_agent).getXBogus(endpoint) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) return final_endpoint[0] @classmethod - def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: + def model_2_endpoint( + cls, + user_agent: str, + base_endpoint: str, + params: dict, + ) -> str: # 检查params是否是一个字典 (Check if params is a dict) if not isinstance(params, dict): raise TypeError(_("参数必须是字典类型")) @@ -230,7 +239,7 @@ def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: param_str = "&".join([f"{k}={v}" for k, v in params.items()]) try: - xb_value = XB().getXBogus(param_str) + xb_value = XB(user_agent).getXBogus(param_str) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) From f2efcadc66d4b9c357096486649012a41fb9dd4c Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Tue, 2 Apr 2024 00:29:30 +0800 Subject: [PATCH 149/164] =?UTF-8?q?perf:=20=E6=8A=96=E9=9F=B3crwaler?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0UA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/crawler.py | 44 +++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/f2/apps/tiktok/crawler.py b/f2/apps/tiktok/crawler.py index ae92374..cb65c29 100644 --- a/f2/apps/tiktok/crawler.py +++ b/f2/apps/tiktok/crawler.py @@ -43,63 +43,81 @@ def __init__( async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_DETAIL, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + tkendpoint.USER_DETAIL, + params.dict(), + ) logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await 
self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_POST, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + tkendpoint.USER_POST, + params.dict(), + ) logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_LIKE, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + tkendpoint.USER_LIKE, + params.dict(), + ) logger.debug(_("喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collect(self, params: UserCollect): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_COLLECT, params.dict() + self.headers.get("User-Agent"), + tkendpoint.USER_COLLECT, + params.dict(), ) logger.debug(_("收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_play_list(self, params: UserPlayList): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_PLAY_LIST, params.dict() + self.headers.get("User-Agent"), + tkendpoint.USER_PLAY_LIST, + params.dict(), ) logger.debug(_("合辑列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_MIX, params.dict() - ) # fmt: off + self.headers.get("User-Agent"), + tkendpoint.USER_MIX, + params.dict(), + ) logger.debug(_("合辑作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.AWEME_DETAIL, params.dict() + self.headers.get("User-Agent"), + tkendpoint.AWEME_DETAIL, + params.dict(), ) logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostComment): endpoint = 
XBogusManager.model_2_endpoint( - tkendpoint.POST_COMMENT, params.dict() + self.headers.get("User-Agent"), + tkendpoint.POST_COMMENT, + params.dict(), ) logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_recommend(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.HOME_RECOMMEND, params.dict() + self.headers.get("User-Agent"), + tkendpoint.HOME_RECOMMEND, + params.dict(), ) logger.debug(_("首页推荐接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) From 021a4f58c9db5119e34267719811007f6cc6e873 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 18:00:48 +0800 Subject: [PATCH 150/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84api?= =?UTF-8?q?=E5=BC=82=E5=B8=B8=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/exceptions/api_exceptions.py | 45 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/f2/exceptions/api_exceptions.py b/f2/exceptions/api_exceptions.py index 069677e..9241f79 100644 --- a/f2/exceptions/api_exceptions.py +++ b/f2/exceptions/api_exceptions.py @@ -8,70 +8,71 @@ class APIError(Exception): """基本API异常类,其他API异常都会继承这个类""" - def __init__(self, status_code=None): - self.status_code = status_code + def __init__(self, message=None, status_code=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.status_code = status_code + super().__init__(message) - def display_error(self): - """显示错误信息和状态码(如果有的话)""" - return f"Error: {self.args[0]}." + ( - f" Status Code: {self.status_code}." 
if self.status_code else "" + def __str__(self): + """返回错误信息和文件路径(如果有的话)""" + return f"{super().__str__()}" + ( + f" Status Code: {self.status_code}" if self.status_code else "" ) class APIConnectionError(APIError): """当与API的连接出现问题时抛出""" - def display_error(self): - return f"API Connection Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIUnavailableError(APIError): """当API服务不可用时抛出,例如维护或超时""" - def display_error(self): - return f"API Unavailable Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APINotFoundError(APIError): """当API端点不存在时抛出""" - def display_error(self): - return f"API Not Found Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIResponseError(APIError): """当API返回的响应与预期不符时抛出""" - def display_error(self): - return f"API Response Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIRateLimitError(APIError): """当达到API的请求速率限制时抛出""" - def display_error(self): - return f"API Rate Limit Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APITimeoutError(APIError): """当API请求超时时抛出""" - def display_error(self): - return f"API Timeout Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIUnauthorizedError(APIError): """当API请求由于授权失败而被拒绝时抛出""" - def display_error(self): - return f"API Unauthorized Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIRetryExhaustedError(APIError): """当API请求重试次数用尽时抛出""" - def display_error(self): - return f"API Retry Exhausted Error: {self.args[0]}." 
+ def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) From 9fe53468b3d1cdb3c4f5bd7dfdac5aa5a73e8fb1 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 18:02:55 +0800 Subject: [PATCH 151/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=BC=82=E5=B8=B8=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/exceptions/file_exceptions.py | 41 +++++++++++++------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/f2/exceptions/file_exceptions.py b/f2/exceptions/file_exceptions.py index 3bbd5ef..6edb2b8 100644 --- a/f2/exceptions/file_exceptions.py +++ b/f2/exceptions/file_exceptions.py @@ -8,50 +8,41 @@ class FileError(Exception): """基本的文件错误异常类,其他文件异常都会继承这个类""" - def __init__(self, filepath=None): - self.filepath = filepath + def __init__(self, message, filepath=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.filepath = filepath + super().__init__(message) - def display_error(self): - """显示错误信息和文件路径(如果有的话)""" - return f"File Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __str__(self): + """返回错误信息和文件路径(如果有的话)""" + return f"{super().__str__()} Filepath: {self.filepath}" if self.filepath else "" -class FileNotFound(FileError, FileNotFoundError): +class FileNotFound(FileError): """文件不存在错误""" - def display_error(self): - return f"File Not Found Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message=None, filepath=None): + super().__init__(message, filepath) -class FilePermissionError(FileError, PermissionError): +class FilePermissionError(FileError): """文件权限错误""" - def display_error(self): - return f"File Permission Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." 
if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) class FileReadError(FileError): """文件读取错误""" - def display_error(self): - return f"File Read Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) class FileWriteError(FileError): """文件写入错误""" - def display_error(self): - return f"File Write Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) From 3802f91044521c251bba058d4c537eb85a382728 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 18:08:13 +0800 Subject: [PATCH 152/164] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=BA=93=E5=BC=82=E5=B8=B8=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/exceptions/db_exceptions.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/f2/exceptions/db_exceptions.py b/f2/exceptions/db_exceptions.py index 38456a6..1edb871 100644 --- a/f2/exceptions/db_exceptions.py +++ b/f2/exceptions/db_exceptions.py @@ -8,46 +8,48 @@ class DatabaseError(Exception): """基本数据库异常类,其他数据库异常都会继承这个类""" - def __init__(self, *args, **kwargs): + def __init__(self, message=None, db=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.db = db + super().__init__(message) - def display_error(self): - """显示错误信息""" - return f"Database Error: {self.args[0]}." + def __str__(self): + """返回错误信息和db(如果有的话)""" + return f"{super().__str__()}" + (f" Database: {self.db}" if self.db else "") class DatabaseConnectionError(DatabaseError): """当与数据库的连接出现问题时抛出""" - def display_error(self): - return f"Database Connection Error: {self.args[0]}." 
+ def __init__(self, message=None, db=None): + super().__init__(message, db) class RecordNotFoundError(DatabaseError): """当在数据库中找不到预期的记录时抛出""" - def display_error(self): - return f"Record Not Found Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class MultipleRecordsFoundError(DatabaseError): """当期望找到一个记录但实际找到多个时抛出""" - def display_error(self): - return f"Multiple Records Found Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class DatabaseTimeoutError(DatabaseError): """当数据库操作超时时抛出""" - def display_error(self): - return f"Database Timeout Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class DatabaseConstraintError(DatabaseError): """当违反数据库约束时抛出,例如唯一性约束""" - def display_error(self): - return f"Database Constraint Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) From d6f940609cce8130bd8b683422dfe1755309cb8b Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 18:18:12 +0800 Subject: [PATCH 153/164] =?UTF-8?q?refactor:=20=E4=BF=AE=E6=94=B9=E9=87=8A?= =?UTF-8?q?=E6=94=BE=E5=BC=82=E5=B8=B8=E6=B6=88=E6=81=AF=E5=92=8C=E5=AE=8C?= =?UTF-8?q?=E5=96=84i18n=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/cli/cli_commands.py | 2 +- f2/crawlers/base_crawler.py | 28 +++++++++++++++------------- f2/utils/conf_manager.py | 17 +++++------------ 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/f2/cli/cli_commands.py b/f2/cli/cli_commands.py index c17a532..a1da310 100644 --- a/f2/cli/cli_commands.py +++ b/f2/cli/cli_commands.py @@ -134,7 +134,7 @@ def set_cli_config(ctx, **kwargs): try: asyncio.run(run_app(kwargs)) except APIError as e: - logger.error(e.display_error()) + logger.error(e) async def run_app(kwargs): diff --git a/f2/crawlers/base_crawler.py 
b/f2/crawlers/base_crawler.py index 791f7b0..bd8cb1b 100644 --- a/f2/crawlers/base_crawler.py +++ b/f2/crawlers/base_crawler.py @@ -175,7 +175,7 @@ async def get_fetch_data(self, url: str): self.handle_http_status_error(http_error, url, attempt + 1) except APIError as e: - e.display_error() + logger.error(e) async def post_fetch_data(self, url: str, params: dict = {}): """ @@ -223,7 +223,7 @@ async def post_fetch_data(self, url: str, params: dict = {}): self.handle_http_status_error(http_error, url, attempt + 1) except APIError as e: - e.display_error() + logger.error(e) async def head_fetch_data(self, url: str): """ @@ -252,7 +252,7 @@ async def head_fetch_data(self, url: str): self.handle_http_status_error(http_error, url, 1) except APIError as e: - e.display_error() + logger.error(e) def handle_http_status_error(self, http_error, url: str, attempt): """ @@ -277,31 +277,33 @@ def handle_http_status_error(self, http_error, url: str, attempt): if response is None or status_code is None: logger.error( - _("HTTP状态错误: {0}, URL: {1}, 尝试次数: {2}").format( + _("HTTP状态错误:{0}, URL:{1}, 尝试次数:{2}").format( http_error, url, attempt ) ) - raise APIResponseError(f"处理HTTP错误时遇到意外情况: {http_error}") + raise APIResponseError( + _("处理HTTP错误时遇到意外情况:{0}").format(http_error) + ) if status_code == 302: pass elif status_code == 404: - raise APINotFoundError(f"HTTP Status Code {status_code}") + raise APINotFoundError(_("HTTP状态码错误:"), status_code) elif status_code == 503: - raise APIUnavailableError(f"HTTP Status Code {status_code}") + raise APIUnavailableError(_("HTTP状态码错误:"), status_code) elif status_code == 408: - raise APITimeoutError(f"HTTP Status Code {status_code}") + raise APITimeoutError(_("HTTP状态码错误:"), status_code) elif status_code == 401: - raise APIUnauthorizedError(f"HTTP Status Code {status_code}") + raise APIUnauthorizedError(_("HTTP状态码错误:"), status_code) elif status_code == 429: - raise APIRateLimitError(f"HTTP Status Code {status_code}") + raise 
APIRateLimitError(_("HTTP状态码错误:"), status_code) else: logger.error( - _("HTTP状态错误: {0}, URL: {1}, 尝试次数: {2}").format( - status_code, url, attempt + _("HTTP状态错误:{0}, URL:{1}, 尝试次数:{2}").format( + http_error, url, attempt ) ) - raise APIResponseError(f"HTTP状态错误: {status_code}") + raise APIResponseError(_("HTTP状态码错误:"), status_code) async def close(self): await self.aclient.aclose() diff --git a/f2/utils/conf_manager.py b/f2/utils/conf_manager.py index f7cff44..c1dca20 100644 --- a/f2/utils/conf_manager.py +++ b/f2/utils/conf_manager.py @@ -1,7 +1,6 @@ # path: f2/utils/conf_manager.py import f2 -import time import yaml import click @@ -27,14 +26,10 @@ def __init__(self, filepath: str = f2.F2_CONFIG_FILE_PATH): def load_config(self) -> dict: """从文件中加载配置 (Load the conf from the file)""" - try: - if not self.filepath.exists(): - raise FileNotFound(_("'{0}' 配置文件路径不存在").format(self.filepath)) - return yaml.safe_load(self.filepath.read_text(encoding="utf-8")) or {} - except FileNotFound as e: - e.display_error() - time.sleep(2) - exit(0) + if not self.filepath.exists(): + raise FileNotFound(_("配置文件不存在"), self.filepath) + + return yaml.safe_load(self.filepath.read_text(encoding="utf-8")) or {} def get_config(self, app_name: str, default=None) -> dict: """ @@ -58,9 +53,7 @@ def save_config(self, config: dict): try: self.filepath.write_text(yaml.dump(config), encoding="utf-8") except PermissionError: - raise FilePermissionError( - _("'{0}' 配置文件路径无写权限").format(self.filepath) - ) + raise FilePermissionError(_("配置文件路径无写权限"), self.filepath) def backup_config(self): """在进行更改前备份配置文件 (Backup the conf file before making changes)""" From 315800f7d537032f25c77741a27e3fb39403469a Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 21:49:02 +0800 Subject: [PATCH 154/164] Create CHANGELOG.md --- CHANGELOG.md | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new 
file mode 100644 index 0000000..4a3351c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,293 @@ +# Changelog + +本项目的所有变更都将记录在此文件中。 +格式基于 [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)、 +本项目遵循 [Semantic Versioning](https://semver.org/spec/v2.0.0.html)。 + +## [Unreleased] + +- `0.0.1.6`版本中添加对`weibo`,`x`的支持 + +## [0.0.1.5] - 2024-04-04 + +### Added + +- 添加安全政策汇报 +- 添加`run_app`时输出版本号 +- 添加`douyin`用户收藏夹下载 +- 添加`douyin`的`filter`对非法收藏夹名字符的处理 +- 添加`douyin`用户音乐收藏下载 +- 添加`douyin`音乐歌词json转lrc方法 +- 添加`douyin`用户收藏音乐下载任务 +- 添加`douyin`配置`--lyric` +- 添加`f2 utils`的`get_cookie_from_browser`方法 +- 添加`f2 utils`的`check_invalid_naming`方法 +- 添加`f2 utils`的`merge_config`方法 +- 添加`douyin`粉丝用户接口方法 +- 添加`douyin`关注用户接口方法 +- 添加`douyin`,`tiktok`数据过滤器的原始字段 +- 添加对30位时间戳进行格式化 +- 添加测试抖音原声歌词转换 +- 添加获取抖音用户粉丝代码片段 +- 添加获取抖音用户关注代码片段 +- 添加`fetch`方法的`timeout`参数,避免请求过于频繁 +- 添加`douyin`用户收藏夹代码片段 +- 添加`自定义UA`生成`XBogus`参数 + +### Changed + +- 修改`douyin`主页收藏模式为`collection` +- 更正`douyin`文档`user-mix`方法 +- 修改`F2`版本号输出 +- 修改`douyin`,`tiktok`帮助信息 +- 优化`douyin`,`tiktok`的`utils`中`msToken`,`ttwid`,`sec_user_id`,`aweme_id`,`webcast_id`,具体请求错误的输出 +- 明确`douyin`,`tiktok`所有`fetch`函数返回为过滤器类型 +- 更新了F2版本号的导入 +- 优化`tiktok`的`handler`处理播放列表的逻辑 +- 优化`douyin`,`tiktok`中对具体请求错误的输出 +- 更正`douyin`,`tiktok`受`collects_id`类型导致的多次转换 +- 更正`tiktok`的`handler`多种获取用户信息方法的参数 +- 添加`base_downloader`对重命名文件时的异常处理 +- 更新`_dl`的`head`请求`Content-Length`失效时调用`get`方法 +- 更新`douyin`,`tiktok`接口文档代码片段 +- 更新`douyin`,`tiktok`在`cli`中的`handler_auto_cookie`方法 +- 更新`douyin`,`tiktok`在`cli`中的`handler_naming`方法 +- 更新`douyin`,`tiktok`的`--mode`统一`choice`管理 +- 更新`F2`帮助说明格式 +- 统一了`douyin`关注粉丝用户的`total`字段 +- 修改下载逻辑以提高性能 +- 更新`douyin`,`tiktok`数据库字段(需要删除旧数据库或迁移) +- 优化`douyin`,`tiktok`的`handler`模块注释表达与方法参数格式 +- 重构了所有`handle`方法的调用 +- 重构了所有`fetch`方法的返回类型 +- 调整`douyin` `mix`作品在没有更多数据时提前`break` +- 调整`tiktok`获取用户数据去除地区参数 +- 优化在适当的位置`yield`作品数据 +- 修改日志输出级别 +- 重构数据库异常类 +- 重构文件异常类 +- 重构接口异常类 +- 完善`i18n`消息 + +### Deprecated + +- 弃用`douyin` `UserLiveFilter`的无用方法 +- 弃用`douyin` 
`PostDetailFilter`的无用方法 + +### Removed + +- 删除文档旧版本`-d`指令 +- 移除`tiktok`的`post\detail`接口示例 +- 删除无用的`__init__.py`文件 +- 删除`douyin`,`tiktok`:`cli`下的`get_cookie_from_browser`方法 +- 删除`example`示例 +- 删除无用导入 +- 删除`apps`中db模块的`aiosqlite`导入与错误处理 + +### Fixed + +- 修复本地化服务 +- 修复`douyin`关注用户数据过滤器`_to_list`方法的排除字段 +- 修复`douyin`数据过滤器时间戳类型 + +### Security + +- 更新`rich`版本到`13.7.1` +- 更新`douyin`接口版本到`19.5.0` + + +## [0.0.1.4] - 2024-02-16 + +### Added + +- 添加`black`格式化白名单 +- 添加`douyin`,`tiktok`命令行对`--proxies`命令的支持 +- 添加`tiktok`数据库忽略字段 +- 添加文档QA页面 +- 添加`douyin`对`msToken`值验证 +- 添加写入配置文件时处理文件权限问题 +- 添加提取有效URL的错误类型 +- 添加`split_filename`方法处理不同系统下文件名长度 +- 添加`douyin`,`tiktok`:`cli`模块的`merge_config`方法 +- 添加了低频配置文件默认路径 +- 添加`split_filename`函数单元测试 +- 添加`base_downloader`模块日志堆栈错误输出 +- 添加`tiktok`的`get_secuid`方法对不支持地区的错误消息 +- 添加`douyin`,`tiktok`:`utils`模块对空urls列表的错误处理 +- 添加`douyin`,`tiktok`:`utils`模块对AwemeIdFetcher的连接失败处理 +- 添加`douyin`图集`aweme_id`测试链接 +- 添加文档`algolia`配置参数 +- 添加`douyin`,`tiktok`:`{aweme_id}`与`{uid}`的文件名模板 + +### Changed + +- 重写`douyin`,`tiktok` handler对`crawler`与`dl`的配置,提升性能 +- 将`dict`类型的`--proxies`添加默认值`None` +- 将配置文件中`url`设置为空,防止因为缺省出错 +- 对高低频配置合并时只合并非空值 +- 更新翻译模板 +- 调整`timestamp_2_str`方法的默认时间字符串格式 +- 将低频参数配置移入`F2`的`conf.yaml` +- 修改`tiktok`对`msToken`值验证 +- 修改`douyin`,`tiktok`的`TokenManager`里固定配置的读取方式 +- 改进 `douyin`,`tiktok` handler类的结构和清晰度 +- 更新方法签名,使用 `self` 替代 `cls` +- 在适当的情况下,用异步实例方法替代类方法 +- 更新`douyin`,`tiktok` `handler`类下的`fetch`用法 +- 修改`main`入口函数,实例化每个app的`handler`并传递给相应的方法 +- 更新`douyin`,`tiktok`的`get_or_add_user_data`方法,以处理`Filter`类型的数据 +- 更新`F2 -d`参数,现在需要指定`debug`模式 +- 更新`conf_manager`模块,添加了日志输出 +- 更新`douyin`接口文档`format-file-name`代码片段 +- 更新`douyin`,`tiktok`的`crawler`模块重新添加异步上下文管理器 +- 更新`douyin`,`tiktok`的`utils`模块捕获错误时显示具体类名 +- 更新了配置文件加载逻辑 +- 更新了日志输出 +- 更新`split_filename`方法适配双语种环境 +- 更新`douyin`,`tiktok`的`crawler`模块获取`response`的多种http请求方法 +- 修改`file_exceptions`模块,使输出更简洁 +- 修改`db_exceptions`模块,使输出更简洁 +- 修改`api_exceptions`模块,使输出更简洁 +- 更改`base_crawler`模块里的方法名称 +- 
完善所有`APIConnectionError`的错误处理 +- 更新在无代理时配置默认值 +- 改进`douyin`的cli模块的`handler_sso_login`方法 +- 更新`douyin`,`tiktok`单元测试用例 +- 更新接口文档开发者代码片段 +- 修改`cli_console`进度条默认宽度 + +### Deprecated + +- 弃用`douyin`:`extract_desc_from_share_desc`方法 +- 弃用`douyin`:`get_request_sizes`方法 + +### Removed + +- 移除文档`reference`页面 +- 删除`douyin`:`VerifyFpManager`注释代码 +- 删除`douyin`: `cli`模块的英文注释 +- 移除`split_filename`方法的`desc_length_limit`参数 +- 删除`conf.yaml`中的代理值 +- 删除`base_crawler`模块选择随机代理的注释代码 +- 删除`base_downloader`模块中`_download_chunks`方法的`finally` +- 删除`F2 conf.yaml`中的代理值与无效值 +- 删除弃用接口测试 + +### Fixed + +- 修复部分自定义配置失效的问题 +- 修复接口缺失时间戳值导致的问题 +- 修复`get_or_add_user_data`中的`AttributeError`问题 +- 修复了非windows系统下创建长中文名文件出错的问题 +- 修复了`tiktok`文件名出错的问题 +- 修复了在更新配置时缺少自定义配置文件路径的问题 +- 修复`douyin`直播嵌套ts文件无法获取字节大小的问题 +- 修复`base_downloader`下载文件区块时未能正确捕获超时错误 +- 修复`cli`退出时`base_downloader`出现`UnboundLocalError`错误的问题 +- 修复`douyin`收藏作品下载错误的问题 +- 修复`douyin`,`tiktok`:`cli`的默认参数影响kwargs合并 +- 修正`douyin`的`utils`模块对`aweme_id`的处理 + +### Security + +- 依赖更新`pyyaml6.0 -> pyyaml6.0.1` + + +## [0.0.1.3] - 2024-01-07 + +### Added + +- 添加`douyin`,`tiktok`对`--interval`命令的支持 + +### Changed + +- 取消`bool`参数的默认值,防止配置文件与`cli`命令冲突 +- 调整日志控制台输出与级别 +- 修改默认与自定义配置读取与合并 +- 恢复`tiktok`接口模型的`msToken`值 +- 修改自定义文件名模板中作品创建时间的键名 +- 更新主配置文件格式 + + +## [0.0.1.2] - 2024-01-05 + +### Added + +- 添加依赖缺失时输出错误到日志 +- 使用`black`统一代码风格 +- 添加`douyin`单个作品(one)与`--sso-login`命令帮助 + +### Changed + +- `--auto-cookie`命令去掉`none`参数 +- 所有app的`--interval`命令参数改为`all` +- 完善`douyin`的`cli`帮助说明 +- 更新`F2`帮助说明 +- 完善`tiktok`的`cli`帮助说明 +- 修改代码片段高亮 +- 更新项目文档 +- 更新翻译文件 + +### Fixed + +- 修复`--init-config`命令初始化的问题 +- 修复`douyin`文档`user-live`代码片段错误方法名 +- 修复`douyin`文档`user-mix`代码片段`aweme_id`不明的问题 +- 修复`douyin`,`tiktok`未提供参数也自动获取ck +- 修复显示语言中`en_US`缺失 +- 修复接口文档的代码片段格式与错误 +- 使用缺省`none`来避免触发`callback`干预程序运行 + + +## [0.0.1.1] - 2024-01-01 + +### Added + +- 添加依赖缺失时输出错误到日志 + +### Fixed + +- 修复pyproject.toml依赖部分遗漏造成的`Error: No such command` + + +## [0.0.1-pw.1] - 2024-01-01 + +### Added + +- 创建文档 +- 
添加`douyin`,`tiktok`应用 +- 添加`douyin`,`tiktok`测试 +- 添加代码示例 +- 添加`i18n`翻译模板文件 +- 添加`show_qrcode`方法,用于显示二维码 +- 添加`s_v_web_id`方法 +- `douyin`:添加`room_id`查询直播间信息接口 +- `douyin`:添加`--sso-login`命令,使用扫码获取cookie +- `douyin`:添加`sso登录`测试 +- 添加`douyin`,`tiktok`开发接口文档 +- 添加`douyin`,`tiktok`接口地址生成XB的方法 +- 添加`douyin`,`tiktok`接口文档代码片段 +- 创建目录时支持绝对与相对路径 +- 添加`douyin`,`tiktok`获取列表`secuid`,`unique_id`,`aweme_id`的方法 + +### Changed + +- 细化`BaseCrawler`的`response`处理方法 +- 自定义将日志输出到控制台 +- 将guide文档调整为统一文件夹下 +- 修改文档代码片段高亮行号 +- 重命名接口模型生成XB的方法 +- 修改`douyin`提取列表用户id返回值变量名 +- 修改`douyin`提取列表用户直播rid返回值变量名 +- 完善配置文件site-config部分 +- 修改默认配置参数置空 + +### Fixed + +- 修复`douyin`用户数据库名称 +- 修复`douyin`直播结束后无法下载 +- 修复`douyin`在`handler_user_mix`方法中`AsyncUserDB`只初始化一次 +- 修复`user-nickname`代码片段导入 +- 修复`douyin`文档`user-get-add`代码片段导入 +- 修复`tiktok`文档`user-mix`代码导入与缩进 +- 修复`tiktok`文档`one-video`代码缩进 From 62eb5b2c6e67dd691112eebc582c3bd93c2a3319 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 22:58:53 +0800 Subject: [PATCH 155/164] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Ddouyin?= =?UTF-8?q?=E6=94=B6=E8=97=8F=E5=A4=B9=E4=BD=9C=E5=93=81=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 71 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index c8295bf..1b83389 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -685,6 +685,10 @@ async def handle_user_collects(self): self.kwargs, aweme_data_list._to_list(), tmp_user_path ) + logger.info( + _("爬取结束,共爬取 {0} 个收藏夹").format(len(choose_collects_id)) + ) + async def select_user_collects( self, collects: UserCollectsFilter ) -> Union[str, List[str]]: @@ -702,7 +706,9 @@ async def select_user_collects( rich_console.print(_("0: [bold]全部下载[/bold]")) for i in range(len(collects.collects_id)): rich_console.print( - _("{0}: {1} 
(包含 {2} 个作品,收藏夹ID {3})").format( + _( + "{0}:{1} (包含 {2} 个作品[以网页实际数量为准],收藏夹ID {3})" + ).format( i + 1, collects.collects_name[i], collects.total_number[i], @@ -711,7 +717,7 @@ async def select_user_collects( ) # rich_prompt 会有字符刷新问题,暂时使用rich_print - rich_console.print(_("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]")) + rich_console.print(_("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]")) selected_index = int( rich_prompt.ask( # _("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]"), @@ -751,7 +757,7 @@ async def fetch_user_collects( while collected < max_counts: logger.debug("===================================") logger.debug( - _("当前请求的max_cursor: {0}, max_counts: {1}").format( + _("当前请求的max_cursor:{0}, max_counts:{1}").format( max_cursor, max_counts ) ) @@ -762,26 +768,26 @@ async def fetch_user_collects( collects = UserCollectsFilter(response) yield collects + # 更新已经处理的收藏夹数量 (Update the number of collections processed) + collected += len(collects.collects_id) + if not collects.has_more: - logger.info(_("所有收藏夹ID采集完毕")) break logger.debug( - _("收藏夹ID: {0} 收藏夹标题: {1}").format( + _("收藏夹ID:{0} 收藏夹标题:{1}").format( collects.collects_id, collects.collects_name ) ) logger.debug("===================================") - # 更新已经处理的收藏夹数量 (Update the number of collections processed) - collected += len(collects.collects_id) max_cursor = collects.max_cursor # 避免请求过于频繁 logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.info(_("爬取结束,共爬取 {0} 个收藏夹").format(collected)) + logger.info(_("爬取结束,共找到 {0} 个收藏夹").format(collected)) async def fetch_user_collects_videos( self, @@ -807,14 +813,14 @@ async def fetch_user_collects_videos( max_counts = max_counts or float("inf") videos_collected = 0 - logger.info(_("开始爬取收藏夹: {0} 的作品").format(collects_id)) + logger.info(_("开始爬取收藏夹:{0} 的作品").format(collects_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) 
logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -828,31 +834,42 @@ async def fetch_user_collects_videos( ) response = await crawler.fetch_user_collects_video(params) video = UserCollectionFilter(response) - yield video - if not video.has_aweme: - logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) - if not video.has_more: - logger.info(_("收藏夹: {0} 所有作品采集完毕").format(collects_id)) - break + # 更新已处理视频数量 + videos_collected += len(video.aweme_id) - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) - logger.debug( - _("作品ID: {0} 作品文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname - ) - ) - logger.debug("===================================") + if video.has_aweme: + if not video.has_more: + yield video + break - # 更新已经处理的作品数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - max_cursor = video.max_cursor + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) + logger.debug( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + video.aweme_id, video.desc, video.nickname + ) + ) + logger.debug("=====================================") + + yield video + max_cursor = video.max_cursor + else: + logger.info(_("{0} 页没有找到作品").format(max_cursor)) + + if not video.has_more: + break + + max_cursor = video.max_cursor # 避免请求过于频繁 logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.info(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) + logger.info( + _("收藏夹:{0} 所有作品采集完毕,共爬取 {1} 个作品").format( + collects_id, videos_collected + ) + ) @mode_handler("mix") async def handle_user_mix(self): From c8f013eaae730df310723af2cc95182aeb91516d Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 23:01:29 +0800 Subject: [PATCH 156/164] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 4a3351c..bd7b907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - 添加获取抖音用户关注代码片段 - 添加`fetch`方法的`timeout`参数,避免请求过于频繁 - 添加`douyin`用户收藏夹代码片段 +- 添加对丢失链接的重试逻辑 - 添加`自定义UA`生成`XBogus`参数 ### Changed From e784b7b61dff779207ba233e008ae63563ece6ca Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 23:37:17 +0800 Subject: [PATCH 157/164] =?UTF-8?q?perf:=20=E4=BF=AE=E5=A4=8D=E6=94=B6?= =?UTF-8?q?=E8=97=8F=E5=8E=9F=E5=A3=B0=E6=B2=A1=E6=9C=89=E6=AD=8C=E8=AF=8D?= =?UTF-8?q?=E6=97=B6=E6=95=B0=E6=8D=AE=E7=BB=93=E6=9E=84=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/filter.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index f34d57b..1465ee3 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -558,7 +558,12 @@ def lyric_type(self): @property def lyric_url(self): - return self._get_list_attr_value("$.mc_list[*].lyric_url") + # 不是每个作品都有 lyric_url,如果不存在则为 None + lyric_urls = [] + for item in self._data.get("mc_list"): + lyric_urls.append(item.get("lyric_url", None)) + + return lyric_urls @property def play_url(self): @@ -1412,6 +1417,7 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } + class UserLiveFilter(JSONModel): # live @property From 8b4ce54db6d8e96903dc40322f8a2e319b453d31 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 23:51:46 +0800 Subject: [PATCH 158/164] =?UTF-8?q?perf:=20=E6=B7=BB=E5=8A=A0tiktok?= =?UTF-8?q?=E5=AF=B9UserProfile=E8=AF=B7=E6=B1=82=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E4=B8=BA=E7=A9=BA=E7=9A=84=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/tiktok/handler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/f2/apps/tiktok/handler.py 
b/f2/apps/tiktok/handler.py index 3f1c358..9cd3705 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -33,6 +33,7 @@ create_or_rename_user_folder, ) from f2.cli.cli_console import RichConsoleManager +from f2.exceptions.api_exceptions import APIResponseError rich_console = RichConsoleManager().rich_console rich_prompt = RichConsoleManager().rich_prompt @@ -70,6 +71,9 @@ async def handler_user_profile( async with TiktokCrawler(self.kwargs) as crawler: params = UserProfile(secUid=secUid, uniqueId=uniqueId) response = await crawler.fetch_user_profile(params) + user = UserProfileFilter(response) + if user.nickname is None: + raise APIResponseError(_("API内容请求失败,请更换新cookie后再试")) return UserProfileFilter(response) async def get_user_nickname( From 0db0a07dbf07b5a1d990926e7f70b2e2d4b9f935 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 23:52:05 +0800 Subject: [PATCH 159/164] =?UTF-8?q?perf:=20=E6=B7=BB=E5=8A=A0douyin?= =?UTF-8?q?=E5=AF=B9UserProfile=E8=AF=B7=E6=B1=82=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E4=B8=BA=E7=A9=BA=E7=9A=84=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/apps/douyin/handler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 1b83389..f2af93f 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -52,6 +52,7 @@ show_qrcode, ) from f2.cli.cli_console import RichConsoleManager +from f2.exceptions.api_exceptions import APIResponseError rich_console = RichConsoleManager().rich_console rich_prompt = RichConsoleManager().rich_prompt @@ -84,6 +85,9 @@ async def handler_user_profile( async with DouyinCrawler(self.kwargs) as crawler: params = UserProfile(sec_user_id=sec_user_id) response = await crawler.fetch_user_profile(params) + user = UserProfileFilter(response) + if user.nickname is None: + raise APIResponseError(_("API内容请求失败,请更换新cookie后再试")) return 
UserProfileFilter(response) async def get_user_nickname( From 7fe08f56c712db8a51253ced275ccbc023763a13 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Thu, 4 Apr 2024 23:52:42 +0800 Subject: [PATCH 160/164] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd7b907..8c0cd88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ - 添加`douyin`用户收藏夹代码片段 - 添加对丢失链接的重试逻辑 - 添加`自定义UA`生成`XBogus`参数 +- 添加`douyin`,`tiktok`对`UserProfile`请求内容为空的报错 ### Changed From c2e68b251d8c59005e2318b6383cffc3612f1192 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Fri, 5 Apr 2024 00:34:51 +0800 Subject: [PATCH 161/164] =?UTF-8?q?perf:=20=E6=9B=B4=E6=96=B0=E7=BF=BB?= =?UTF-8?q?=E8=AF=91=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- f2/languages/en_US/LC_MESSAGES/en_US.mo | Bin 29107 -> 34741 bytes f2/languages/zh_CN/LC_MESSAGES/zh_CN.mo | Bin 27692 -> 32992 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/f2/languages/en_US/LC_MESSAGES/en_US.mo b/f2/languages/en_US/LC_MESSAGES/en_US.mo index e263a600e773479f362cdf668f7774ac859cebdb..55aebf4f5eb21f9126ab3a08c08f4d4826a9aac1 100644 GIT binary patch literal 34741 zcmd6w34B~vb^jj(LNN&mBq3yj7eZn?NU|d*A#oPsc#FXswj4u@u}7LGY4B)9%#186 z5U`!tmSfp+;(d$l#2bm%c(=UJLTM?5KTBx~4fGGy8%f<#%D)9lTmIj3&wX#+NF&+F zpa1{!A9D43@7;UP-OfGtoOACrKYj1p);att-{LqogZtmn#fY*XY!0W(ofbR!i110Off&AyZ^Cri6H~3-j7H|aE41N-n^1ca<0Q2Cj z;EjB|3VaZJA2<>G7+3?|4!#851AY^fbiV_y1h0blVDJ`;W5Eyb-T;0WYy+j=K(7BvUcY@b~^8PVU%9&)}XWRFAURgDG$ZDCxfrjsw3BUJc$rCtVNT2}-{`3et7XeEa=* zP}=n}DEYq%J^+3Pl<^q!A^m>4#S!2Yd>;i$J12wEPoD*)pBLEogJ31^e`?=%l>fi2(% z=Xue+^3hYpL8ca0K`SxD=Fj z{|uD)%W0J0?Vx-gW8V{?%Ke*ZNn{dL>jjsvSYQ$Xpb`Jm8!3Y7M41*Lvp2Ze4PlyQG6 zohRwWT8x8qo%1v(bdG{T_l(7#fzq#kaIem@a`1lMp8}=-Pg?vgNLM;N_WeeLn7oew zC0*RUKV{z!g3``E0i|C53X;{ihelrqCPB%!7<>ra0)7_!5jYFHk4mfp*Mh=Fw~bVs z1WNxjgVN5Iz#G6n0zU!1295`ZJm5H=1{=W%;GcogU)MgU^X6V~F7NX}`Tken8t^xu 
z3$CP6(q9KbDd)JwzW}f2{g-ya4g z-8}pLJP7MJ7eFchUqNZ#Z$Pp-*F2)@-89h58<4JbR)HS}zX5&#e9tJ29}Y@AXMn;t z&7k!Ac2M%40%bn^1eAWgjKPxkVc-YB8DI<81kMCs2c=%)N3#Zjlldd`o(Co0EB5_+ zpwRz&P{#eLG1^b#KCbv>;DrM-K>_kkxtM2zztP{#3HGxhtO;I+Ju0ww?BAR@-80VUl| z`~9mHzYR*gehj`F%!ATj{|4R#-ux+@?~_3pr}_5%JShE`2c?~tBP4~-D#5$K$3e-z z0=y4A2fi2lcW^lPPMDsdaz=uefo&F7TU-kYzib5S!E>PW^BuDs=Mk_9>;Tt+!bhL{ z3~dI}pp45AQ0nnx@I&BLvvvF{!P|MC0t)>m@J?_um;nD66n-D}S$GXx4-N+31@jAz z03QNpf%k)(zz4w}fKO92^2>!1se&!0W+d;LYHlgHn$k@DA|G&uKmHwpazq_ngHo;757? zBk(5hKR_w>ee+a4jRvpbJqtbnF0=SGQ26F|;1%HgNp0^qQ2Hrp-=70r-cN#$fqxB3 zzunrP-$#K$ry6`4xD^}7dl_%is;*OCVFrIR{pOw=gLo;bcHWq|*t?_>QEIN5N`P%G=E!>4%$` zG~WkDfl|)3&B~vTfYP3Epp0J)DCK?G;wJDq-j7=RCMf0nt;KgNPGxR{mw;04 zA&Wl*%X$ATSP2eoK}QBZ1Il=M;COHecs=-SQ25LT?+34VQtkF}pp@Tc@fi3~-d_Xn z0bfj9B~8h9)CYw$(z#xFX~a_}4|d^G+k`VyQ7-T*EGWxRKS z(ys4;!@*yHQvQ%H>AEu)yoL8>i|fF9cs~k02=;)|zB|6G?S2@Pe)}va^jblxp0#C3V%HgCcsy~cJR7qw0^rm;h(<*W8hntXnW(}t-LP=h2Abu`tMs{8F=|p zjUNF@f6M?~a0U1_a6fn_cm$O3eGPm9eA_bRn>nC-UkSbi+)Vf-VFy8e!c$BE=W>bU zCuMw#P{#Wa`M_@m;ZuaG2{Pu_5_S{5N%#SQE;m1yJW{9c+4sHRrwRX)@JEDyCw!Q& zlOS_vBjG)S^91?*8v!;lzmM^EIbk?7|4cub_2L1Ze?g#ooYx40iN73VdYIoN{!0J- z2SNIeY3uwI;oYQrAK@E3%kL_}9lQs>o9*-a?1x1bZ?kv;l<|`vU1@B&AA)bQ`Q8HF zD3SK}U*J)~I|;&nKOqR8&LxZ?>>)l|)#7I@2Jja=TuJyi;ZFXf@@f|+aU?DKZ8gK!yPy#00; zWxt0|L0}4)U!K2r6CN>dg+E#Q_yN*h!L!UKnS-76Jmse zgdxzV0v{$kMv&j_e0vnU&e9k8CBOegxQ1{8-=6_D5nRIidA|cZKv+R|g&@E86B2|! zCcH}cSHfFK8wbBc*hQ!%1i$|b9>7-nyc+y<I1J_iFI-ybmS}<+&U5C7v*m@Ik(P4h(+J@$in| z1w3b;Pg?w^NZc8o#}V!)JRkWUJT}>fB={Zsd<*5R=ee3N%YOR}cst=8#L4dpi~ob? 
z?-Kr*aGCu!jpx53$Zrba=Y(d$Ercsc^E0rVu%95mrwA(v`{ae+7YKh#NE5zKkiC$v zfbS(tB@82clJF;lTZvl(evKf%TM3^aEG4|1IQh*~a4vwO2`R$$Htr_y4|#7RTt_fJ zzSZ#XF2cJB|3c^@m|wZ;uUyx8>gCRJTm3bQUO(H`d8(sl=kf5hYw_vs<1d7-`L!py zH?)VZ{_585^UoDtx17(fUj`BW=6AO8;2&Ppv+YG5y0#q4KYu{JtlI9sbolQUE_VLU ziiTQ#?Rmd-L-)>oub*wzn!J8?d03ggb%++-z3QajzW&ifG9jUB^D~-{N@+}0b0Sw$ zueC1vnDa8dl72efRMk}N^`()h>-)_=+}?HO#m>`vN<^JM=&#%y7S?s{x%|d8ooAlw z*?zS1^y)&P2M_z_*2Nkdq0oK!>Fz`8@*8&h7dH5I8r3693?7$mfqN z_IDlWKDL`m$SdQ%^2Pl6!!_yjyoBekUga-cNu4E6e(OsA*`1xIR{HISy4LQ4C%TSw z_^VgGes=R0hCk(uo;bm8Uz}fe*uFnjRW-Hi*%SGNi@J6!>RG#~`_Lg`oWf`QR_6ML zw^73Qu~pIPjC7${=4zVmLwoX1FX7W&PxZ-f6NV*~PNj7!b7xGOD35nRT8qu^-)XB{ zskF63L)CfAOD5B0?#y&18Nbz;K7C65rG@$JyYtImp-Ea|e${Tj{Uv|vVSn2~+wd@5 z*QO4JexP)kB02y)%cQ$sKf6eZQ{L`8)7I5-j1o(fRW)7NLI+q0Y2Axsx=$>Dooq~E zUR8Qte#4IZ;bZ>F!`*vd%D=GMndM54yYB3f^4NLmMTqrzqlNk93-eo-_pCWjZF-(Q zCuEKyk%Ye_qgElv7`~Cw#2#$Rx2_IjgEt$iy&tKRfqf+pZ3o#_D`Ksp7)op^v^G3hW5P76hNr<%Gb5?VE4gY zA$2XbZ>j^!{qsx0R9dAzsk#qd=-F_v=f&r`PK)#wjyEh7%p6m6_sOHQTYE`ntlJ`d z*gTum-RIZ%OLqIG+VWesXs07qRG9i_+Wq#Ebfr%7Ad_R{K=9Td30)}sU8nn_G8g8U z3tJqZJa;Y=NM2TU?cYP;{&VLL1NnVB{j=>(HS$@4n{Qq3%7de#gw)+npX)qzrt2$b z5FOHTMBVu+D#SKi=NGx7FXYPdNYrNKAKbd3mfU zmoB$Cy?C8B*NLZ_S`sN!e7wb}t#qZRBa7Y9=~T{3<;o{|sk&Uf%XIMDpX*w?J-_wk zuI;-c)4r6{Nu;v5SY0O8VCW|5oMa-GOL}t&$ z^UGQM5RGjJPx?Vus;-VT`EA=Hd2}vDr*T7;x$31{YhGJ&)sDy)U3fy) z>E5%5vC){V`*ek2lsiv7OHJ|%J2ZLEx(oi9J@Ae+{w|0d*wFo~&2{os|G-9nD@q00 z%hK*WOS`tLS9)jGvPwnUCxhL(I*<-+ofo!|wR_o7v#yxRMY9BBW{8#U3CY9CtZRMV zbTI6iU${v1D3h~yywR{q#dPmoid;~^N@I!yt1U=1BLC#8-7mi4A8GGid#=BPTh8}v zS!!#v<$TxjW&P7debj_H&Q@`uM6;5*L;SWS{_0oUFH}Coj4vpjp~RRl#{I&Gr-n+C z?C_M#GOM#=H#!of`3GO<*|daJw`W;9W9lzkI$N4yR8=HUK=0@6N1dN<-r@Xw>srSy zZ)UwHbCp}^1|30YxD;*^p zMIJ}!BMAn_lF7j|^o$m%tDWr=`^m^TNJKU9A?cJil07k)LBuEs%1)$HQvGlcw8cSeR^qg7 zranU17zi|HzW4t}^|b1$d=D{F@uHPpsggr;323 zL^Up!mki4>5{2yOA8bS2RxTl~L&kCA@vgmx4PnFY2K7n<&Z`r4q3U z8)Y|ts9k0;>zFahELG9_VgHqW$C>Vf2hlfKXDWv~vUb1WTw-o^cD&TJdTF>K2y<>- 
z=R=S9xpkvzVbGgROQ>z5BVv&61j7;&iGk)vRXi8TSj2t#>L52CBdCeBy9c{#f zF*;JVNyaMHk>X4DLa{((bd2piVT=xd8%->qSf4btyoAZEs5&RBt+ZwBp?uq-u8wW~ zq0>~qYs)DfbX+a{OV40>%hXBbopP8A1uJUXGXLOaD-D+}33#SY>XJ#dWr?^*SJX0* zrApf@m>P>zV5RBR8Tv85V_DCO8?{Spqq+}0(|zzvDdsXYQai6?^aa*(F|5?|GKQ71 z-z=*fs}4A8n0*yyH0yikxm7Hy;oQ;5N_F}rsDv&lKZ$FA<3Z9NN@_v}2~S3)o} zJzE!|68FRZ2tPG^vTbmjHK%FFA$WTSPDs7}m6s5SOa9rU05ii5E7k1P!NSG)_V z$*(_c7RffXjOeob&X=%zySA>NiOj(;|Iz%>eKvnzJJ??$(TrQZ^`w<_B@zV^HrfSr z+Z&L=(RCQDBEOOeW<;Z^OODD43;u;AJv%${8xN@~N<0`ZtmqSWV1vKpRKIapJ)Ngs zEs9ptQ5APsM@$a0T)5*Z-Du0K0cCWhHZ&r??KIIcpU`SB#h>w8H&Gkt3tM#UTY){6 zKY)LMZWfNsAK0ZFjCJ9+Z@`SrAJ`2Wuypw6j`(YE#!&d~&W;XTHOhKwIonl97jczx zQQf9z*)pav3s@dsk0$DWT#Xk|`>(`Y)yaSHA}Fu_BG#<*(ZvJrks;<>WD)a?&g;-s zDz8+RU)B~?Zk2JZ4Dfe7!wMt*D7CCawk=wQm6m^|-QTqykCVUjc)&^rRpBCIb?`4`)!eSQ8!YhS@~tAjRyYLZ{R zm$|QltAh>$?uro=U4XP`Xf!Evk=}q;VI|O^PBSbhK3z_*0Idf_qtwv@rJANpHk~mQu(^6g zY5(<(YltnFGeFit^u@AkJTuWXdD2a(Tvon`oG^_DtCOy+&3d`g^rCTwP%hEnSu)mE zKZAn&vd(^FH!WAuCVhl4xoa=m7sfg+_)PN8u7|C~xe#^!su&ODt$Ih5id5CODAhCV z^qbT!dlyZs&ZnYe?d{zc4m%gIKf*~JDx2Y#B26>=+OG@_QWUNtQ9UTL)LAGtMV-uC?eWTSmZ<)YG8-Hqk+XY-*`Gdy|(~CwQxVpkok9qDkDCo3taEq56{C5i% z4Nj#8m!hOAc?n7;;tfy=hOkd%HR6p>iZ>dWhAw^Q=@u3cyRLuU7R>warf=+~}}b`)^^r2CpSdH10M-TM|& z73C%oZiAn!pEG(g0~Hy$;M1bPUTV&a=`ws$*qh7^Y381q>_y7_g@gIEr-{8Nd6@?x zTlKCSkpqx-#N>K*2c~gX$I`G%yAK`9Z(M~Gz{BS+>zI{Hq~<+joFF(gK9{afWxY&o zc@}-DqAroEZ>p|Hq=(g34$CHT9@{0U+C*JNeXb!ndsaM_nKx&CBAxVdv!nLjtYOiZ zS;Jz_GQ`1gpHmZ!}stz<7C{lIx zN*ZD+QJ^JbVzz(7;$qESgEuD;?~kxnUUZ*mo%+(rdNp&J7{~!gMeC>)41{x}wS6f! 
zd(}0uY;GXJMu)R??6m_Fc1EErO;OnVqR6aQ*T8;THrQ#`1;#(#=I`D>bRn(ug67xA zLQFB0J{xDUL5QuZwwUfio0UchoMvCz?rlo}t6KBhWGg_quTK+poW^gUZeZP)6qg)J zDOP~t3NqkeVbhT|2Q0Bj-Rmq7sx{b^a_1{-(Q2A-*F}63>U7gs^IVA4b2;J%lw&ve zo-rgHJht5zR_TF|sndFpc}rG`Fxa)3eUz?OHaWo}XM{ephZd;15UiNSiqupZt(rk` zwAr>Q%oA5yy>_1yHJf#tH@|HsE+TqFTjE$N^2}1JoxEIRGxUj7LqQqV=2h)ogd?`9 z*&q!_g=$!dl!4NPQ?j6R>BlI+;$&+06xF={#L?lrXaz34s?pt;i<0fPql(*AWTRfJ z6MFWnXGZ5&?dx6{98gl7-_fnnPnAEKu4~Fh_MeB^7|TR@1Sz7;GIpUVs%hN0@mMZ4 z*KYLM?A@C-`-_&rI(9po&*BxdtAaUEqC->Uhtfo$y05RgqoRv}tDiJ094=q+xWCDK zZqJL`@cZbNvyd7QkWv+-gtee0Dc|?&5VQ3*-jHzilxV0jl1QA$39?e@%#77W!`7VW zvn|#cc9D8W!WMNv|5J&QWaMoC!k{wL9Jdz)BE@4BYN{MY_(oc zj%?`JssVGfZc=lHotDniOS<+Q6(^lJsT9ZoLj*g5l0&zDe+3m1T%4{E$Y_NClU5u069O;uyLt#KeN*ORZy=_D>Pqt>#=?1f?i~S->k1e#X z-(8kB{PPh}P)tg5TQ`k(6OCmVoZgT_JZx-7{IhbRj>B~$&HO8PccQCE(COx&lo9k@ zd!Lui@7UYbalq+1um!s-Y6M!w3QlOqs@Jhq*KdwNVv%-Vc!63o@tpzUSh)MjFYVWom=Xky_K<1XY3Bma*hF>=6a7RY8c5ReuLK?wXMVXihqiOV{ zQ#2(M87eG%LID*NZ4C z?C*uDIbGza6C_+x^;O*rGTVCZU90|VdQ!!M`|;^hCfh@vWH81MOsi^}D!2ojP9ml1 zyxkYp`deNQX5M%xlv$!lXs6As^*CXswvU`J!9I<9>Fk{9mN_={T&<2nmT-Eo+krI>bcBtYRS1x6*S5TAsfj@qe+X1Dn;}Ff9WP2YDWi2 z#e+EpA096pF5_WNqoXF+o%x7Q02dCroXsy?Vg@-BATk-`E9xfoG_Nt8$(2va z)+OTQk2ck1%d65O-LaEKO_=z|s4-)vjh#My(9|jA)4cf!$z48%Oe0+m<=tIAod51| zhYufFdG|+#-#2{tpousw%Bw7C+2Q%Pb#+b0)8#%`b^V$1?2YJT#+fT={*4bW+(`FH=4?Rg;bGSUK~s}WnOL%X zTsqT`9qFbT&1?3d$_HG1d}v6@z4amYuAvVM8tnq59)dW7t_M3QDUc-bs{m#t__=XymN zf|Y!+{Y;;{9bD28!sfeCWvNMo;3O3@$hRL$zE=3EbFrPD!L{Vf9#!8LpzdqHsrPR5Izt)6J=5Iu_4H zE-5kO=E%OA@z4>oIk%SS?8VENBAk8q_{Jm9 zCrFscx^R>mn-85>b<(S#dtzBW#v0&qS-iN!-OicMECq!J1|L2;r6cP7a zFOz}Ii@uvrW$rjrL6^HsVVz^SoY&BpGo;Ggv67Wu&yi5bqm@R&7x3HcH4bE?gY) zs!VZ+$)H;j=DCS1hM+q}BwwG$iKYiBCTO^~pfSNkk}`Ov+-Sb`+EAEqdQ(jeLs8p= zysL?&Qc}xwZP0POQ&%y&!(?7+ej<}ji6>szKUSTRX_yS>P+eLkz7+`ah>%Uxw)CdS zvR9kPG?;`zwMlFSxp&%`F{V5GKke z+SSo}QBzeM&_txHBPCgpPSTx?B1tl-Y~h9# z{cA2*Y3t;^rYqcmgyYhU2%VM+*H*kSla>|Ojk)t zW^Q2#U+CbbYE9XCElWmuFs(>XWm(>+dSe>tnGLRHNlAs9S-b@ovv`c$%i=A#mSuas 
zz<&)Ml39iZ$z-%sMQ5JuqY>17A9L*BxCbo zBnC-VQYo*0I^n>ZbTd+-07%Y0-Du*g>OI7`Q5axinSwR6n@St8ma8X5w6s2HFC~r9 zh0Xd>OiXGMNTYzTD!5ovEY$U)lX+-h)yZ^?D9>hY_Q?^;q12efskEDDKylZyL@z4H zhaemOX{;kaWRqEoh9CkuRB^yXfd^%&(Li ztI0JLiRMNzEM}LW7`^{B#%hN~h>MC@W~PH3ep*ZMPMFB3&oHl97UIDqGE)m{tWGi; zUDQ~aeQ$x4xq1nVA&v|PoSd|Zd00>b+8_!NtGp2=)~FFv#a!qrbFpL!)tW@=k?s^p zJk?l06ImO|5PBvt%Tl2e=;29;1!cybC=ih1M!KS-z`4>$*IYX*bL-O$vRE>L`c(`p zZj+g{M>`|9cV=rQYhZA>i)Ye?OP5V=6K08uV@H^ zd}IWlEC7rz;ycIb2M8d>ac|?D~QWp;4w~WkGjg zvdGCNV}+U{P!&b7Zk&~eIn#u`2gzzEHHv+6ksD#s#cr^7XI`ps_9(lIYRc7{^TqJU zC8skB3N@D{OB6(wVu}!JyHOluqMK*a$tKo+5!6U8mOoOW_SI*#6q6ItZ`Q>y;{f@j z^JI*qpuqTTY^oN|7d^o>?s}_WWE;JjL~VkPh8NHgm4PL*2oq7xWWEI&Qb6z`Bs7`c zA1fjwt93!{h20$aRF%NvVR~EJu6&3M6HbpJ`*fzX(CUc&rI~>iCpM?ryK0{@9%6#_ zvoELmg*$N^$FH+uT%bmaoN)#qrJ6OP$sn2mlNrXp-cW80oSK$^Sg~LU=}LNOtq{Nl zrOz9U9z<0m`eEP972;4r-((O;T2T1=rZhZ-z|7!Mkr6a9RW_<@QO;P^?!}o{bo~s2 z6s2!I{zv44x{fkUsiNdXm(RpAW^SR0DQMf2Wq1(G@^3Ua(G5Nbva32kQ_>u0*zv_bi;G>6z^#sO(osD3zULSZq|k%&rvscD5f z!I;vbEWy-bK^Q%<^eR@GM&E^OLMom>FBIL#v_cNi=p0nvU18mEMrGW}$TC>a$j+OdZ39j~$Wq?!-`Q;yY60K0gCzS};#}(c(T!!8w&OqrQ`39>A zyBaN2)ybMntl9K|NIPpOY3aST@Nj1)96|H-v1_y=jT$ zGXzioX=FdCq;9NkHf5ofrKp8QL-viuN*Pic>Z;a6|HTxlGF8=(ps3M7XBSe44jn{? 
zJy?hi=0=|+k>N5XlhF~Sej~-I9B}N6#Fa@IB)8+36%E1e9~h~_KZ|6i;oV-Ed*1}2IgS=DT`GqiB|LA|OniW` zxSSjp@m!NkW7J2ISx+8~jNYOvkSQHsxK?M@F_{}$FjCxX`)D&Kg_rXNOm=D1u*`*= zxtIre(MF5Qqzt*3r$tXO&r)ouni*0e29(e^@hzhLv1@>5Ni}V=rqPM$lVar3cd{U= zlWdIsb5%t<6osWITi67`g;wLKDriXIew9ry^_Hv2!1hXH8T)nwn6VwWs5eG zFty7%!Aw(wwm&m9OcfYe;fILsAAXx@mhFBPv>=t}maIyBx39tkWi2?Wb= znHioSL9;jljXZ(b$B7HsxZesZM0A}g6)(I|*pPz1g&o8f!SMl{=(6l#da+j^(=6No zFm4k3E5=WZ#B|2c#f+}{@7bAaWF>|K%31~cy7sk-S7q9QgHsF~>+d%XLn+2O^I6{m zHzt&&dB1fkzVj;nwh$ z^x@`yvL%zK!MN^~M*XwqWPx$K_DN7g#H0&klF3@(PBAkx5uWB>E<{Z$)o8p z_#5LjIQ2BB%jmGHy@puS0=?)a=0T!hF-hot zIHrnm3NjC6I*DG2_8M*_^g0_uO9~|i7aW5UtSc@U){7JvQxg_xU*|=LMh|t{n!ho+ zeH&tQVKbK_M|@_6cOMJ;U2nD_MkzM!i0;D{==SPF(aKqe?8QhVq^OFtHQZaq(99SC zL|bIpmVrre@`>parYTLLcdCJRw`I3qXvfqPx4VL}bjw#$6cV&Pt*%7T3`A2fUT+jw z@lG4fyl?ZS+H(emcq7JkBPTILMG$6-)yU~Hk*{{BWMio4Ji$oAf6}44d!=sFTD#e3 zoJ05?b-!6S)eNxE7R@@`7AsCCi%r&4plG*BNt*%0@5O;VKA5cycCtk`V^c9!&4#Zt z(@U~?&@+sjo`st_O9LA?MyUkh0$=VW*0ey6^#_-EqZzY z=8$hCu zgP{^RHp;v&Y1^4}*1JD(09R}klC8j)!CKX%%U-~RwmZc9u*SQwQEaN>?8+I92@iY5 z_(KApj?@SPhm%{Z)uv}-y-GHnt}U#^RWOhTS%~!>EfG|q2g#IHNWtir_B0g}qS%NR zDnbKEQ`}6gD_cKxFeS3L-@P*Q3OV=I#3~Y_>8WzI*e;ewGMFPl(p5O_18X!;zs!0h zHIR~H=3p^H40r4PTF#_rtBe!!xR8V5>>ga1TT~@$1DmaIt0#vwEVD-fnGnB^noM|n ztTRz|Zv)?kc`5ic%)t&9-ydgr&79$IxQC;`vc8pAW^ppkS|b_}7)n&vE0G_4ri44h zUXU7UIO3w|HPtlPY-V($+x=$P`xc-XWG>0V!QNq@?8yu;XAW743r;JNM{kBsWo0)R z;W9I-x`xS#QS%uckX;5{nnjEyQca%tTx9QcjOY>5Ul>g_*_sy4Cp|8R7oL6(NBoLQzrjZ4?exhz{Ey7#8XKAD_!q?if9 zcV_Fu+SDb=4)(x}yXp5?RCH+!omxfbRh2(SvoV@(FsdqJ8#zo^usEX$d)c#cQ#j5b+eMF#aP?D!Rwg&eA(o&+_i%Oa(2Yp|OCN-%T9hHZ#l$YI7&be7J;LuZ0>p_F%Wp zSUH=RZj_UuI$KqOiZsDvY8;|+{!12HqmIb1wi!d(r$i!&MHi)58-mK37K zgcoK|45jN6sm7+9{DT9K)!isrwv%36EZNs6EzS@u{iV|uB#d1{q*sDg2Og9X?UM-| zGz-oN9$_wRmOXojc04*FC)hW0OwZew;WjaAGdTxBU@piB_ zHfSy(yF~N}nXZOAT!)M3^Rj&v0HzE)2w*%yJn+qSnc=nxv>fEZs)saEFj?LCes-jA#yVUb2#9QcsJV Oz?={oK%=5ZL;p96y1RY= delta 11592 zcmbW534B!5*@sWqRCd8YfP`Ek5S9=^*dvi80YO;;VRHi}nVV!}G81PegrGQKN7+3r 
zf>0I#ae;tQc3k__w$_SE)z6}J$xM>Ew$@gy*6(@my+PvAe!t!y|2${A_nh;d_dVwf zPi$yZ^+8Pdopw#OD4uUnyznxmrp73Bp^cO(WyC7g4R(XwVHR8jiyVFcJCUE$L8+_Y zLP(KX2jk!ausu8rJHzv^HT)1JC>2&;lPIAeuA@@D;oVR^SOpFEqQg(%)#TgoRTZA#$NiYzP?!rPzsBJ)DDB>W@||W~l+YN$|O+;Jct$rV7gUK8M%97^W@xOounXa7zkqBq1g(fMUu8 zkV&bBpp5i6mml32`UY8b^_V^nk7=ZHP35PGk9P%H+WH^F_bwM{23;YUV6!irh39ssDzds!^Y2|ac z9?AfZz%lTbP$qU+n6EY?kqBi*JspmL^5Hp9#9-IE zR~=Jq4u<{7=R&c-GAQluad-^M#NLFl;$`1D1??I36e@?g_;V-Um4%o5 z6e#tJoctqD*7kKc0(N2)W8q9V1nz_~kyjymk*b54^sgH6l{s(}Tn~3c@zT@*_S)q+ zTnlB!$Dz#lJQPcP0>{Bt#BMH}4rjqba6FWc8DIhwC%p~MgAc>7L81l0ngLUwhJ{ed z{|T|KipMcz?Z!h{(|J%#xD?8O9)uEh&p738LK)b%PJKMnm-mK1@uAsJe5zsy_Ae)K zkb-v57;2AvAe6OR1GC`0P)znZYy&@q?csOOfbDS<3AaIzPpRop%GW>{$fFSdR4+h@ zvM-B@>m%#5IRC)k}`C=Ol=JHuI! zJw^o}?xnWE>F_xy6N|$^#IlKDCvgK5la|4$a3?h2CvY}wlW7mI2uizq;dSr?%z@v* zjxgt1dnt+_{-wfD+E+s6q+Wy4zBP^@77NcMAu|X=qY*QOGUC&tZBy330`gsP?YVFT zl#U{>3;Y(!QgmXS!~*?bTR09T!db8dEQ2zTWpFz@1lelBYD~60;`xvWQ%j*VJnHZd zP&)V;ikG)0rid^#1M&g29EwjIhOOW^XsxxwFJKSK6L~oaj)n2C2wLa=3KG)60oWX# za`;OqBY(-s{}D=uQ78j#gZ*=1CX}_@=4!gqk*R!3%bXW!-f>*(AHz>s_sUff>TnKRowZh3i=kPpi zLHXOTHLQbS2C2Rzkqk#ow9oA#*q8h+cs2ZuQ~ohzuTe>p?5%k^l(1a~acs2*Hij1* zzU#0W%97T?BG_uO-G0Gj*1sPGk5M2ddKV_Z_EYQ`41h8-KkNmMLG~o|7L>JYgDcBg zrbBVcDX;(*!VvsD6tB-Bq*CExC>DJb-VE!eVt?_{vDjZ|LaeG*Kxud$u7s^v=ybRl zibbA>_@nyJ$)^$AQa&B_gbSeocSBjSr=cwA&>2cKg_EHy>GUuOF-;H}@IENsdJ4)5 ze}WQbe~0qnb~Ekr;jk_F94Iq)K{0(fly-|9?uW9pFF@(|b140N1=$2uxa%xCTxLO; z!4fBbz~M_y`Bx4T*p1|aqoI7r1N+1EP?qFL7zf{j9pTqd_Lp`y+TTxs(mn@b@vy2O zAtT=7R6Gyclm7=4r)qJNy@uC8*;O2V-1$}R^H<)!$obEL^1a`R9_WDV1F!?YX3ZH=4@MAa*rq8jr)^dlx zg))&YbM2+=4@Z&Dfu(R0tPGR5Y##SBcmj%-HfA&ue(BJFb6^?_!$f!#5{c?Oq;Hjw zrxgFx9sH4jyaQ#4dgR*|l1WfH+z7=Y&D^$=r@-(l6qJ#W4zD-uuv-8-k$>Fb@1dBm z7RmtH6xeIqA4>UkPTmK_t5?D#cmVc+zlHd zp$yM7UW(o@fxHV!*oI*iJPT#O>Nb0z1{6zP2W8(`cpLWL zL*gI>_e#Zldj@a8UgW=W@=0FX$wtF2l-~h6!)-7gJ_ftPmtZ#h8!U#y`O_3W2z$fB zuqS-O;lIKp*c((2pZ$SFkU^{6P)2%%-=1*>WQ?i+vh3<*C^H^dVt;Td6w57yDX{72S<~f#jKpp0ZNJEhMroKG%|Z 
z6CH3$?z&iGCor(W@vY}`{x)~geJ!&7calg)Vy-%?&^{jDCw&54h8{*@{vGH7`Vzg2 zwj+6x(L?BRG!MxWLK@wN{*GFq|3$KQV14W9M*r#~Co=;sLEoTf(LyATM1nk@*|7ZO zO49AnuhBJ5x!`LjEzT=X1v1b|B#yZb#iF(-ill#aPX6%hNAIEm=nC{4a-;g^4ibMt zlhFor9zBS%P*-#>YL6~`o+dL0eT>ec-=XV~JaN+hFcLe_E$C|02FWwqhFSq%MYp13 z=r?Etk|zg!fo_lt&qlNly@eW~chGSZM5j;*l4mM<0=1O+7m;`o$#WPDwo>+=Az&?% z5GqGrB+oz_Kl;0ilHa0P=>8wnZX!PojYhvjCs7Zv|2FhC+KZMVc@CogL5C#6^8o6P zZbnT|E?SOuqx$DY62C`(a0-3}UqYvyd_A6V5@*J^Y z{rwAnyP#2M0a}ez@*Qc5la0C+>uSy&iEdq0edc)WiOR^6%d1bWiyXc$vh&bSw9v0y z-@a;Zrzc~Yy3M@OLOs9BoYX;S1B{x@$Er`SkM7=Befs{o&D(1$D^rZwBATDB1d=CR&I$t zx;MJzC`IXdZ+wbwoN!G{hR#k%;;%R1TK*nQD2$8jKUVwr0mHa8L)~l`kt1uPt2aun zYEjpYF}ky{{uZMk-RS2w+~r2UenqCYX3O@- zhP9EiTOu1yZC9?VKKE43=0pAZ1%v*6UVov-7uj^K`pkajsh=}m4`*0& zdY~e@`#|K>@#y`JN4Fl8PODEIHuC+xg4A+XvA4l8QB$*HxltUP?w@b^=%o70fymlZ ztfb-gmzH~c2A`;XaCv0)b~;f`yPxSN{NHsH^2`tU=Qr%gXNF8yuzYTDY0#5zL|1P$ zd}guoyFI?Sp-_eqwdNRoY;Sbu-E|c^YM)w_!qoML-8x<7w9!v>o2YvxW$G7`CN{3E zJfJhjC+W|wZlVWX{g%EnDP6DazE_X#F*dVTv1eiL6r%=X-Fvu~*R#MR7hQj-cHPG6 zb2}q?Nv{IabW7RBoi&f|bIO9IE0AB*TPO6mpkM5nF)F&bGP-sNri?zZq4vSmk?n`- zD(8kgn4hN9~C!j9z_mP3^gJ`u3C#==zSe53a1+w~0uJt~+9f zWMuvB$OHPVl$-SX{YE75rH!j=pWGB#y{&fdp#~$8n!5W}YX7K2?MaQJ>s6_#{5_L8 zP`f^@9x}N$jN1_hSO?o zTGFMpTX#ku-FInir#8{e$7{E3XjoRW^i=H=zi3c)#rL`Q1|NjJlPV$)uBlo5 zC>xdCxnqd4OQVMm@OJf?Gm(2_H<56P{HV0S5)VkzC(`@sf2Ft84-MR)-GkceeX@M^ z9c5XLHs~w4+zck`^r6{twxNHb^3I`S^~Rx1+xALwd)@9vhjxqXTa8Oh$*xU1esmIoIQs%oU|R&+VmF&VNl}^ZKe3Lr=YCOcRgWO2q0` z*R*VL@qoZo{%iU)`q`?)mStvMzAG4_52sP{5k17%=_8&VWiNAd&nbN0j=PBEgwbsW zqC0o9RCU`Px0lMUZD^^qWc}rc6MZT-)a`x3uE#?fG)YgxPb-Ev)Vq}A{*D^|2ON$$8^>^$C&z)G0!DLPCQm~Mn`w;R`&keU{BEJvtspASuI%K zx~xS0w$2`|7iV9iU&|&SvfH$7V4%VJv+Vw{(QU^ubb|__sO8vrUh1F|$6nEJJmLFRiwHB)Vr+-M;%MAD~;0@1yg^PwjfR_E1Ibktct20xc?< zn>9IEA06LUqUN3PeHt0v^qL7XuZ^re%4LkTAO>nS@3wDc7+!8%*2&EUi!;6E+@_i{ zJ2{*pE1$r8^^vIGm{6_@u6GUFzQ6kP%E-Dc7d=0+e=A>!tTeG`4R;`?T zd(60y-{{W`Etq0(hho<-4S#{*GPvj(WksgX2$tp*dqN?Puh4M$j0wSDshMJwc)9eL zhAR|uP6Hk>zy-nTV@c3jWOn*|>N^{x+w(9zHcU 
z_WK($gEfmwxRY@?4opqcuS`uCnpI@x&o^?*OtZwu4Y@+4L1Th1WCjXc`KB@2?GBj1 zV1~-gonqK6C(2dU$kpFWjnm_%P0)JUZOx2vE{``@m76;=Mh9kO>(eum!wXzqkDCd| zmDnh81sFJA#l}nv6Zwl>Ay2-`>n*oz@`Ky9Q9wi+<^Ixukr(in1gDlVph>=ff> zemEq<$gx*9-!hK8T3X_E;Xm@Cv!RN!NcQ4oU0<2s-Yum}hG8;6+cCHfm(}0U1EpT} zrAsd7|7}OfKi83SLzj+%GM`biI$XSG7tKnFbMD-ij@~HncuhTH)+GJRtYP}u*`2kK z-$dUxds>s}MV_D@uq?i9o+;k%uKz*iu4;N?lbEK{nSMxjn;qM&K%CU@+ZJKD7Vw+R z6SO?l$SY?Z^qQOYr%$rnxx`OM7+%kOqA1_*^_ux)x!FtE0?OnLpJLdzcHQIVnJrR` zNm4tJTAg!CV&)8Au(YJa9|#d*j(ZsXl8^*74pLrX)?bmWUv8GG6#D}vk>M@rrhT`( z*CdA_=$L8kb?Th=^)KcQ);G`X6zet%T&3O+t1PEv32TBc{)Kt*Np;aBf}~ zePLdXUhW#E!>-K}>|41L<&KbnV?lMD*PrL|8Vfvb({GtxqFT(0CoP}{ zk6#z(&D5b)SLwI%x(`xWe&Uf$!fW~p`5ePb!#wjnB_)@xiJIW^gggY@B1**-TLix^ z(mDBYx_kc2_*^R4iR`}dXqUCSs7dnEP4CYy&{NH>EnFp`(tzn!SY3?NOyzi8rw943)qVY4+BN9X{*CamFIL~{AE@8;H}9A)OYt*ps{MWXv!TivDjTe8 zfxXk&_Q>~_l>f|og(U;@9RKCb?05A|B|YR#91zhrP7EqH0^owJ~~p;Mw7LMwve_|Ki3JG_t=BVxq);uh?|?tUG|$ z!#U2mARMv0;?@bZW@Bfz#aiY4ZIEEgL6iVhNj# zsYW}$V;WYAbJsU9T=o&FCx<#Eh|_Ww7MjwXb49dzlVd}V4t3Y(LdiN1O3>p%iKIt% z{^4iXF|8xXQ&82TG(P6~`u|uk#0*$AKi?nVKVAxD8D$ujbpob+npr0p*A2F3m$%_M zr6oTwYC%lqbY7|VTVH8$9*af{u=xaJ-MBniqn9lxJ?%%u`mVCU`m%*_t(_3HLe_L^ zcX>DcVtJPS=8opEvXfY`rDjMxxy=wa2ML6*Rgf&_wRIvmA!hkXe*K2@|F~S$|June z#f334KMV|0?oyl#OwPJ{(MPG*ZLL|Jtdz{I=>j&qiF(SNNjme+igsD{y~S04vFv+> zK6_`RkJv{JDMK!m+}`9J$>xRvroD$d%blV3EKa#nO*6gR8r=0xkfD?B8lEuDvZj1BlRbD4QkzBeFp2yaFQMw?hxCcctDzx5&87oa4(${?xMrt z%YXRd$?)(#sm2uhc+9|k7oYZbqgxRl8crBowNyDCQN!4=6vQtMCD|MW_+ zi#;~Tc_G&|{atuSry|1E%LFCFotU?mKtEftdYW7nu?SVN-_^&Q{oAQwof9BGRdw-K zlvL_9OWxA2E=}t((>hT(o{ah*OW&7F_k_IG%~{7R>#Juk%k4f9)0Yy91fq4fZ(!ME z=l9aX%euD?1Ij;-xX$2wi6)%I4K_nebyN2;C9xO8sk`+I)RIeD`Y?Y*DRy?>lj ze|+C(-}mQ9e{<6+rUIq`s z%i){wO86cWr+3NL^c;dx(po#iNaJ?dO|3oL~a?}L^Mufw;k`l~mn{y-@9CP49T5gZOz!cp+2uphkI=Qx+Z5ik!< zgkt{<_#k`-UJS1yldgcbLCLq_kgRiFu==k+N!MN|?!OD~hChZ<9=&c-{XUk1;rZx4 z041G|L&>LSq2%*itFD3RsDEJ9pIcsjv*VBiCl!jjC!nM^03}`Pton6p{zo_n{azHx z6Yw5*7A&<~VYwRepRbMNEDrahdy{! 
zsiW)EJ%?$-+@wpUWMY{2UhKq>b#$vm+)$}$_0bb=Q1bPwcWOIJfp?*v4<-MPS-uU)N~hJT zuci|d^tl#hX9$VD9OgiAw-DY7H^OJ(FX1FOlt`4r)lllu z%^AwapybaSDCyh|uY%u$kHYuhL$Kf7j`K8}4M)QtLdmbo?$LI0C!C7<1t|KzfUDrA z&v%T~nsx)pTBjU70^fw!!%H4e^E4>&oCu}f%z={c zo1wU`hti&Y4JBXCqOe4r3U7cD;XGIXpMClyYFi{aVuS5W$e_u(z@;)k_dhrn;3o&ukN2jR=` zd@7lgdnpwAN8wobE|hQwP&iWWN5bCl87S@Yc_{X`LP`IdkfL^6DDmt=rIz@Nh7zB# zQ0%=7#s5uE+@G@gmyIEuGf6vq5&hARE8l=pkA^*={q@sO;*$p@zZb)=!D@IlJP0p@ z{|K*x@4`9oQ+O?Wp1>u4%i)did+;LI0wsO_0VN&hky(A*g=fLLo>ty#`2dvqI0{aM z&%t@{M{twq(`XLE7I+0*KS|5&Ae8)g58e$wfs!A0d`tNVlzK50ivKTL?tnv3zXgZF zzrtKNbh7qmyP%}w9he5scvka29j2h34oAY(R{cwOJ?e9&C~t*SWoHx|1}kAE{5cf= zhECOd2|zL52*v%oP~!O+lzMXmj;@4RP}=iLmJ8uEsFzyqgyQ~nNY!HaF+U4 z4#oVC2l=8~4>W%Ow z)NerXCkmzBoI6ANj~k(>Cs5kSBbI(B;TJ;5hbnjjY=YAM-nHfz&D8pLI~kOwww>8{#9D__pSQ3R((m1n%@Sc+$O=>U>HigHp4sN2T;;` z6^%yhrNVpR!|++S1Wtl3EQb%|X+3(|@-uil`nLo%onzrusDtoPxD-AFe+KCSoa<)m zd~6Ps{CXWqyZIfQ3U7X1^%uicsCPjh%*faLngk_3vn&@tsSjJA_*W0FgTJ!;1olVW zJEZx1KOBI1hUID~=C51z@1c~(m0`6v45p*bfl^+Zp`>pYybpdC_J)6jQf|EqbUe8m zK7u*~CB7%1nEx6|JkNYV>&-1t@_h^xb3g0@mqE$bI;(yUO8q@^j`pWF!6#8yL5bIY zz!C6sDE&@8%SLPd zb4U|*{tPF;NiV9qIw=19v*qvMVASWlWb+3~xFL8mTn?pNYN7b^9-IgVeOuF43Z+H%lQsMPb{C^x? z4?}Pmd<9B8-+_|f{|O~N{|&|7fI{{EQ7G=KpyA-^HW zkB|YV4~hmqN&i#G#mF|~GGr(67V<9$L7SfsH(%lFCsw^1K8^eq`5yA0h{S0taxJnB zxdb_Z$nVbx*=T;l_;)FihMzxFE$0DvH}VluguI9J#a!}XE+W6jkPPHAM9Pus>ihz^ z5PQ9mH~E&|MaZqFPxY6nRR=E)$_xD zLvFJAgZTaxG7I@7avZr6`QOM)e16-wF>P%aMJE{H{a-$Uh*jAs-`WVmBLp2ibv4N8-P~#6Mt( z{ayh-@#weky&8Gfqt|M`{}GlW^Q>C>o!bfTMfg|b7QW@T38^q&^v|%+ehc!E+pKy! 
zJQp`-!e@|+toxzX{A$$FCv8LqBh|7lz z4A~a3kf3cAaWM+D!ddKi=-moK)#RMh}kOm_lW#%L>@(!Ab*FM{GL=eC*cSr z54pmcT?1c7U5Z?em>+u6_&5)_5cwk#Ma(b7=a#K$so&euT;;A>aO!wzOMO%8)*}gJ zbYVm6$fkrcvifLjZAC)qt|*C}SlOoBcp_3+j3@jX*;>MfTeqOKdK(|njfW$z91xxI z&F=QP-xV%&{?CHOx5(-fZpqr%);*_=muO5*9bcM=Oy@XwgN~ISb1N!`2XX=;(G82$ z9g$L#33CGBX)`s}?KH#w{0>&14i-!($ntmgBQT@0o?BNDZQRzc1Lg4#)`V(O%2_$FlqbFf#mr*+XD0*N&@Pf9gf&I4mdyGx4P zng+a&99-z`I2b#;lSqh?axdE!sjQn849*Jp-4*5Tk}~2fZX#7>?y{{d^<{2FZFKb> z>O}Nlle?nq)bR~3rOkInj2Z1#ER3wFv+9Q@Oc)zob~IABAi8Bi>+1Ef+FDGUHf_}# z;B)J$3E`np6TH#M@ZskGpW2Dl?uslbLTk|cPVTM6L`YrR(l`z9O&m8yJ_q5o1{>MG z)kZm8{nikTug=5%oSfhQ-;=@ooa`H&@#CL}Y%h##-WgfCk0fb`k@B5x#df!<&aE!A z2~XIIu5Y5~PiIeUcnhGlShD-n@dXl`*6o(Y(rD9RLhK@}3FEa|XaTE78u#`lv7<#) zCu&zAaac-Bcp3L{GkBUMXVSDhd> zt&5t)n?v+SQoqEZMj`Gnp~9gtttpL^tVo!~Rn}B%L?JD-buUeTPPK!(=+>H8 z&5nfc8f<4<2bQ`giW0UoN}X)QYEHJUt!dr1GTI=$x72u3rQ$8e1RXneh;(Z%NsIN( zlX^I3GPYwUR=Gtx-TKl<)ke*9x)tqA-Np*H;uu+}%{=bNG5tVX)twhQP`Eo9y7Of! zm18Q^;sD|KrqX~!Q5N05i@@EL&2$5iJzL%56;2lYvxqNJQt6WqN4pYi#}+lW)Hg<7 zZKUgvl+)Fn*hpoHtSJ#+)t)b&71Wf@^MEBJ)zi{xCvDSnce!-|1WQ}sJGshpu3YzaZS#fQweVj|~t5-&63S=Tus zZWx($tdE!srn*K77wA07xa??dW>{UB#C9*CU(k+~#I);JZ9qCBa*w?h+qTa=SP@&@ z+?~aZCt5cyu`$|sBD%D=dw-ceszohlJ8>aIGm`rHxur$!ihaJ9(&y9S+a{ig9%J-K z-%Eq%_m?Eu;_1@Kq?V?g%#jGqt=ZJNzKBt`wYY*Zb&HowmSmWzDm_r#zYi)7IUj7; z;(SoG+Ofl%87~I-w6>BBT7vlDBXDbp*%c#(TiV#V_Am|HtzSe<(nLupTaMF75zq9r z^z@XpLHze^J_qY37W!u9jT)qmb~aC}mg(p4!aEc17tHhb$GLYp-fwm=6z)I3 z=be-H%L&Z#qvxHN_nQ{X$?;DUtJ;%$=gR$Z19SVwllJ98COVyimOSt*3gTf0f_W1C zL?LL3Nja`N65U;AJodTA3q@z`QEH~P zEzQKpkp+?ZqR9F>`Z)BNhe|Cn{c?OPG+ll|H+HPnJ+W9Co2m6}3&1WWh*}}t%Bslz zZCa?^TJqM5t}HPMo5^mZwnADl^%*QQS_WTd9f z9ccb%l%Y1Yqlp+jQcKDv$*hXCq|k{K$_hkE$E>|akJJM2c`b_;v7QetSL^>5cVKeM#7X-*tzl9WjHso>Zp{YU8!jn|*O^YX zyR@n)3uH_0%Ct=SQuW&mm}*L|z(~^}GgLFOrMPw5I?WQBs95ddSWRPBmCM9P*LhtU zpJXhTg_SP7%)&~m-y}OZ);Zv$RI8NAXvX)J=5mJBMBCBEN_4vQP(AuW?wRFQE*z~4zQOpGc_e;rax!4-5$|7F$1B-*sZT| zm%ZxHS-A%b6ZKbi7j!0H*O$e}&fedl$sTO`Jm 
zTcqTe?diI(6z^fZSwOaZiC)+{4l}EWl+nOU*QnDaNBas3_heD))~3k1THT709gI!O zJDD9=>lW2_YsRXlrT(>c#=3OWi91zCmKxm8iOSOpZSvRP_uEPTQyqsbmeSEB^|`Im-EhTT8oQm#tdK)_IlJ(_vLDK5jFq1Xsg=zdk%u|x({Mi znaf-}zI$Xmb9xxWTr=l&$SUoxbS__9>Ydzb#kGBayJImUjO<70%1ZjS1;wn=B8w~B z9hK}jxl4}3tI~lLj41S@nl8E%@8Hh16uoRqa~bOvN{&6H?o+1of8BhFZ{m4o5q2+5 z>o04Yipt2*lFl8=jSjO3rY4c4yJ`DcxLW8`fJoJTqTm)CWWi_hLgwwJMp#$TlKptB zaf~O>c&7&55==~<+c}Mtu#Lg=`j}>itkq1zYi}t&qt_sOU3;bdgZeYw@6YbpmPYS% z_7ch5!XzRQo#6E8A%D1Q|GZ|#qi`VCZ@sZ=^+qPhd$sk`caw7M+9Z!yOzzmt@r79( zx9yokmQ_-%Wply1`K#S{qQBMMQT0WlYPKkKXWHr3svY)vw5yv>?W|Q)#7@>ZJ*+G~k2RB6N6SFgAk}Ma*?4>P z&c?bqZ!3W{Hv6fO>XU4BMQVy-CPXFUEHCz9&+9t*l<{a?@2i=_0Ej!7jI)-_f zy0kW}V;lXBw6s}SWQxIet}8NW-p=7vyiPKmTd_;$ z`#R~19Kd#&BlEntf$p!_IbnAOl4%vSOwdVV-+BH}-`^E3=$jYp+m%l`l6T>gG58Wb z#fz|0WYyry_~cEt$$2>@wi#1W$4oi8e1D>wV6)Ap>x)?JAsQ@lw44)*BEr$BcjxKL zspFdo`9yR_qg!iEpR4xUHHGR%n<|)qtCpT?TI;de1F=1YL`7?pbZ&iLoH=F0;}n#q z46yoXNGfgCQr)F%%3&og+MUJAD--8?@dbenoLTatP4QL?85BU zPCq3nc6T0YkpCWXL}wm-Dtz{G?q6823o2yK^nZZFVIW)V&(EnY8z% z)4`b>5VF?QwV0kmn~_F6Xy9Dho^4A27HRJez=yy#%$QoTPWZn@`sp z*;HkBLAkP_cdJh8qGY`;cQ|QjX()>BIV78O=B84-A26PncuWK+H}B)9@xmb%ex4)6)S#2Lq@%(DjmPM2FZ#vc5)N3rr|rM zF=OOzXRNDjJ;>_Tple6E-0T>hdYCX1r`_FK>b`nw#b!EVPiU5M4e%H@&t&aZbf|SL zGucA2hl-k5w09dyqNe`sp6X<6gkA#m^!FxAT2aGNT?oa`VNG?BRC~CwIQb_TaCBr;M=wS9~?MiyYkqNero~)NKZ7Z5_|9b^dH}Qo9G=BjcZV++Om; zfmsYuX?5DB6Wr6>PQs?nd1EJ6yBjx2WnNdC=(A)dp_w*yx}O_ny7rM9Cah1h{lUQop*Td5gISw~(z^u~xl^qSSkt ziKZp@HnpSCnvkeQqG`4+bstx|9lcUYUGl|e^wJy%Q138k#!C3+(rMc=EnV!WrY`Tb zn-}}^{4sOYlNi#i`o;u8VcGv%QNpT5YOVL?wby3%mP}vSAvu~BZz^b$ZLyXawz-C3 z>qc9s%(ArewVj{2-pz+}Rf3^uFczI@*-I z*Y5S)Yu{_Nou}#}^8k0rdM#>43rV{Na}7Rmy)aG6!`wz^nqW`nJ$nLd;V_r8?j=i1 zAtyQzX$+#vIPT~a&LYv=RM)#(?PE~nVu?Dl)^#4(#c<5ro!1#s!X_dTS+gdl5k*%Y zY-uc)J?G7vb^C=!7V)y<*5?*(J(6FEnq!Nq(|Vw*P(7BqFdmy9e2&X&DWkJfzUj{o zagi*;H$6QioS&H&%E=7-dyVtY4(5kb9t+I~WTy--m=Q{u5X|t6dhCJGWA1-o`_(seO-}pf8?3}># zfImB7Bg1Dl=z2Xi`ms@MNg6aTtyla8@q~G^iDB43H=H^o|19>wtd_%JW;a+2N3i2~^QXUNE=Y}$Td9#f&bnl?MefoKC 
z+TFb#^5^;U$qBM-UWRXYW|lw4mp*XNV50F*PD7ipu0WrQ|!MFZfQ0DUG|c*|f6C=$cZQy(AtX z*&B0asn$jJov>#Y)Tgm$*XedOAhWgj%qsabZRKv6A_tbDpW%98Se^Vj(SSk5fN!~ljpqqTG zq&Ka)d{EVPm5g=u>7PL*A1igZ?-xlvR^kq>Uf@eUR`TAR>hV>PT9${u-p2$?{ymS` z^<9g!Bu$&8X<9o@owP~a-_M@+W)!<|n*7qHaq>&Q=bbyF87-Kk z%OCyBvyZ-W-`(#$_ug~wyS#D9hwCk#-yzosKWbTD{D5Ws^g?-SS@va?H5QJApM^8v zv#`MME%+(iPhD@8B z?;HLceg^l2Bq~Ll0`Gt`U=~~ruZJDR|EKVL-0#3o!`~WSOX6d2-wCgS3!vopykR4h z^0&iL@Vijb`Jv&PhQELldH?^R2d{%tpBoIPLrM23NY>VKP|CH(@U(gVx^ce; zW!zmxXGpluK}mNelzQF|ZCDCvBCEmpcR^Ig`Z}Bn{}IyU)@UM0zEdGpv%Un$!YVgB zWcZ5VZwzfZN80mVDCs<7xD85uyA6K;r9FOT{H+^xUidVWdQXGWZ(oKIt{xK9Is$1r z>vbsg{5?dqtt%!dr@~3N=Rv9eYAE417#@dGuOCB6_xHv6M+yzBH zCr;LWTVz-TrM?|d>iY_mcKSJ-1o3p|syBDCIc_WxRdEy#Fzja{b=;Uqj_3-px?-=W!_d zR1VAF0eA_tKd)mv~werc&DI@lh+_kXZ-?x3VxQ!=Sp}x zyb3-BnOdw8h-z64Fb94UO1-W`Iiy{$GrS*4doG7xgiX+fKZlRQ3sDj&UjdYGFTi`? zDL5DY0bT*;-md*n08uULc_`sqAyu+|2qpXlD1o%s0x0Ew9$xlA>ItQvTJO?$ufk_> z|Eu9wP;LqT4!jheN9SAyuZB|p$x!;`J}B+*6ubbI!E0e9Tm}z7sm~QNHNCr_jeEY~ zSK()H?}XCd-+&a&dKIG5*2N4e(Vv-6{GW!RKhMHX!)n7eI0pA?@H+TQDCNI|!6KXh zrTnwtDEMV~KFl{PHCzoD($*$;J^VUE^(@!8$Iuz_ekPP~PeVzk5i*rp2jRu=71#>j zH2yCzI7!0V4KIRGDE;^wDEf8LY}LD&Fdz3q!|y;TcjkTCF0-KIw-%ye))wP_#qbUI zN#4H$rM>eFTXW5#{)0~+s6$P{6H34RFH8g4e+ z3O~#H9dIH1I@|=uKB(iq9X^lS_mI~67(}(Kzl5KIADg4~p8};`bD-3FBfK0oL+Qs( z$dI;v0E^%!P;zO%GB_R{gFg5phzeOBVUS6@`wa6SUe?!O2K-x?3;zvD{hxSP^{5hx z9v*}eKMJM2J}^%?7E1ZMvip!DZ_^ zao500aUU}7UMT7Q2ueNQfHwTtBRUSoL-BtAUIXXBsc;z_3lBg^|2t6h_n)EEfApiu zTMZW*ZZvF%(w;wolFr8;Q{D)rz7HDu;bpj=gA#5tlzHJKlzP7crQhC#5^fxmo%H+d z@CF!w(jIkC=8waM-+_~GyHM(P`4bxcTEm%8>a`R~x^>2V0HPw+_uv=c)nC%}`w=Ml zt~KuC#{H&opFiJ}3rfERAXBq-6f&h){qSZu>C2km5_l!q*tm zZ$Rm{UmIRWpikkR4X=fZ;TX6Q&Vsuks$xYUO=jJ?K-c%p(8m2!cnADFTnMM}$A&M$ zY-+Uk+A9Drgsb3a_yV-y0eBt!Hk5Xbn)hS;T8=xQ zwBu9oYFG)S9Q%y_UmO3oq16AfCAz-91f`uPRscrF^f#tKiSz zXW&N)bbTKW8G6>EQ0jFA-Uwedd;@+K_o#rb?~|a7dnH7Ltvzr&d=E;v>z~&3{XQu3 z(hE@f=PfAx@E#QXzhtR~pAJQz^Pz-qhSFZYgi`)X3YnuI-D3S5agY;;=;AtL79#md z-Q}rQSZ~2^BOfyER(P-ZE#;6r<(YuYMo>HJ2gp5$wCkslJ|u|9^Gl@N`^DIVTaX51 
zJ@O(V4@-n~#{0$if=`*>g4>ZFBm2d{^EE}{ugv|ZmiPRMzoX3W8x6k-vyf>>zj=27 zd=oi^d=xp1(7e`mW$b(hxz)TE{MP&)XIKtxWGylisYYb(y9jX+ zd3qJrKKOr-$;ii%Zz6d}{P{C}{1fsJQj5HT97JXyV~`h+(Ma<1bsRS#|BReQ{u;Rt zk>^U}7NiOJGV&SZLPVa&6;=g&4S5ndj(iun4UtE5`q#+);^3)6_8|X&d=Pm9=|DnA z7qSeI=ZnZmGqMVi=K%7T$RTm?>_sLa^N|lD^N=;jE+qau!H>T|{?5Gk7W`YJ z+qmQShWSx!_;%a_K8w7L zd;{+lnn-80b8MT_bZEp7?$+JT+D3Qh>i)($&bgfG`bg*DNNC{!m`Zf(Xcl1#d65+4gT*(ioZDs$? zJ^YC7IN`oJt+D1q@1CyIlDvC*eWEZ!%aELPtmc$c z+b}&)7!ZkWU!(CTl=|c>4}=#NXsJ_PhW#ajf<6%}%_&{vADTuWf9N}>tv1@dC(?Cb z7_Yt-r+RxLvuN)ociZMj_on_`?UAm!flOQ4oZkA}Wy?q*&%dZN->GbyR2a+;MLIW1 z`yN=;zjEF5NA7WJ>muFzW`{zhes^m}to2lX;~J;)K&*AYs*BUrF~7v0_oP#~yT5Ut zs6cnOTVLz0*%dj{?5=E`KS_M8n=&Wc(JjX#-8Jqm65P?hr7_mpI>APZ-1d5HtZ;Za z_R>w6H<`EYdOD`mFl(V)qy5$i4A3fgV)KzEO=&=OQbfde` z!R@?C4lwySYkF?TKQp+zxG_Io9mU~8CI>j8Il1+U0+aC{T+k%u4f{3qC& zoO`sHJb9HhViYX}bsI=nv-6U)?)4WI1}E5$1xpI^##-~{&2jgZySw(g>yHpcL$Y^qPJqXOkJK7pkd}O8n>p^ z*|XX?y~^qA)FyJ)S37;>jIRE}v>3`bC|D7%I(qZ~lb^Gxm(k=N zYIaW7T8o&{L~OUR!ImG^;$U!Tz)$Gd>fT6acl4!h2AE{XB+<9SZCdZvS4yfH&W@LW z4rz$3JM3&Y6zQpPPu9i`RyvJ`wBhX~S++0F&MUKhz5;*YGHSrwW0cm@OVW>=*%_^G zbZV=d)Adg6DNR1xmauE9BfV#s3Vgm$DClDX3lvj{NY6f58}fpsWr1R^{CQ;-IZ0WM zud-(Zi^Klnu88V}-4xxj%WXOw-L*eyPz^Vr1I3|mZhlE_k(X{D-zp4* z!-f6@%ySDC(Lmno!jeT+(Uj6#RR5`eIAne{9oBhG3SZkwL+a`f?b+<^+?5nZ6+d~* z6onWV{^D$}7TU#K9WE&ZpQP_UDGm78I3+0*k#?EA|&z!Ms56f^az7Hq+P3&FEEs z`F7NIf>(2|!pRnkR?oz<;yIzS;iZ>;Nxx!lR4=f6L zzn3qv%7Ud~mTu2k7G7k@OyzWMK`WDsDf(4e7oSL~&}l5J*t&Lal^7Bwt|ne$thTyv zJEvZb?K$EcuZ?Z#r5|fsn*l5{+?YmoKNiFT4t2~&3#PhBV?p@ z&^eUI+0oa(W34ICj=t#nbt9*X(Wn7+kQj4?*1;6j#h;lY(z$_I#x3u0)>b$zUH$cE zobCh6N3vp1rs5Q`+%4>Iha81*8MCG zPE~jR))UMMPUmX!(sD>c>^{xZNJd$iSy{f!$^8FSeooP!1jxuOEX-gmc59aI`f}9Q zRKvXTW>=C(wvopz3x>kuC)mk`^0>mlQa|sKt>tlxgN23u#S&C))p13E6%x-gjEV7# z7o+A-tlHRmQJx{o|?7e@Q8m&1|p4 ze_GBIXJ`XSuO#g!5-r60q$ZN6No_>_W`iS^tXF?EKX~tMX?3esiiX9Ps{Zoz{morW zKYg2>iv7&BiRB6dE84V?N!Z!9-RW#`HoP=el4e?W>KL9KPUTML;6|sa!Fi!dr|RS> zfuJV{l+Z_~rOK&5y~SF>uSc3~+sWubu?uBRIbAMqwsxSJyg}8qO76*(ZfAwt*v2s6 
zJr<;NnzyHmFTr9A54_bq9x7<`eqgVbYQM>H?U{^jYb_&F)+=wrqgv$I$mmD4)6*Sm zX~BqNj?T)IB_z_bH(IwgF<_;On#yS+Tb8NA@j_J~vbY$RoO=lOI}fuY}%Cbx@rCo^4{5 zF&|{aw+R_0b)5>d?WM|0M^)>bmhEPuSX&{JkF>FuZgS{hgKDrqp7yIZ&1l-*YM{Qc z)-|z~?m<1`C9nI7!9HRJiP^6PzuHb-cg-)8t-Qab>=Qbp>PSr_ zK{cjg%~d)EhSTA53rC~(Ht6`dpcv5Sawz8v@Vj&;18?BsQ@f89Fl0p>cl zwcgSxdf3+VqHAGNRh>8_@cs!CXxM}|SBrJ#?djRQnWkuHa*vwjaZqla`C>7Qb+D>o zx2`JLPEpM>1IIa5#{|a2pd}nMk8>ozv=Qm8#)?7X+4m1yMzzeaQ8-_6J1U2cyRjC_ z0c(r9{ve}MH7L^4!|{kreoaS7*r_;<@!^$P4rV-06DwMIl!TKTl2N1zk`ltLWOIof zbV}cFV)A~Kdu%@?)^4VkWR)BG7UdXNk?@^>|Y&{gdo8d3)!f*j-+V2?LZZ zuC92xgnOyA*At9ZexE>tPf>NxsHY1$^CTN9#)>h4T6>rzFp%S?b-J_C{T7vU+L&L( zNKlQsDKm5FB3e{C-PyNBOhBD2Qa4p{x~dx~J!&!S#mURI3a`2LI(0SizGgkDJD_%o z>U-k+9di`(ff8qw`9m3Rm9NYw4rW-gfDIQRMe)5D!v*!GUFn%~P1}obPHC$m{1Itu zgb~yBT3EOw}z z))<)kGM+71FykTACaJIEuU2OGix)gHPn1bA`x9f6G&Y*7t2& z;G`v4lR|;8-zNuvf&7UD;iAGP=jY{?EM4$SAXw-RKbdUQ&YzU*Gk?;PYUNtq9_^q` z3OX$^Q0;*x^99{b2` z+UV479L_9aslg7@X_Glo=eFGC{v!VZJ{=9~TUs3WO6d@9X>IQ_$dFVxDbYp5JNq)T z3CB6!#!FV<QK{e0-1E=0JPRO5M#6f!~{*_CIzH_q5*}oOxa$+u@ z7C0T%vl3lAy(LK#=#r0%`UOccsHx<8H3_Q94X(lNE;dG~OX9#$pfRvLEl8abGnpp5 zsCtZ~-$LZLfsMlUHG0%ZR%~MVu)%7jz0Y!`MkYU!kFT8OBOEbln8Y!u?BDcUL48#h zdkLal5l}9f_Jx6TFr^wwXoNmnV)`Q?`~VfxHSB1H#)>m^5^m zNHq^N;vk!SP||1rls!<+vxn0U4dxSTT@5uH`K(4hi@n&SizUjek<0Rz{`|>nz?WLw9OQNZq;U(c<7#lKoPWU|X1+n}$<+e3-%v z@=i%N107nB5x{7jKc0ww+>!4>@S=jF?b;4*(nu1u@mY|j#4 zjsU&AET7lKD6VMl--?Y;PP?Pu3Hx)zGO0HI4;Dd}!x`RJ^`vRfD_TvsrVuY1TE4qdE zPJr$E=FNGC(-6Hxv)w!7M`v|`kDb}#?08W$bX#j;B9apXt=+;Uem+AlC=~-xzVBht z=lO%71&hiSn9vJ#g0gg<5^L$xFCYVkyV>gy;8bEzux2t*y6ZP{jF>z+bev<0p&uzVt^tHk<}-uJF(>1>x0a^yhX z#HW|Pn7fz?u>(@h3Oo~?%usHfso7&rDovi4IVyhHD5q>0B@FvlgeNU4%ncOZYA-IxEeZL<(;ms0<-27doD_74 zzr=TU@nX5ulAmqgvM3N9HMg*|B)8BvD_Bw#%C?J_dCt(Z$+z12b6V!Dqwet+vn+=F zG+9};Jw11kztGN_IC%=ic-&_^r2*y!{ds|~J!RsQiCOr}CSpM!ah*Dx1EI%y>;4 z2_|ktCr(2XH-6~S}IfBH&t`bsh%#LwyCboxqi z`bu*8O0xbl2k-W>ZWhy5lKEP|#|J$_PhUxn|NDjHqsjD@WbU@4o@eSe0PisPtaqK$ zSCZ3Ll4~1MZYFrQRORb{_fPIpF8C&gOJ7OmgU+Da%IPb~-e)fTw5k7aJAEZtznZ46 
zBqv?x8tJ;Teoo+{-nm|PPJDkz`nD{WugzuNbGc9aKDnJs5}YazpZ`dgO%m6X(pQqx zSCWTZUgio)`bu*8N^<&2a{5Yg`bu&``bx4~qEEWbXX}mO^p)iFmE`o5 Date: Fri, 5 Apr 2024 00:35:01 +0800 Subject: [PATCH 162/164] Create __version__.py --- f2/utils/__version__.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 f2/utils/__version__.py diff --git a/f2/utils/__version__.py b/f2/utils/__version__.py new file mode 100644 index 0000000..db87905 --- /dev/null +++ b/f2/utils/__version__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +""" +@Description:__version__.py +@Date :2023/01/15 23:42:17 +@Author :JohnserfSeed +@version :0.0.1.5 +@License :(C)Copyright 2019-2022, Liugroup-NLPR-CASIA +@Github :https://github.com/johnserf-seed +@Mail :johnserf-seed@foxmail.com +------------------------------------------------- +Change Log : +2023/01/15 23:43:07 - Create __version__.py +------------------------------------------------- +""" + +_author = "JohnserfSeed" +_description_cn = "基于[red]异步[/red]的[green]全平台下载工具." +_description_en = "[yellow]Asynchronous based [/yellow]full-platform download tool." 
+_reponame = "f2" +_repourl = "https://github.com/Johnserf-Seed/f2" +_version = "0.0.1.5" + +__all__ = [ + "_author", + "_description_cn", + "_description_en", + "_reponame", + "_repourl", + "_version", +] From f85dcc29e526d66ff68504bc017d51407e950d92 Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Fri, 5 Apr 2024 00:37:17 +0800 Subject: [PATCH 163/164] =?UTF-8?q?release:=200.0.1.5=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## [0.0.1.5] - 2024-04-04 ### Added - 添加安全政策汇报 - 添加`run_app`时输出版本号 - 添加`douyin`用户收藏夹下载 - 添加`douyin`的`filter`对非法收藏夹名字符的处理 - 添加`douyin`用户音乐收藏下载 - 添加`douyin`音乐歌词json转lrc方法 - 添加`douyin`用户收藏音乐下载任务 - 添加`douyin`配置`--lyric` - 添加`f2 utils`的`get_cookie_from_browser`方法 - 添加`f2 utils`的`check_invalid_naming`方法 - 添加`f2 utils`的`merge_config`方法 - 添加`douyin`粉丝用户接口方法 - 添加`douyin`关注用户接口方法 - 添加`douyin`,`tiktok`数据过滤器的原始字段 - 添加对30位时间戳进行格式化 - 添加测试抖音原声歌词转换 - 添加获取抖音用户粉丝代码片段 - 添加获取抖音用户关注代码片段 - 添加`fetch`方法的`timeout`参数,避免请求过于频繁 - 添加`douyin`用户收藏夹代码片段 - 添加对丢失链接的重试逻辑 - 添加`自定义UA`生成`XBogus`参数 - 添加`douyin`,`tiktok`对`UserProfile`请求内容为空的报错 ### Changed - 修改`douyin`主页收藏模式为`collection` - 更正`douyin`文档`user-mix`方法 - 修改`F2`版本号输出 - 修改`douyin`,`tiktok`帮助信息 - 优化`douyin`,`tiktok`的`utils`中`msToken`,`ttwid`,`sec_user_id`,`aweme_id`,`webcast_id`,具体请求错误的输出 - 明确`douyin`,`tiktok`所有`fetch`函数返回为过滤器类型 - 更新了F2版本号的导入 - 优化`tiktok`的`handler`处理播放列表的逻辑 - 优化`douyin`,`tiktok`中对具体请求错误的输出 - 更正`douyin`,`tiktok`受`collects_id`类型导致的多次转换 - 更正`tiktok`的`handler`多种获取用户信息方法的参数 - 添加`base_downloader`对重命名文件时的异常处理 - 更新`_dl`的`head`请求`Content-Length`失效时调用`get`方法 - 更新`douyin`,`tiktok`接口文档代码片段 - 更新`douyin`,`tiktok`在`cli`中的`handler_auto_cookie`方法 - 更新`douyin`,`tiktok`在`cli`中的`handler_naming`方法 - 更新`douyin`,`tiktok`的`--mode`统一`choice`管理 - 更新`F2`帮助说明格式 - 统一了`douyin`关注粉丝用户的`total`字段 - 修改下载逻辑以提高性能 - 更新`douyin`,`tiktok`数据库字段(需要删除旧数据库或迁移) - 优化`douyin`,`tiktok`的`handler`模块注释表达与方法参数格式 - 重构了所有`handle`方法的调用 - 重构了所有`fetch`方法的返回类型 - 
调整`douyin` `mix`作品在没有更多数据时提前`break` - 调整`tiktok`获取用户数据去除地区参数 - 优化在适当的位置`yield`作品数据 - 修改日志输出级别 - 重构数据库异常类 - 重构文件异常类 - 重构接口异常类 - 完善`i18n`消息 ### Deprecated - 弃用`douyin` `UserLiveFilter`的无用方法 - 弃用`douyin` `PostDetailFilter`的无用方法 ### Removed - 删除文档旧版本`-d`指令 - 移除`tiktok`的`post\detail`接口示例 - 删除无用的`__init__.py`文件 - 删除`douyin`,`tiktok`:`cli`下的`get_cookie_from_browser`方法 - 删除`example`示例 - 删除无用导入 - 删除`apps`中db模块的`aiosqlite`导入与错误处理 ### Fixed - 修复本地化服务 - 修复`douyin`关注用户数据过滤器`_to_list`方法的排除字段 - 修复`douyin`数据过滤器时间戳类型 ### Security - 更新`rich`版本到`13.7.1` - 更新`douyin`接口版本到`19.5.0` --- .github/ISSUE_TEMPLATE/bug-report.md | 2 +- docs/.vitepress/config.mts | 2 +- f2/__init__.py | 2 +- f2/helps.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index cd9d271..22b4ac8 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -30,7 +30,7 @@ A: - 操作系统: [e.g. Win10 x64 22H2 19045.4046] - 浏览器 [e.g. Edge 122.0.2365.52] - 终端 [e.g. WT 1.18.10301.0] - - F2版本 [e.g. 0.0.1.4] + - F2版本 [e.g. 0.0.1.5] diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 0fc351b..4fa7db7 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -6,7 +6,7 @@ const require = createRequire(import.meta.url) const pkg = require('vitepress/package.json') -const version = "v0.0.1.4-pw.1" +const version = "v0.0.1.5-pw.2" // https://vitepress.dev/reference/site-config export default defineConfig({ diff --git a/f2/__init__.py b/f2/__init__.py index 2e8867f..609517b 100644 --- a/f2/__init__.py +++ b/f2/__init__.py @@ -1,5 +1,5 @@ __author__ = "JohnserfSeed " -__version__ = "0.0.1.4" +__version__ = "0.0.1.5" __description_cn__ = "基于[red]异步[/red]的[green]全平台下载工具." __description_en__ = "[yellow]Asynchronous based [/yellow]full-platform download tool." 
__reponame__ = "f2" diff --git a/f2/helps.py b/f2/helps.py index 4563ca0..ece0bc3 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -4,7 +4,7 @@ @Description:helps.py @Date :2023/02/06 17:36:41 @Author :JohnserfSeed -@version :0.0.1.4 +@version :0.0.1.5 @License :Apache License 2.0 @Github :https://github.com/johnserf-seed @Mail :johnserf-seed@foxmail.com From df937cdce2342762db03195b1ab9ada1e383475e Mon Sep 17 00:00:00 2001 From: JohnserfSeed Date: Fri, 5 Apr 2024 00:42:03 +0800 Subject: [PATCH 164/164] Delete __version__.py --- f2/utils/__version__.py | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 f2/utils/__version__.py diff --git a/f2/utils/__version__.py b/f2/utils/__version__.py deleted file mode 100644 index db87905..0000000 --- a/f2/utils/__version__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- -""" -@Description:__version__.py -@Date :2023/01/15 23:42:17 -@Author :JohnserfSeed -@version :0.0.1.5 -@License :(C)Copyright 2019-2022, Liugroup-NLPR-CASIA -@Github :https://github.com/johnserf-seed -@Mail :johnserf-seed@foxmail.com -------------------------------------------------- -Change Log : -2023/01/15 23:43:07 - Create __version__.py -------------------------------------------------- -""" - -_author = "JohnserfSeed" -_description_cn = "基于[red]异步[/red]的[green]全平台下载工具." -_description_en = "[yellow]Asynchronous based [/yellow]full-platform download tool." -_reponame = "f2" -_repourl = "https://github.com/Johnserf-Seed/f2" -_version = "0.0.1.5" - -__all__ = [ - "_author", - "_description_cn", - "_description_en", - "_reponame", - "_repourl", - "_version", -]