diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index cd9d271..22b4ac8 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -30,7 +30,7 @@ A: - 操作系统: [e.g. Win10 x64 22H2 19045.4046] - 浏览器 [e.g. Edge 122.0.2365.52] - 终端 [e.g. WT 1.18.10301.0] - - F2版本 [e.g. 0.0.1.4] + - F2版本 [e.g. 0.0.1.5] diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8c0cd88 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,295 @@ +# Changelog + +本项目的所有变更都将记录在此文件中。 +格式基于 [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)、 +本项目遵循 [Semantic Versioning](https://semver.org/spec/v2.0.0.html)。 + +## [Unreleased] + +- `0.0.1.6`版本中添加对`weibo`,`x`的支持 + +## [0.0.1.5] - 2024-04-04 + +### Added + +- 添加安全政策汇报 +- 添加`run_app`时输出版本号 +- 添加`douyin`用户收藏夹下载 +- 添加`douyin`的`filter`对非法收藏夹名字符的处理 +- 添加`douyin`用户音乐收藏下载 +- 添加`douyin`音乐歌词json转lrc方法 +- 添加`douyin`用户收藏音乐下载任务 +- 添加`douyin`配置`--lyric` +- 添加`f2 utils`的`get_cookie_from_browser`方法 +- 添加`f2 utils`的`check_invalid_naming`方法 +- 添加`f2 utils`的`merge_config`方法 +- 添加`douyin`粉丝用户接口方法 +- 添加`douyin`关注用户接口方法 +- 添加`douyin`,`tiktok`数据过滤器的原始字段 +- 添加对30位时间戳进行格式化 +- 添加测试抖音原声歌词转换 +- 添加获取抖音用户粉丝代码片段 +- 添加获取抖音用户关注代码片段 +- 添加`fetch`方法的`timeout`参数,避免请求过于频繁 +- 添加`douyin`用户收藏夹代码片段 +- 添加对丢失链接的重试逻辑 +- 添加`自定义UA`生成`XBogus`参数 +- 添加`douyin`,`tiktok`对`UserProfile`请求内容为空的报错 + +### Changed + +- 修改`douyin`主页收藏模式为`collection` +- 更正`douyin`文档`user-mix`方法 +- 修改`F2`版本号输出 +- 修改`douyin`,`tiktok`帮助信息 +- 优化`douyin`,`tiktok`的`utils`中`msToken`,`ttwid`,`sec_user_id`,`aweme_id`,`webcast_id`,具体请求错误的输出 +- 明确`douyin`,`tiktok`所有`fetch`函数返回为过滤器类型 +- 更新了F2版本号的导入 +- 优化`tiktok`的`handler`处理播放列表的逻辑 +- 优化`douyin`,`tiktok`中对具体请求错误的输出 +- 更正`douyin`,`tiktok`受`collects_id`类型导致的多次转换 +- 更正`tiktok`的`handler`多种获取用户信息方法的参数 +- 添加`base_downloader`对重命名文件时的异常处理 +- 更新`_dl`的`head`请求`Content-Length`失效时调用`get`方法 +- 更新`douyin`,`tiktok`接口文档代码片段 +- 更新`douyin`,`tiktok`在`cli`中的`handler_auto_cookie`方法 +- 更新`douyin`,`tiktok`在`cli`中的`handler_naming`方法 +- 更新`douyin`,`tiktok`的`--mode`统一`choice`管理 +- 更新`F2`帮助说明格式 +- 统一了`douyin`关注粉丝用户的`total`字段 +- 修改下载逻辑以提高性能 +- 更新`douyin`,`tiktok`数据库字段(需要删除旧数据库或迁移) +- 优化`douyin`,`tiktok`的`handler`模块注释表达与方法参数格式 +- 重构了所有`handle`方法的调用 +- 重构了所有`fetch`方法的返回类型 +- 调整`douyin` `mix`作品在没有更多数据时提前`break` +- 调整`tiktok`获取用户数据去除地区参数 +- 优化在适当的位置`yield`作品数据 +- 修改日志输出级别 +- 重构数据库异常类 +- 重构文件异常类 +- 重构接口异常类 +- 完善`i18n`消息 + +### Deprecated + +- 弃用`douyin` `UserLiveFilter`的无用方法 +- 弃用`douyin` `PostDetailFilter`的无用方法 + +### Removed + +- 删除文档旧版本`-d`指令 +- 移除`tiktok`的`post\detail`接口示例 +- 删除无用的`__init__.py`文件 +- 删除`douyin`,`tiktok`:`cli`下的`get_cookie_from_browser`方法 +- 删除`example`示例 +- 删除无用导入 +- 删除`apps`中db模块的`aiosqlite`导入与错误处理 + +### Fixed + +- 修复本地化服务 +- 修复`douyin`关注用户数据过滤器`_to_list`方法的排除字段 +- 修复`douyin`数据过滤器时间戳类型 + +### Security + +- 更新`rich`版本到`13.7.1` +- 更新`douyin`接口版本到`19.5.0` + + +## [0.0.1.4] - 2024-02-16 + +### Added + +- 添加`black`格式化白名单 +- 添加`douyin`,`tiktok`命令行对`--proxies`命令的支持 +- 添加`tiktok`数据库忽略字段 +- 添加文档QA页面 +- 添加`douyin`对`msToken`值验证 +- 添加写入配置文件时处理文件权限问题 +- 添加提取有效URL的错误类型 +- 添加`split_filename`方法处理不同系统下文件名长度 +- 添加`douyin`,`tiktok`:`cli`模块的`merge_config`方法 +- 添加了低频配置文件默认路径 +- 添加`split_filename`函数单元测试 +- 添加`base_downloader`模块日志堆栈错误输出 +- 添加`tiktok`的`get_secuid`方法对不支持地区的错误消息 +- 添加`douyin`,`tiktok`:`utils`模块对空urls列表的错误处理 +- 添加`douyin`,`tiktok`:`utils`模块对AwemeIdFetcher的连接失败处理 +- 添加`douyin`图集`aweme_id`测试链接 +- 添加文档`algolia`配置参数 +- 添加`douyin`,`tiktok`:`{aweme_id}`与`{uid}`的文件名模板 + +### Changed + +- 重写`douyin`,`tiktok` handler对`crawler`与`dl`的配置,提升性能 +- 
将`dict`类型的`--proxies`添加默认值`None` +- 将配置文件中`url`设置为空,防止因为缺省出错 +- 对高低频配置合并时只合并非空值 +- 更新翻译模板 +- 调整`timestamp_2_str`方法的默认时间字符串格式 +- 将低频参数配置移入`F2`的`conf.yaml` +- 修改`tiktok`对`msToken`值验证 +- 修改`douyin`,`tiktok`的`TokenManager`里固定配置的读取方式 +- 改进 `douyin`,`tiktok` handler类的结构和清晰度 +- 更新方法签名,使用 `self` 替代 `cls` +- 在适当的情况下,用异步实例方法替代类方法 +- 更新`douyin`,`tiktok` `handler`类下的`fetch`用法 +- 修改`main`入口函数,实例化每个app的`handler`并传递给相应的方法 +- 更新`douyin`,`tiktok`的`get_or_add_user_data`方法,以处理`Filter`类型的数据 +- 更新`F2 -d`参数,现在需要指定`debug`模式 +- 更新`conf_manager`模块,添加了日志输出 +- 更新`douyin`接口文档`format-file-name`代码片段 +- 更新`douyin`,`tiktok`的`crawler`模块重新添加异步上下文管理器 +- 更新`douyin`,`tiktok`的`utils`模块捕获错误时显示具体类名 +- 更新了配置文件加载逻辑 +- 更新了日志输出 +- 更新`split_filename`方法适配双语种环境 +- 更新`douyin`,`tiktok`的`crawler`模块获取`response`的多种http请求方法 +- 修改`file_exceptions`模块,使输出更简洁 +- 修改`db_exceptions`模块,使输出更简洁 +- 修改`api_exceptions`模块,使输出更简洁 +- 更改`base_crawler`模块里的方法名称 +- 完善所有`APIConnectionError`的错误处理 +- 更新在无代理时配置默认值 +- 改进`douyin`的cli模块的`handler_sso_login`方法 +- 更新`douyin`,`tiktok`单元测试用例 +- 更新接口文档开发者代码片段 +- 修改`cli_console`进度条默认宽度 + +### Deprecated + +- 弃用`douyin`:`extract_desc_from_share_desc`方法 +- 弃用`douyin`:`get_request_sizes`方法 + +### Removed + +- 移除文档`reference`页面 +- 删除`douyin`:`VerifyFpManager`注释代码 +- 删除`douyin`: `cli`模块的英文注释 +- 移除`split_filename`方法的`desc_length_limit`参数 +- 删除`conf.yaml`中的代理值 +- 删除`base_crawler`模块选择随机代理的注释代码 +- 删除`base_downloader`模块中`_download_chunks`方法的`finally` +- 删除`F2 conf.yaml`中的代理值与无效值 +- 删除弃用接口测试 + +### Fixed + +- 修复部分自定义配置失效的问题 +- 修复接口缺失时间戳值导致的问题 +- 修复`get_or_add_user_data`中的`AttributeError`问题 +- 修复了非windows系统下创建长中文名文件出错的问题 +- 修复了`tiktok`文件名出错的问题 +- 修复了在更新配置时缺少自定义配置文件路径的问题 +- 修复`douyin`直播嵌套ts文件无法获取字节大小的问题 +- 修复`base_downloader`下载文件区块时未能正确捕获超时错误 +- 修复`cli`退出时`base_downloader`出现`UnboundLocalError`错误的问题 +- 修复`douyin`收藏作品下载错误的问题 +- 修复`douyin`,`tiktok`:`cli`的默认参数影响kwargs合并 +- 修正`douyin`的`utils`模块对`aweme_id`的处理 + +### Security + +- 依赖更新`pyyaml6.0 -> pyyaml6.0.1` + + +## [0.0.1.3] - 2024-01-07 + +### Added + +- 添加`douyin`,`tiktok`对`--interval`命令的支持 + +### Changed + +- 取消`bool`参数的默认值,防止配置文件与`cli`命令冲突 +- 调整日志控制台输出与级别 +- 修改默认与自定义配置读取与合并 +- 恢复`tiktok`接口模型的`msToken`值 +- 修改自定义文件名模板中作品创建时间的键名 +- 更新主配置文件格式 + + +## [0.0.1.2] - 2024-01-05 + +### Added + +- 添加依赖缺失时输出错误到日志 +- 使用`black`统一代码风格 +- 添加`douyin`单个作品(one)与`--sso-login`命令帮助 + +### Changed + +- `--auto-cookie`命令去掉`none`参数 +- 所有app的`--interval`命令参数改为`all` +- 完善`douyin`的`cli`帮助说明 +- 更新`F2`帮助说明 +- 完善`tiktok`的`cli`帮助说明 +- 修改代码片段高亮 +- 更新项目文档 +- 更新翻译文件 + +### Fixed + +- 修复`--init-config`命令初始化的问题 +- 修复`douyin`文档`user-live`代码片段错误方法名 +- 修复`douyin`文档`user-mix`代码片段`aweme_id`不明的问题 +- 修复`douyin`,`tiktok`未提供参数也自动获取ck +- 修复显示语言中`en_US`缺失 +- 修复接口文档的代码片段格式与错误 +- 使用缺省`none`来避免触发`callback`干预程序运行 + + +## [0.0.1.1] - 2024-01-01 + +### Added + +- 添加依赖缺失时输出错误到日志 + +### Fixed + +- 修复pyproject.toml依赖部分遗漏造成的`Error: No such command` + + +## [0.0.1-pw.1] - 2024-01-01 + +### Added + +- 创建文档 +- 添加`douyin`,`tiktok`应用 +- 添加`douyin`,`tiktok`测试 +- 添加代码示例 +- 添加`i18n`翻译模板文件 +- 添加`show_qrcode`方法,用于显示二维码 +- 添加`s_v_web_id`方法 +- `douyin`:添加`room_id`查询直播间信息接口 +- `douyin`:添加`--sso-login`命令,使用扫码获取cookie +- `douyin`:添加`sso登录`测试 +- 添加`douyin`,`tiktok`开发接口文档 +- 添加`douyin`,`tiktok`接口地址生成XB的方法 +- 添加`douyin`,`tiktok`接口文档代码片段 +- 创建目录时支持绝对与相对路径 +- 添加`douyin`,`tiktok`获取列表`secuid`,`unique_id`,`aweme_id`的方法 + +### Changed + +- 细化`Basecrwaler`的`response`处理方法 +- 自定义将日志输出到控制台 +- 将guide文档调整为统一文件夹下 +- 修改文档代码片段高亮行号 +- 重命名接口模型生成XB的方法 +- 修改`douyin`提取列表用户id返回值变量名 +- 修改`douyin`提取列表用户直播rid返回值变量名 +- 完善配置文件site-config部分 +- 修改默认配置参数置空 + +### Fixed + +- 
修复`douyin`用户数据库名称 +- 修复`douyin`直播结束后无法下载 +- 修复`douyin`在`handler_user_mix`方法中`AsyncUserDB`只初始化一次 +- 修复`user-nickname`代码片段导入 +- 修复`douyin`文档`user-get-add`代码片段导入 +- 修复`tiktok`文档`user-mix`代码导入与缩进 +- 修复`tiktok`文档`one-video`代码缩进 diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 0fc351b..4fa7db7 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -6,7 +6,7 @@ const require = createRequire(import.meta.url) const pkg = require('vitepress/package.json') -const version = "v0.0.1.4-pw.1" +const version = "v0.0.1.5-pw.2" // https://vitepress.dev/reference/site-config export default defineConfig({ diff --git a/docs/guide/apps/douyin/index.md b/docs/guide/apps/douyin/index.md index 495f955..c95294e 100644 --- a/docs/guide/apps/douyin/index.md +++ b/docs/guide/apps/douyin/index.md @@ -15,7 +15,7 @@ outline: deep | 下载单个作品 | handle_one_video | | 下载用户发布作品 | handle_user_post | | 下载用户喜欢作品 | handle_user_like | -| 下载用户收藏作品 | handle_user_collect | +| 下载用户收藏作品 | handle_user_collection | | 下载用户合辑作品 | handle_user_mix | | 下载用户直播流 | handle_user_live | | 下载用户首页推荐作品 | handle_user_feed | @@ -25,7 +25,7 @@ outline: deep | 单个作品数据 | fetch_one_video | 🟢 | | 用户发布作品数据 | fetch_user_post_videos | 🟢 | | 用户喜欢作品数据 | fetch_user_like_videos | 🟢 | -| 用户收藏作品数据 | fetch_user_collect_videos | 🟢 | +| 用户收藏作品数据 | fetch_user_collection_videos | 🟢 | | 用户合辑作品数据 | fetch_user_mix_videos | 🟢 | | 用户直播流数据 | fetch_user_live_videos | 🟢 | | 用户直播流数据2 | fetch_user_live_videos_by_room_id | 🟢 | @@ -74,7 +74,7 @@ outline: deep | 用户信息接口地址 | DouyinCrawler | fetch_user_profile | 🟢 | | 主页作品接口地址 | DouyinCrawler | fetch_user_post | 🟢 | | 喜欢作品接口地址 | DouyinCrawler | fetch_user_like | 🟢 | -| 收藏作品接口地址 | DouyinCrawler | fetch_user_collect | 🟢 | +| 收藏作品接口地址 | DouyinCrawler | fetch_user_collection | 🟢 | | 合辑作品接口地址 | DouyinCrawler | fetch_user_mix | 🟢 | | 作品详情接口地址 | DouyinCrawler | fetch_post_detail | 🟢 | | 作品评论接口地址 | DouyinCrawler | fetch_post_comment | 🟡 | @@ -167,7 +167,7 @@ outline: deep | :--- | :--- | :--- | | aweme_data | dict | 视频数据字典,包含视频ID、视频文案、作者昵称、页码等 | -<<< @/snippets/douyin/user-collect.py{16-17,22-25} +<<< @/snippets/douyin/user-collection.py{16-17,22-25} ### 用户合辑作品数据 🟢 diff --git a/docs/snippets/douyin/format-file-name.py b/docs/snippets/douyin/format-file-name.py index 309be02..510cf80 100644 --- a/docs/snippets/douyin/format-file-name.py +++ b/docs/snippets/douyin/format-file-name.py @@ -16,7 +16,7 @@ async def main(): # 单作品的数据 aweme_data = await DouyinHandler(kwargs).fetch_one_video("7218193198328433954") # 格式化后的文件名 - print(format_file_name(kwargs.get("naming"), aweme_data) + "_video") + print(format_file_name(kwargs.get("naming"), aweme_data._to_dict()) + "_video") # 文件名模板 kwargs = { @@ -31,10 +31,15 @@ async def main(): # 用户自定义字段 custom_fields = {"location": "New York"} # 格式化后的自定义文件名 - print(format_file_name(kwargs.get("naming"), aweme_data, custom_fields) + "_video") + print( + format_file_name(kwargs.get("naming"), aweme_data._to_dict(), custom_fields) + + "_video" + ) # 格式化后的自定义文件名,长度限制在100 print( - format_file_name(kwargs.get("naming"), aweme_data, custom_fields, 100) + format_file_name( + kwargs.get("naming"), aweme_data._to_dict(), custom_fields, 100 + ) + "_video" ) diff --git a/docs/snippets/douyin/one-video.py b/docs/snippets/douyin/one-video.py index 6e664d2..a608f41 100644 --- a/docs/snippets/douyin/one-video.py +++ b/docs/snippets/douyin/one-video.py @@ -12,9 +12,11 @@ async def main(): - print(await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294994585925848359")) - 
print("-------------------") - print(await DouyinHandler(kwargs).fetch_one_video(aweme_id="7305827432509082913")) + video = await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294994585925848359") + print("=================_to_raw================") + print(video._to_raw()) + # print("=================_to_dict================") + # print(video._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-collect.py b/docs/snippets/douyin/user-collection.py similarity index 51% rename from docs/snippets/douyin/user-collect.py rename to docs/snippets/douyin/user-collection.py index d37064c..a296c22 100644 --- a/docs/snippets/douyin/user-collect.py +++ b/docs/snippets/douyin/user-collection.py @@ -8,23 +8,18 @@ }, "proxies": {"http": None, "https": None}, "cookie": "YOUR_COOKIE_HERE", + "timeout": 10, } async def main(): - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos() - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( - 0, 10, 20 - ) - ] - print(results) + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collection_videos(): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-collects.py b/docs/snippets/douyin/user-collects.py new file mode 100644 index 0000000..fb609d1 --- /dev/null +++ b/docs/snippets/douyin/user-collects.py @@ -0,0 +1,29 @@ +import asyncio +from f2.apps.douyin.handler import DouyinHandler + +kwargs = { + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", + "Referer": "https://www.douyin.com/", + }, + "proxies": {"http": None, "https": None}, + "timeout": 10, + "cookie": "YOUR_COOKIE_HERE", +} + + +async def main(): + collects_id = "" # 收藏夹ID + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collects_videos( + collects_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/snippets/douyin/user-follower.py b/docs/snippets/douyin/user-follower.py new file mode 100644 index 0000000..f0e6a66 --- /dev/null +++ b/docs/snippets/douyin/user-follower.py @@ -0,0 +1,49 @@ +import asyncio +from f2.log.logger import logger +from f2.apps.douyin.handler import DouyinHandler + +kwargs = { + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", + "Referer": "https://www.douyin.com/", + }, + "proxies": { + "http": None, + "https": None, + }, + "timeout": 10, + "cookie": "YOUR_COOKIE_HERE", +} + + +async def main(): + sec_user_id = "" # 公开粉丝的账号 + # sec_user_id = "MS4wLjABAAAAGPm-wPeGQuziCu5z6KerQA7WmSTnS99c8lU8WLToB0BsN02mqbPxPuxwDjKf7udZ" # 隐私设置的账号 + # 根据max_time 和 min_time 区间获取用户粉丝列表 + async for follower in DouyinHandler(kwargs).fetch_user_follower( + sec_user_id=sec_user_id, + # max_time=1668606509, + # 
min_time=0,
+    ):
+        if follower.status_code != 0:
+            logger.error(
+                "错误代码:{0} 错误消息:{1}".format(follower.status_code, follower.status_msg)
+            )
+        else:
+            logger.info(
+                "用户ID:{0} 用户昵称:{1} 用户作品数:{2}".format(
+                    follower.sec_uid, follower.nickname, follower.aweme_count
+                )
+            )
+
+        # print("=================_to_raw================")
+        # print(follower._to_raw())
+        # print("=================_to_dict===============")
+        # print(follower._to_dict())
+        # print("=================_to_list===============")
+        # 数据量多的情况下_to_list这种数据结构比较慢
+        # print(follower._to_list())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/snippets/douyin/user-following.py b/docs/snippets/douyin/user-following.py
new file mode 100644
index 0000000..9fa39f1
--- /dev/null
+++ b/docs/snippets/douyin/user-following.py
@@ -0,0 +1,49 @@
+import asyncio
+from f2.log.logger import logger
+from f2.apps.douyin.handler import DouyinHandler
+
+kwargs = {
+    "headers": {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
+        "Referer": "https://www.douyin.com/",
+    },
+    "proxies": {
+        "http": None,
+        "https": None,
+    },
+    "timeout": 10,
+    "cookie": "YOUR_COOKIE_HERE",
+}
+
+
+async def main():
+    sec_user_id = ""  # 公开关注的账号
+    # sec_user_id = "MS4wLjABAAAAGPm-wPeGQuziCu5z6KerQA7WmSTnS99c8lU8WLToB0BsN02mqbPxPuxwDjKf7udZ"  # 隐私设置的账号
+    async for following in DouyinHandler(kwargs).fetch_user_following(
+        sec_user_id=sec_user_id
+    ):
+        if following.status_code != 0:
+            logger.error(
+                "错误代码:{0} 错误消息:{1}".format(following.status_code, following.status_msg)
+            )
+        else:
+            logger.info(
+                "用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}".format(
+                    following.sec_uid,
+                    following.nickname,
+                    following.aweme_count,
+                    following.secondary_text,
+                )
+            )
+
+        # print("=================_to_raw================")
+        # print(following._to_raw())
+        # print("=================_to_dict===============")
+        # print(following._to_dict())
+        # print("=================_to_list===============")
+        # 数据量多的情况下_to_list这种数据结构比较慢
+        # print(following._to_list())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/snippets/douyin/user-like.py b/docs/snippets/douyin/user-like.py
index 9716a3d..e48775d 100644
--- a/docs/snippets/douyin/user-like.py
+++ b/docs/snippets/douyin/user-like.py
@@ -8,26 +8,22 @@
     },
     "proxies": {"http": None, "https": None},
     "cookie": "YOUR_COOKIE_HERE",
+    "timeout": 10,
 }


 async def main():
-    user_sec_id = "YOUR_HOME_PAGE"  # 替换开放喜欢列表的sec_user_id
-    results = [
-        aweme_data_list
-        async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos(
-            user_sec_id
-        )
-    ]
-    print(results)
-    print("-------------------")
-    results = [
-        aweme_data_list
-        async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos(
-            user_sec_id, 0, 10, 20
-        )
-    ]
-    print(results)
+    sec_user_id = "MS4wLjABAAAAW9FWcqS7RdQAWPd2AA5fL_ilmqsIFUCQ_Iym6Yh9_cUa6ZRqVLjVQSUjlHrfXY1Y"  # 开放喜欢列表的sec_user_id
+    # sec_user_id = "MS4wLjABAAAAkA9Zsx7wNHUWse8xwUt9zzlAUfZ-7ZOBMbPzKhkDYEjUd-f4qS_DM6fNyxP_-9l2"  # 未开放喜欢列表的sec_user_id
+    async for aweme_data_list in DouyinHandler(kwargs).fetch_user_like_videos(
+        sec_user_id, 0, 10, 20
+    ):
+        print("=================_to_raw================")
+        print(aweme_data_list._to_raw())
+        # print("=================_to_dict===============")
+        # print(aweme_data_list._to_dict())
+        # print("=================_to_list===============")
+        # print(aweme_data_list._to_list())


 if __name__ == "__main__":
diff --git a/docs/snippets/douyin/user-live-room-id.py 
b/docs/snippets/douyin/user-live-room-id.py index 24ae1fb..5c427dc 100644 --- a/docs/snippets/douyin/user-live-room-id.py +++ b/docs/snippets/douyin/user-live-room-id.py @@ -12,11 +12,13 @@ async def main(): - print( - await DouyinHandler(kwargs).fetch_user_live_videos_by_room_id( - room_id="7318296342189919011" - ) + live = await DouyinHandler(kwargs).fetch_user_live_videos_by_room_id( + room_id="7318296342189919011" ) + print("=================_to_raw================") + print(live._to_raw()) + # print("=================_to_dict===============") + # print(live._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-live.py b/docs/snippets/douyin/user-live.py index 837e8db..dba3acf 100644 --- a/docs/snippets/douyin/user-live.py +++ b/docs/snippets/douyin/user-live.py @@ -12,7 +12,11 @@ async def main(): - print(await DouyinHandler(kwargs).fetch_user_live_videos(webcast_id="775841227732")) + live = await DouyinHandler(kwargs).fetch_user_live_videos(webcast_id="775841227732") + print("=================_to_raw================") + print(live._to_raw()) + # print("=================_to_dict===============") + # print(live._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-mix.py b/docs/snippets/douyin/user-mix.py index 049418a..8e25520 100644 --- a/docs/snippets/douyin/user-mix.py +++ b/docs/snippets/douyin/user-mix.py @@ -7,31 +7,24 @@ "Referer": "https://www.douyin.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): - mix_id = ( - await DouyinHandler(kwargs) - .fetch_one_video(aweme_id="7294914031133969705") - .get("mix_id") - ) - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( - mix_id - ) - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_collect_videos( - mix_id, 0, 10, 20 - ) - ] - print(results) + mix_id = await DouyinHandler(kwargs).fetch_one_video(aweme_id="7294914031133969705") + # mix_id 为PostDetailFilter对象 + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_mix_videos( + mix_id.mix_id, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/douyin/user-post.py b/docs/snippets/douyin/user-post.py index 1414141..f4130e5 100644 --- a/docs/snippets/douyin/user-post.py +++ b/docs/snippets/douyin/user-post.py @@ -7,27 +7,23 @@ "Referer": "https://www.douyin.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): - user_sec_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE" - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - user_sec_id - ) - ] - print(results) - print("-------------------") - results = [ - aweme_data_list - async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( - user_sec_id, 0, 10, 20 - ) - ] - print(results) + sec_user_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE" + + async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos( + sec_user_id, 0, 10, 20 + ): + print("=================_to_raw================") + 
print(aweme_data_list._to_raw())
+        # print("=================_to_dict===============")
+        # print(aweme_data_list._to_dict())
+        # print("=================_to_list===============")
+        # print(aweme_data_list._to_list())


 if __name__ == "__main__":
diff --git a/docs/snippets/douyin/user-profile.py b/docs/snippets/douyin/user-profile.py
index c6da736..5bfe045 100644
--- a/docs/snippets/douyin/user-profile.py
+++ b/docs/snippets/douyin/user-profile.py
@@ -13,7 +13,11 @@ async def main():
     sec_user_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE"
-    print(await DouyinHandler(kwargs).handler_user_profile(sec_user_id=sec_user_id))
+    user = await DouyinHandler(kwargs).handler_user_profile(sec_user_id=sec_user_id)
+    print("=================_to_raw================")
+    print(user._to_raw())
+    # print("=================_to_dict===============")
+    # print(user._to_dict())


 if __name__ == "__main__":
diff --git a/docs/snippets/douyin/video-get-add.py b/docs/snippets/douyin/video-get-add.py
index 5cae5fd..1829cbe 100644
--- a/docs/snippets/douyin/video-get-add.py
+++ b/docs/snippets/douyin/video-get-add.py
@@ -21,7 +21,7 @@ async def main():
     )
     async with AsyncVideoDB("douyin_videos.db") as avdb:
         await DouyinHandler(kwargs).get_or_add_video_data(
-            aweme_data, avdb, ignore_fields
+            aweme_data._to_dict(), avdb, ignore_fields
         )
diff --git a/docs/snippets/tiktok/format-file-name.py b/docs/snippets/tiktok/format-file-name.py
index a55fb5b..7ad5620 100644
--- a/docs/snippets/tiktok/format-file-name.py
+++ b/docs/snippets/tiktok/format-file-name.py
@@ -17,15 +17,17 @@ async def main():
     # 单作品的数据
     aweme_data = await TiktokHandler(kwargs).fetch_one_video("7316948869764484384")
     # 格式化后的文件名
-    print(format_file_name(kwargs.get("naming"), aweme_data) + "_video")
+    print(format_file_name(kwargs.get("naming"), aweme_data._to_dict()) + "_video")

     # 用户自定义字段
     custom_fields = {"location": "New York"}
     # 格式化后的自定义文件名
-    print((kwargs.get("naming"), aweme_data, custom_fields) + "_video")
+    print(format_file_name(kwargs.get("naming"), aweme_data._to_dict(), custom_fields) + "_video")
     # 格式化后的自定义文件名,长度限制在100
     print(
-        format_file_name(kwargs.get("naming"), aweme_data, custom_fields, 100)
+        format_file_name(
+            kwargs.get("naming"), aweme_data._to_dict(), custom_fields, 100
+        )
         + "_video"
     )
diff --git a/docs/snippets/tiktok/one-video.py b/docs/snippets/tiktok/one-video.py
index 0f93a9a..fb264d2 100644
--- a/docs/snippets/tiktok/one-video.py
+++ b/docs/snippets/tiktok/one-video.py
@@ -12,9 +12,13 @@ async def main():
-    print(await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605"))
-    print("-------------------")
-    print(await TiktokHandler(kwargs).fetch_one_video(itemId="7305827432509082913"))
+    video = await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605")
+    print("=================_to_raw================")
+    print(video._to_raw())
+    # print("=================_to_dict================")
+    # print(video._to_dict())
+    # print("=================_to_list================")
+    # print(video._to_list())


 if __name__ == "__main__":
diff --git a/docs/snippets/tiktok/user-collect.py b/docs/snippets/tiktok/user-collect.py
index e23814c..effeac2 100644
--- a/docs/snippets/tiktok/user-collect.py
+++ b/docs/snippets/tiktok/user-collect.py
@@ -8,6 +8,7 @@
         "Referer": "https://www.tiktok.com/",
     },
     "proxies": {"http": None, "https": None},
+    "timeout": 10,
     "cookie": "YOUR_COOKIE_HERE",
 }

@@ -16,14 +17,16 @@ async def main():
     secUid = await SecUserIdFetcher.get_secuid(
         "YOUR_HOME_PAGE"
     )  # 替换开放收藏列表的用户主页
-    print(
-        [
- 
aweme_data_list - async for aweme_data_list in TiktokHandler( - kwargs - ).fetch_user_collect_videos(secUid) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_collect_videos( + secUid, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-like.py b/docs/snippets/tiktok/user-like.py index 8e9bd2e..4517fb6 100644 --- a/docs/snippets/tiktok/user-like.py +++ b/docs/snippets/tiktok/user-like.py @@ -8,6 +8,7 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -16,14 +17,16 @@ async def main(): secUid = await SecUserIdFetcher.get_secuid( "YOUR_HOME_PAGE" ) # 替换开放喜欢列表的用户主页 - print( - [ - aweme_data_list - async for aweme_data_list in TiktokHandler(kwargs).fetch_user_like_videos( - secUid - ) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_like_videos( + secUid, 0, 10, 20 + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-mix.py b/docs/snippets/tiktok/user-mix.py index fd5a7f2..bf486de 100644 --- a/docs/snippets/tiktok/user-mix.py +++ b/docs/snippets/tiktok/user-mix.py @@ -10,6 +10,7 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -19,9 +20,13 @@ async def main(): playlist = await TiktokHandler(kwargs).fetch_play_list(secUid) for mixId in playlist.get("mixId", []): - print([ - aweme_data_list async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId) - ]) + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": asyncio.run(main()) @@ -39,6 +44,7 @@ async def main(): "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } @@ -51,9 +57,12 @@ async def main(): if selected_index != 0: # [!code focus] mixId = playlist.get("mixId", [])[selected_index - 1] # [!code focus] - print([ - aweme_data_list async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId) - ]) + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_mix_videos(mixId): + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/snippets/tiktok/user-playlist.py b/docs/snippets/tiktok/user-playlist.py index cae900f..8418e7c 100644 --- a/docs/snippets/tiktok/user-playlist.py +++ b/docs/snippets/tiktok/user-playlist.py @@ -14,7 +14,11 @@ async def main(): secUid = await 
SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print(await TiktokHandler(kwargs).fetch_play_list(secUid, 0, 30)) + playlist = await TiktokHandler(kwargs).fetch_play_list(secUid, 0, 30) + print("=================_to_raw================") + print(playlist._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-post.py b/docs/snippets/tiktok/user-post.py index 3ef4a7e..38cb3dc 100644 --- a/docs/snippets/tiktok/user-post.py +++ b/docs/snippets/tiktok/user-post.py @@ -8,20 +8,23 @@ "Referer": "https://www.tiktok.com/", }, "proxies": {"http": None, "https": None}, + "timeout": 10, "cookie": "YOUR_COOKIE_HERE", } async def main(): secUid = await SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print( - [ - aweme_data_list - async for aweme_data_list in TiktokHandler(kwargs).fetch_user_post_videos( - secUid, 0, 30, 0 - ) - ] - ) + + async for aweme_data_list in TiktokHandler(kwargs).fetch_user_post_videos( + secUid, 0, 30, None + ): + print("=================_to_raw================") + print(aweme_data_list._to_raw()) + # print("=================_to_dict===============") + # print(aweme_data_list._to_dict()) + # print("=================_to_list===============") + # print(aweme_data_list._to_list()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/user-profile.py b/docs/snippets/tiktok/user-profile.py index d98b6d7..5819a3e 100644 --- a/docs/snippets/tiktok/user-profile.py +++ b/docs/snippets/tiktok/user-profile.py @@ -16,9 +16,17 @@ async def main(): "MS4wLjABAAAAQhcYf_TjRKUku-aF8oqngAfzrYksgGLRz8CKMciBFdfR54HQu3qGs-WoJ-KO7hO8" ) uniqueId = "vantoan___" - print(await TiktokHandler(kwargs).handler_user_profile(secUid=secUid)) - print("-------------------") - print(await TiktokHandler(kwargs).handler_user_profile(uniqueId=uniqueId)) + user = await TiktokHandler(kwargs).handler_user_profile(secUid=secUid) + print("=================_to_raw================") + print(user._to_raw()) + # print("=================_to_dict===============") + # print(user._to_dict()) + + user = await TiktokHandler(kwargs).handler_user_profile(uniqueId=uniqueId) + print("=================_to_raw================") + print(user._to_raw()) + # print("=================_to_dict===============") + # print(user._to_dict()) if __name__ == "__main__": diff --git a/docs/snippets/tiktok/video-get-add.py b/docs/snippets/tiktok/video-get-add.py index abf1b5b..34c6d33 100644 --- a/docs/snippets/tiktok/video-get-add.py +++ b/docs/snippets/tiktok/video-get-add.py @@ -21,7 +21,7 @@ async def main(): ) async with AsyncVideoDB("tiktok_videos.db") as avdb: await TiktokHandler(kwargs).get_or_add_video_data( - aweme_data, avdb, ignore_fields + aweme_data._to_dict(), avdb, ignore_fields ) diff --git a/examples/douyin/VerifyFp.py b/examples/douyin/VerifyFp.py deleted file mode 100644 index 57ece4b..0000000 --- a/examples/douyin/VerifyFp.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.douyin.utils import VerifyFpManager - -if __name__ == "__main__": - print("verify_fp:", VerifyFpManager.gen_verify_fp()) diff --git a/examples/douyin/handler_user_post.py b/examples/douyin/handler_user_post.py deleted file mode 100644 index be69b3a..0000000 --- a/examples/douyin/handler_user_post.py +++ /dev/null @@ -1,32 +0,0 @@ -import asyncio -from f2.apps.douyin.model import UserPost -from f2.apps.douyin.filter import UserPostFilter -from f2.apps.douyin.crawler import DouyinCrawler - -kwargs = { - 
"headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.douyin.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def test_user_post_fetcher(): - async with DouyinCrawler(kwargs) as crawler: - params = UserPost( - max_cursor=0, - count=5, - sec_user_id="MS4wLjABAAAAu8qwDm1-muGuMhZZ-tVzyPVWlUxIbQRNJN_9k83OhWU", - ) - response = await crawler.fetch_user_post(params) - - video = UserPostFilter(response) - print( - f"作者:{video.nickname[0]}, 所有作品id:{video.aweme_id}, 每个作品的码率{video.video_bit_rate}" - ) - - -if __name__ == "__main__": - asyncio.run(test_user_post_fetcher()) diff --git a/examples/douyin/msToken.py b/examples/douyin/msToken.py deleted file mode 100644 index aa60a24..0000000 --- a/examples/douyin/msToken.py +++ /dev/null @@ -1,5 +0,0 @@ -from f2.apps.douyin.utils import TokenManager - -if __name__ == '__main__': - print('douyin real msToken:', TokenManager.gen_real_msToken()) - print('douyin fake msToken:', TokenManager.gen_false_msToken()) \ No newline at end of file diff --git a/examples/douyin/ttwid.py b/examples/douyin/ttwid.py deleted file mode 100644 index 996118f..0000000 --- a/examples/douyin/ttwid.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.douyin.utils import TokenManager - -if __name__ == '__main__': - print('douyin ttwid:', TokenManager.gen_ttwid()) \ No newline at end of file diff --git a/examples/f2_progress.py b/examples/f2_progress.py deleted file mode 100644 index 7ecb8eb..0000000 --- a/examples/f2_progress.py +++ /dev/null @@ -1,77 +0,0 @@ -import time -from f2.cli.cli_console import RichConsoleManager, CustomSpinnerColumn, ProgressManager -from f2.cli.cli_console import TextColumn, BarColumn, TimeElapsedColumn - - -if __name__ == "__main__": - - def simulate_progress(progress_manager): - # 启动进度条 - progress_manager.start() - - # 添加一个任务 - task_id = progress_manager.add_task( - "Demo Task: waiting", total=200, state="waiting" - ) - for _ in range(20): - time.sleep(0.1) - if _ == 4: # 模拟开始下载 - progress_manager.update( - task_id, description="Demo Task: starting", state="starting" - ) - elif _ == 8: # 模拟下载中 - progress_manager.update( - task_id, description="Demo Task: downloading", state="downloading" - ) - elif _ == 12: # 模拟暂停 - progress_manager.update( - task_id, description="Demo Task: paused", state="paused" - ) - time.sleep(1) # 暂停一会儿 - progress_manager.update( - task_id, description="Demo Task: downloading", state="downloading" - ) - elif _ == 16: # 模拟出错 - progress_manager.update( - task_id, description="Demo Task: error", state="error" - ) - time.sleep(0.5) - progress_manager.update( - task_id, description="Demo Task: completed", state="completed" - ) - - progress_manager.update(task_id, advance=10) - - # 停止进度条 - progress_manager.stop() - - print("Showing default progress:") - progress_manager_default = ProgressManager() - simulate_progress(progress_manager_default) - - print("\nShowing custom progress:") - my_spinners = { - "waiting": "dots12", - "downloading": "earth", - } - custom_spinner_column = CustomSpinnerColumn(spinner_styles=my_spinners, speed=0.5) - progress_manager_custom = ProgressManager(spinner_column=custom_spinner_column) - simulate_progress(progress_manager_custom) - - print("\nShowing custom 2 progress:") - custom_columns = { - "description": TextColumn("{task.description}"), - "bar": BarColumn( - complete_style="bright_magenta black", finished_style="bright_white green" - ), - 
"custom_percentage": TextColumn( - "[progress.custom_percentage]{task.percentage:>2.0f}%", style="bright_cyan" - ), - "elapsed": TimeElapsedColumn(), - } - progress_manager_custom2 = ProgressManager( - spinner_column=custom_spinner_column, - custom_columns=custom_columns, - expand=False, - ) - simulate_progress(progress_manager_custom2) diff --git a/examples/tiktok/msToken.py b/examples/tiktok/msToken.py deleted file mode 100644 index 0d54d50..0000000 --- a/examples/tiktok/msToken.py +++ /dev/null @@ -1,5 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok real msToken:", TokenManager.gen_real_msToken()) - print("tiktok fake msToken:", TokenManager.gen_false_msToken()) diff --git a/examples/tiktok/odin_tt.py b/examples/tiktok/odin_tt.py deleted file mode 100644 index 0aa87ea..0000000 --- a/examples/tiktok/odin_tt.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok odin_tt:", TokenManager.gen_odin_tt()) diff --git a/examples/tiktok/postDetail.py b/examples/tiktok/postDetail.py deleted file mode 100644 index 945306c..0000000 --- a/examples/tiktok/postDetail.py +++ /dev/null @@ -1,23 +0,0 @@ -import asyncio -from f2.apps.tiktok.handler import TiktokHandler - -kwargs = { - "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.tiktok.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def main(): - post = await TiktokHandler(kwargs).fetch_one_video(itemId="7095819783324601605") - print(post) - print("-------------------") - post = await TiktokHandler(kwargs).fetch_one_video(itemId="7305827432509082913") - print(post) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tiktok/secUid.py b/examples/tiktok/secUid.py deleted file mode 100644 index a42f531..0000000 --- a/examples/tiktok/secUid.py +++ /dev/null @@ -1,11 +0,0 @@ -import asyncio -from f2.apps.tiktok.utils import SecUserIdFetcher - - -async def main(): - secUid = await SecUserIdFetcher.get_secuid("https://www.tiktok.com/@vantoan___") - print(secUid) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tiktok/ttwid.py b/examples/tiktok/ttwid.py deleted file mode 100644 index c2bd857..0000000 --- a/examples/tiktok/ttwid.py +++ /dev/null @@ -1,4 +0,0 @@ -from f2.apps.tiktok.utils import TokenManager - -if __name__ == "__main__": - print("tiktok ttwid:", TokenManager.gen_ttwid()) diff --git a/examples/tiktok/userProfile.py b/examples/tiktok/userProfile.py deleted file mode 100644 index f9d5ed4..0000000 --- a/examples/tiktok/userProfile.py +++ /dev/null @@ -1,25 +0,0 @@ -import asyncio -from f2.apps.tiktok.handler import TiktokHandler - -kwargs = { - "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36", - "Referer": "https://www.tiktok.com/", - }, - "proxies": {"http": None, "https": None}, - "cookie": "YOUR_COOKIE_HERE", -} - - -async def main(): - user = await TiktokHandler(kwargs).handler_user_profile( - secUid="MS4wLjABAAAAQhcYf_TjRKUku-aF8oqngAfzrYksgGLRz8CKMciBFdfR54HQu3qGs-WoJ-KO7hO8" - ) - print(user) - print("-------------------") - user = await TiktokHandler(kwargs).handler_user_profile(uniqueId="sophia.ilysm") - print(user) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/f2/__init__.py 
b/f2/__init__.py index 7f8ba0b..609517b 100644 --- a/f2/__init__.py +++ b/f2/__init__.py @@ -1,5 +1,5 @@ __author__ = "JohnserfSeed " -__version__ = "0.0.1.4" +__version__ = "0.0.1.5" __description_cn__ = "基于[red]异步[/red]的[green]全平台下载工具." __description_en__ = "[yellow]Asynchronous based [/yellow]full-platform download tool." __reponame__ = "f2" @@ -8,3 +8,29 @@ APP_CONFIG_FILE_PATH = "conf/app.yaml" F2_CONFIG_FILE_PATH = "conf/conf.yaml" F2_DEFAULTS_FILE_PATH = "conf/defaults.yaml" + +BROWSER_LIST = [ + "chrome", + "firefox", + "edge", + "opera", + "opera_gx", + "safari", + "chromium", + "brave", + "vivaldi", + "librewolf", +] + +DOUYIN_MODE_LIST = [ + "one", + "post", + "like", + "collection", + "collects", + "music", + "mix", + "live", +] + +TIKTOK_MODE_LIST = ["one", "post", "like", "collect", "mix"] diff --git a/f2/apps/douyin/__init__.py b/f2/apps/douyin/__init__.py deleted file mode 100644 index 11af569..0000000 --- a/f2/apps/douyin/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# path: f2/apps/douyin/__init__.py - -from f2.apps.douyin.help import help diff --git a/f2/apps/douyin/api.py b/f2/apps/douyin/api.py index b6eea0e..b8a30f1 100644 --- a/f2/apps/douyin/api.py +++ b/f2/apps/douyin/api.py @@ -54,6 +54,12 @@ class DouyinAPIEndpoints: # 用户喜欢B (User Like B) USER_FAVORITE_B = f"{IESDOUYIN_DOMAIN}/web/api/v2/aweme/like/" + # 关注用户(User Following) + USER_FOLLOWING = f"{DOUYIN_DOMAIN}/aweme/v1/web/user/following/list/" + + # 粉丝用户 (User Follower) + USER_FOLLOWER = f"{DOUYIN_DOMAIN}/aweme/v1/web/user/follower/list/" + # 合集作品 MIX_AWEME = f"{DOUYIN_DOMAIN}/aweme/v1/web/mix/aweme/" @@ -63,6 +69,15 @@ class DouyinAPIEndpoints: # 用户收藏 (User Collection) USER_COLLECTION = f"{DOUYIN_DOMAIN}/aweme/v1/web/aweme/listcollection/" + # 用户收藏夹 (User Collects) + USER_COLLECTS = f"{DOUYIN_DOMAIN}/aweme/v1/web/collects/list/" + + # 用户收藏夹作品 (User Collects Posts) + USER_COLLECTS_VIDEO = f"{DOUYIN_DOMAIN}/aweme/v1/web/collects/video/list/" + + # 用户音乐收藏 (User Music Collection) + USER_MUSIC_COLLECTION = f"{DOUYIN_DOMAIN}/aweme/v1/web/music/listcollection/" + # 首页朋友作品 (Friend Feed) FRIEND_FEED = f"{DOUYIN_DOMAIN}/aweme/v1/web/familiar/feed/" diff --git a/f2/apps/douyin/cli.py b/f2/apps/douyin/cli.py index 97aeb06..688b71a 100644 --- a/f2/apps/douyin/cli.py +++ b/f2/apps/douyin/cli.py @@ -4,20 +4,25 @@ import click import typing import asyncio -import browser_cookie3 from pathlib import Path from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path +from f2.utils.utils import ( + split_dict_cookie, + get_resource_path, + get_cookie_from_browser, + check_invalid_naming, + merge_config, +) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ from f2.apps.douyin.handler import handle_sso_login -def handle_help( +def handler_help( ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, @@ -53,61 +58,28 @@ def handler_auto_cookie( param: 提供的参数或选项 (The provided parameter or option) value: 参数或选项的值 (The value of the parameter or option) """ - if not value or ctx.resilient_parsing: - return - - # 如果用户明确设置了 --cookie,那么跳过自动获取过程 - if ctx.params.get("cookie"): + # 如果没有提供值或者用户已经设置了 resilient_parsing 或者提供了 --cookie 参数则跳过 + if not value or ctx.resilient_parsing or ctx.params.get("cookie"): return # 根据浏览器选择获取cookie - if value in ["chrome", "firefox", "edge", "opera"]: - try: - cookie_value = split_dict_cookie(get_cookie_from_browser(value)) - 
manager = ConfigManager(ctx.params.get("config", "conf/app.yaml")) - manager.update_config_with_args("douyin", cookie=cookie_value) - except PermissionError: - message = _("请关闭所有已打开的浏览器重试, 并且你有适当的权限访问浏览器 !") - logger.error(message) - click.echo(message) - ctx.abort() - except Exception as e: - message = _("自动获取Cookie失败: {0}".format(str(e))) - logger.error(message) - click.echo(message) - ctx.abort() - - -def get_cookie_from_browser(browser_choice: str): - """ - 根据用户选择的浏览器获取douyin.com的cookie。 - - Args: - browser_choice (str): 用户选择的浏览器名称 - - Returns: - str: *.douyin.com的cookie值 - """ - - BROWSER_FUNCTIONS = { - "chrome": browser_cookie3.chrome, - "firefox": browser_cookie3.firefox, - "edge": browser_cookie3.edge, - "opera": browser_cookie3.opera, - } - cj_function = BROWSER_FUNCTIONS.get(browser_choice) - if not cj_function: - raise ValueError(_("不支持的浏览器选项, 输入f2 dy --help查看更多帮助!")) - - cj = cj_function(domain_name="douyin.com") - - # cookie_value = next((c.value for c in cj if c.name == 'ttwid'), None) - cookie_value = {c.name: c.value for c in cj if c.domain.endswith("douyin.com")} + try: + cookie_value = split_dict_cookie(get_cookie_from_browser(value, "douyin.com")) - if not cookie_value: - raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(browser_choice)) + if not cookie_value: + raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) - return cookie_value + # 如果没有提供配置文件,那么使用高频配置文件 + manager = ConfigManager( + ctx.params.get("config", get_resource_path(f2.APP_CONFIG_FILE_PATH)) + ) + manager.update_config_with_args("douyin", cookie=cookie_value) + except PermissionError: + logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) + ctx.abort() + except Exception as e: + logger.error(_("自动获取Cookie失败:{0}").format(str(e))) + ctx.abort() def handler_language( @@ -115,8 +87,16 @@ def handler_language( param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> typing.Any: - """用于设置语言 (For setting the language)""" + """用于设置语言 (For setting the language) + Args: + ctx: click的上下文对象 (Click's context object) + param: 提供的参数或选项 (The provided parameter or option) + value: 参数或选项的值 (The value of the parameter or option) + """ + + if not value or ctx.resilient_parsing: + return TranslationManager.get_instance().set_language(value) global _ _ = TranslationManager.get_instance().gettext @@ -142,42 +122,20 @@ def handler_naming( value: 命名模式模板 (Naming pattern template) """ # 避免和配置文件参数冲突 - if value is None: + if not value or ctx.resilient_parsing: return # 允许的模式和分隔符 ALLOWED_PATTERNS = ["{nickname}", "{create}", "{aweme_id}", "{desc}", "{uid}"] ALLOWED_SEPARATORS = ["-", "_"] - temp_naming = value - invalid_patterns = [] - - # 检查提供的模式是否有效 - for pattern in ALLOWED_PATTERNS: - if pattern in temp_naming: - temp_naming = temp_naming.replace(pattern, "") - - # 此时,temp_naming应只包含分隔符 - for char in temp_naming: - if char not in ALLOWED_SEPARATORS: - invalid_patterns.append(char) - - # 检查连续的无效模式或分隔符 - for pattern in ALLOWED_PATTERNS: - # 检查像"{aweme_id}{aweme_id}"这样的模式 - if pattern + pattern in value: - invalid_patterns.append(pattern + pattern) - for sep in ALLOWED_SEPARATORS: - # 检查像"{aweme_id}-{aweme_id}"这样的模式 - if pattern + sep + pattern in value: - invalid_patterns.append(pattern + sep + pattern) + # 检查命名是否符合命名规范 + invalid_patterns = check_invalid_naming(value, ALLOWED_PATTERNS, ALLOWED_SEPARATORS) if invalid_patterns: raise click.BadParameter( - _( - "`{0}` 中的 `{1}` 不符合命名模式".format( - value, "".join(invalid_patterns) - ) + _("`{0}` 中的 `{1}` 不符合命名模式").format( + value, "".join(invalid_patterns) ) ) @@ 
-219,35 +177,6 @@ def handler_sso_login( raise click.UsageError(_("SSO登录失败,请重试!")) -def merge_config(main_conf, custom_conf, **kwargs): - """ - 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 - Args: - main_conf (dict): 主配置参数字典 - custom_conf (dict): 自定义配置参数字典 - **kwargs: CLI 参数和其他额外的配置参数 - - Returns: - dict: 合并后的配置参数字典 - """ - # 合并主配置和自定义配置 - merged_conf = {} - for key, value in main_conf.items(): - merged_conf[key] = value # 将主配置复制到合并后的配置中 - for key, value in custom_conf.items(): - if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 - merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 - - # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 - for key, value in kwargs.items(): - if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 - merged_conf[key] = value - elif value is not None and value != "": # 如果值不为 None 和 空值,则进行合并 - merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 - - return merged_conf - - @click.command(name="douyin", help=_("抖音无水印解析")) @click.option( "--config", @@ -269,44 +198,44 @@ def merge_config(main_conf, custom_conf, **kwargs): "-m", type=bool, # default="yes", - help=_("是否保存视频原声。可选:'yes'、'no'"), + help=_("是否保存视频原声"), ) @click.option( "--cover", "-v", type=bool, # default="yes", - help=_("是否保存视频封面。可选:'yes'、'no'"), + help=_("是否保存视频封面"), ) @click.option( "--desc", "-d", type=bool, # default="yes", - help=_("是否保存视频文案。可选:'yes'、'no'"), + help=_("是否保存视频文案"), ) @click.option( "--path", "-p", type=str, # default="Download", - help=_("作品保存位置,支持绝对与相对路径。"), + help=_("作品保存位置,支持绝对与相对路径"), ) @click.option( "--folderize", "-f", type=bool, # default="yes", - help=_("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + help=_("是否将作品保存到单独的文件夹"), ) @click.option( "--mode", "-M", - type=click.Choice(["one", "post", "like", "collect", "mix", "live"]), + type=click.Choice(f2.DOUYIN_MODE_LIST), # default="post", # required=True, help=_( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collect),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),收藏音乐(music),合辑(mix),直播(live)" ), ) @click.option( @@ -336,28 +265,28 @@ def merge_config(main_conf, custom_conf, **kwargs): "-e", type=int, # default=10, - help=_("网络请求超时时间。"), + help=_("网络请求超时时间"), ) @click.option( "--max_retries", "-r", type=int, # default=5, - help=_("网络请求超时重试数。"), + help=_("网络请求超时重试数"), ) @click.option( "--max-connections", "-x", type=int, # default=5, - help=_("网络请求并发连接数。"), + help=_("网络请求并发连接数"), ) @click.option( "--max-tasks", "-t", type=int, # default=10, - help=_("异步的任务数。"), + help=_("异步的任务数"), ) @click.option( "--max-counts", @@ -371,14 +300,14 @@ def merge_config(main_conf, custom_conf, **kwargs): "-s", type=int, # default=20, - help=_("从接口每页可获取作品数,不建议超过20。"), + help=_("从接口每页可获取作品数,不建议超过20"), ) @click.option( "--languages", "-l", type=click.Choice(["zh_CN", "en_US"]), default="zh_CN", - help=_("显示语言。默认为 'zh_CN'。可选:'zh_CN'、'en_US'。不支持配置文件修改。"), + help=_("显示语言。默认为 'zh_CN',可选:'zh_CN'、'en_US',不支持配置文件修改"), callback=handler_language, ) @click.option( @@ -390,6 +319,7 @@ def merge_config(main_conf, custom_conf, **kwargs): "代理服务器,最多 2 个参数,http与https。空格区分 2 个参数 http://x.x.x.x https://x.x.x.x" ), ) +@click.option("--lyric", "-L", type=bool, help=_("是否保存原声歌词")) @click.option( "--update-config", type=bool, @@ -401,17 +331,15 @@ def merge_config(main_conf, custom_conf, **kwargs): ) @click.option( "--auto-cookie", - type=click.Choice(["none", "chrome", "firefox", "edge", "opera"]), + type=click.Choice(f2.BROWSER_LIST), # default="none", - help=_( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" - 
), + help=_("自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器"), callback=handler_auto_cookie, ) @click.option( "--sso-login", is_flag=True, - help=_("使用SSO扫码登录获取[yellow]cookie[/yellow],保存低频主配置文件"), + help=_("使用SSO扫码登录获取cookie,保存低频主配置文件"), callback=handler_sso_login, ) @click.option( @@ -420,10 +348,16 @@ def merge_config(main_conf, custom_conf, **kwargs): is_eager=True, expose_value=False, help=_("显示富文本帮助"), - callback=handle_help, + callback=handler_help, ) @click.pass_context -def douyin(ctx, config, init_config, update_config, **kwargs): +def douyin( + ctx: click.Context, + config: str, + init_config: str, + update_config: bool, + **kwargs, +): ################## # f2 存在2个主配置文件,分别是app低频配置(app.yaml)和f2低频配置(conf.yaml) # app低频配置存放app相关的参数 @@ -467,7 +401,7 @@ def douyin(ctx, config, init_config, update_config, **kwargs): # 如果没有初始化配置文件,但是更新配置文件,则需要提供配置文件路径 elif update_config and not config: raise click.UsageError( - _("要更新配置, 首先需要使用'-c'选项提供一个自定义配置文件路径") + _("要更新配置,首先需要使用'-c'选项提供一个自定义配置文件路径") ) # 读取自定义配置文件 @@ -493,16 +427,16 @@ def douyin(ctx, config, init_config, update_config, **kwargs): # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}".format(main_conf_path))) - logger.info(_("自定义配置路径: {0}".format(Path.cwd() / config))) - logger.debug(_("主配置参数:{0}".format(main_conf))) - logger.debug(_("自定义配置参数:{0}".format(custom_conf))) - logger.debug(_("CLI参数:{0}".format(kwargs))) + logger.info(_("主配置路径:{0}").format(main_conf_path)) + logger.info(_("自定义配置路径:{0}").format(Path.cwd() / config)) + logger.debug(_("主配置参数:{0}").format(main_conf)) + logger.debug(_("自定义配置参数:{0}").format(custom_conf)) + logger.debug(_("CLI参数:{0}").format(kwargs)) # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): logger.error("缺乏URL参数,详情看命令帮助") - handle_help(ctx, None, True) + handler_help(ctx, None, True) # 添加app_name到kwargs kwargs["app_name"] = "douyin" diff --git a/f2/apps/douyin/crawler.py b/f2/apps/douyin/crawler.py index 565b888..da37917 100644 --- a/f2/apps/douyin/crawler.py +++ b/f2/apps/douyin/crawler.py @@ -11,7 +11,10 @@ UserProfile, UserPost, UserLike, - UserCollect, + UserCollection, + UserCollects, + UserCollectsVideo, + UserMusicCollection, PostDetail, UserMix, UserLive, @@ -19,12 +22,17 @@ FollowUserLive, LoginGetQr, LoginCheckQr, + UserFollowing, + UserFollower, ) from f2.apps.douyin.utils import XBogusManager class DouyinCrawler(BaseCrawler): - def __init__(self, kwargs: dict = {}): + def __init__( + self, + kwargs: dict = ..., + ): f2_manager = ConfigManager(f2.F2_CONFIG_FILE_PATH) f2_conf = f2_manager.get_config("f2").get("douyin") proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) @@ -43,86 +51,137 @@ def __init__(self, kwargs: dict = {}): async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_DETAIL, params.dict() - ) # fmt: off - logger.debug(_("用户信息接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.USER_DETAIL, + params.dict(), + ) + logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_POST, params.dict() - ) # fmt: off - logger.debug(_("主页作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.USER_POST, + params.dict(), + ) + logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = 
XBogusManager.model_2_endpoint( - dyendpoint.USER_FAVORITE_A, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_FAVORITE_A, + params.dict(), ) - logger.debug(_("喜欢作品接口地址:" + endpoint)) + logger.debug(_("主页喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) - async def fetch_user_collect(self, params: UserCollect): + async def fetch_user_collection(self, params: UserCollection): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.USER_COLLECTION, params.dict() + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTION, + params.dict(), ) - logger.debug(_("收藏作品接口地址:" + endpoint)) + logger.debug(_("主页收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_post_json(endpoint, params.dict()) + async def fetch_user_collects(self, params: UserCollects): + endpoint = XBogusManager.model_2_endpoint( + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTS, + params.dict(), + ) + logger.debug(_("收藏夹接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + + async def fetch_user_collects_video(self, params: UserCollectsVideo): + endpoint = XBogusManager.model_2_endpoint( + self.headers.get("User-Agent"), + dyendpoint.USER_COLLECTS_VIDEO, + params.dict(), + ) + logger.debug(_("收藏夹作品接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + + async def fetch_user_music_collection(self, params: UserMusicCollection): + endpoint = XBogusManager.model_2_endpoint( + self.headers.get("User-Agent"), + dyendpoint.USER_MUSIC_COLLECTION, + params.dict(), + ) + logger.debug(_("音乐收藏接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.MIX_AWEME, params.dict() - ) # fmt: off - logger.debug(_("合集作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.MIX_AWEME, + params.dict(), + ) + logger.debug(_("合集作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_DETAIL, params.dict() - ) # fmt: off - logger.debug(_("作品详情接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.POST_DETAIL, + params.dict(), + ) + logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_COMMENT, params.dict() + self.headers.get("User-Agent"), + dyendpoint.POST_COMMENT, + params.dict(), ) - logger.debug(_("作品评论接口地址:" + endpoint)) + logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.TAB_FEED, params.dict() - ) # fmt: off - logger.debug(_("首页推荐作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.TAB_FEED, + params.dict(), + ) + logger.debug(_("首页推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_follow_feed(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.FOLLOW_FEED, params.dict() - ) # fmt: off - logger.debug(_("关注作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.FOLLOW_FEED, + params.dict(), + ) + logger.debug(_("关注作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_friend_feed(self, params: PostDetail): endpoint = 
XBogusManager.model_2_endpoint( - dyendpoint.FRIEND_FEED, params.dict() - ) # fmt: off - logger.debug(_("朋友作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.FRIEND_FEED, + params.dict(), + ) + logger.debug(_("朋友作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_related(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.POST_RELATED, params.dict() + self.headers.get("User-Agent"), + dyendpoint.POST_RELATED, + params.dict(), ) - logger.debug(_("相关推荐作品接口地址:" + endpoint)) + logger.debug(_("相关推荐作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_live(self, params: UserLive): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LIVE_INFO, params.dict() - ) # fmt: off - logger.debug(_("直播接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.LIVE_INFO, + params.dict(), + ) + logger.debug(_("直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_live_room_id(self, params: UserLive2): @@ -131,46 +190,76 @@ async def fetch_live_room_id(self, params: UserLive2): # 避免invalid session self.aclient.headers.update({"Cookie": ""}) endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LIVE_INFO_ROOM_ID, params.dict() + self.headers.get("User-Agent"), + dyendpoint.LIVE_INFO_ROOM_ID, + params.dict(), ) - logger.debug(_("直播接口地址(room_id):" + endpoint)) + logger.debug(_("直播接口地址(room_id):{0}").format(endpoint)) return await self._fetch_get_json(endpoint) finally: self.aclient.headers = original_headers async def fetch_follow_live(self, params: FollowUserLive): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.FOLLOW_USER_LIVE, params.dict() + self.headers.get("User-Agent"), + dyendpoint.FOLLOW_USER_LIVE, + params.dict(), ) - logger.debug(_("关注用户直播接口地址:" + endpoint)) + logger.debug(_("关注用户直播接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_locate_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.LOCATE_POST, params.dict() - ) # fmt: off - logger.debug(_("定位上一次作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + dyendpoint.LOCATE_POST, + params.dict(), + ) + logger.debug(_("定位上一次作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_login_qrcode(self, parms: LoginGetQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_GET_QR, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_GET_QR, + parms.dict(), ) - logger.debug(_("SSO获取二维码接口地址:" + endpoint)) + logger.debug(_("SSO获取二维码接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_check_qrcode(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_CHECK_QR, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_CHECK_QR, + parms.dict(), ) - logger.debug(_("SSO检查扫码状态接口地址:" + endpoint)) + logger.debug(_("SSO检查扫码状态接口地址:{0}").format(endpoint)) return await self._fetch_response(endpoint) async def fetch_check_login(self, parms: LoginCheckQr): endpoint = XBogusManager.model_2_endpoint( - dyendpoint.SSO_LOGIN_CHECK_LOGIN, parms.dict() + self.headers.get("User-Agent"), + dyendpoint.SSO_LOGIN_CHECK_LOGIN, + parms.dict(), + ) + logger.debug(_("SSO检查登录状态接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + + async def fetch_user_following(self, params: UserFollowing): + endpoint = XBogusManager.model_2_endpoint( + 
self.headers.get("User-Agent"), + dyendpoint.USER_FOLLOWING, + params.dict(), + ) + logger.debug(_("用户关注列表接口地址:{0}").format(endpoint)) + return await self._fetch_get_json(endpoint) + + async def fetch_user_follower(self, params: UserFollower): + endpoint = XBogusManager.model_2_endpoint( + self.headers.get("User-Agent"), + dyendpoint.USER_FOLLOWER, + params.dict(), ) - logger.debug(_("SSO检查登录状态接口地址:" + endpoint)) + logger.debug(_("用户粉丝列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def __aenter__(self): diff --git a/f2/apps/douyin/db.py b/f2/apps/douyin/db.py index 43da126..b42c70c 100644 --- a/f2/apps/douyin/db.py +++ b/f2/apps/douyin/db.py @@ -1,6 +1,5 @@ # path: f2/apps/douyin/db.py -import aiosqlite from f2.db.base_db import BaseDB @@ -32,10 +31,12 @@ async def _create_table(self) -> None: "mix_count INTEGER", "mplatform_followers_count INTEGER", "nickname TEXT", + "nickname_raw TEXT", "room_id TEXT", "school_name TEXT", "short_id TEXT", "signature TEXT", + "signature_raw TEXT", "total_favorited INTEGER", "uid TEXT", "unique_id TEXT", @@ -77,7 +78,7 @@ async def add_user_info(self, ignore_fields=None, **kwargs) -> None: # VALUES (?, {placeholders})', (kwargs.get('sec_user_id'), *values)) await self.commit() - async def update_user_info(self, sec_user_id, **kwargs) -> None: + async def update_user_info(self, sec_user_id: str, **kwargs) -> None: """ 更新用户信息 @@ -147,6 +148,7 @@ async def _create_table(self) -> None: "aweme_id TEXT PRIMARY KEY", "aweme_type TEXT", "nickname TEXT", + "nickname_raw TEXT", "sec_user_id TEXT", "short_id TEXT", "uid TEXT", @@ -158,6 +160,7 @@ async def _create_table(self) -> None: "comment_gid TEXT", "create_time TEXT", "desc TEXT", + "desc_raw TEXT", "duration TEXT", "is_ads TEXT", "is_story TEXT", @@ -174,6 +177,7 @@ async def _create_table(self) -> None: "is_long_video TEXT", "media_type TEXT", "mix_desc TEXT", + "mix_desc_raw TEXT", "mix_create_time TEXT", "mix_id TEXT", "mix_name TEXT", @@ -186,17 +190,22 @@ async def _create_table(self) -> None: "is_original_sound TEXT", "is_pgc TEXT", "music_author TEXT", + "music_author_raw TEXT", "music_author_deleted TEXT", "music_duration TEXT", "music_id TEXT", "music_mid TEXT", "pgc_author TEXT", + "pgc_author_raw TEXT", "pgc_author_title TEXT", + "pgc_author_title_raw TEXT", "pgc_music_type TEXT", "music_status TEXT", "music_owner_handle TEXT", + "music_owner_handle_raw TEXT", "music_owner_id TEXT", "music_owner_nickname TEXT", + "music_owner_nickname_raw TEXT", "music_play_url TEXT", "position TEXT", "region TEXT", @@ -255,29 +264,26 @@ async def batch_insert_videos( video_data_list (list): 视频信息列表 ignore_fields (list): 要忽略的字段列表,例如 ["field1", "field2"] """ - try: - # 如果 ignore_fields 未提供或者为 None,将其设置为空列表 - ignore_fields = ignore_fields or [] + # 如果 ignore_fields 未提供或者为 None,将其设置为空列表 + ignore_fields = ignore_fields or [] - # 删除要忽略的字段 - for field in ignore_fields: - for video_data in video_data_list: - if field in video_data: - del video_data[field] + # 删除要忽略的字段 + for field in ignore_fields: + for video_data in video_data_list: + if field in video_data: + del video_data[field] - keys = ", ".join(video_data_list[0].keys()) - placeholders = ", ".join(["?" for _ in range(len(video_data_list[0]))]) + keys = ", ".join(video_data_list[0].keys()) + placeholders = ", ".join(["?" 
for _ in range(len(video_data_list[0]))]) - # 构建插入数据的元组列表 - values = [tuple(video_data.values()) for video_data in video_data_list] + # 构建插入数据的元组列表 + values = [tuple(video_data.values()) for video_data in video_data_list] - await self.execute( - f"INSERT OR REPLACE INTO {self.TABLE_NAME} ({keys}) VALUES ({placeholders})", - values, - ) - await self.commit() - except aiosqlite.Error as e: - print(f"Error batch inserting videos: {e}") + await self.execute( + f"INSERT OR REPLACE INTO {self.TABLE_NAME} ({keys}) VALUES ({placeholders})", + values, + ) + await self.commit() async def get_video_info(self, aweme_id: str) -> dict: """ diff --git a/f2/apps/douyin/dl.py b/f2/apps/douyin/dl.py index 1eb6c00..e2224dd 100644 --- a/f2/apps/douyin/dl.py +++ b/f2/apps/douyin/dl.py @@ -9,7 +9,7 @@ from f2.dl.base_downloader import BaseDownloader from f2.utils.utils import get_timestamp, timestamp_2_str from f2.apps.douyin.db import AsyncUserDB -from f2.apps.douyin.utils import format_file_name +from f2.apps.douyin.utils import format_file_name, json_2_lrc class DouyinDownloader(BaseDownloader): @@ -17,8 +17,7 @@ def __init__(self, kwargs: dict = {}): if kwargs["cookie"] is None: raise ValueError( _( - "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 f2 -d dy --help,如扫码登录请保留双引号cookie: " - ",再使用--sso-login命令。" + "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取。如 `--auto-cookie edge`" ) ) @@ -175,11 +174,11 @@ async def handler_download( logger.debug(f"========{aweme_id}========") logger.debug(aweme_data_dict) - logger.debug("================") + logger.debug("===================================") # 检查作品是否被屏蔽 if aweme_prohibited: - logger.warning(_("{0} 该作品已被屏蔽,无法下载").format(aweme_id)) + logger.warning(_("该 {0} 作品已被屏蔽,无法下载").format(aweme_id)) return # 检查作品是否可见 @@ -245,7 +244,7 @@ async def handler_download( ) + "_video" ) - + # video_play_addr 现在为一个list,第一个链接下载失败,则下载第二个链接 video_url = aweme_data_dict.get("video_play_addr") if video_url != None: await self.initiate_download( @@ -271,6 +270,79 @@ async def handler_download( # 保存最后一个aweme_id await self.save_last_aweme_id(sec_user_id, aweme_id) + async def create_music_download_tasks( + self, kwargs: dict, music_datas: Union[list, dict], user_path: Any + ) -> None: + """ + 创建音乐下载任务 + + Args: + kwargs (dict): 命令行参数 + music_datas (list, dict): 音乐数据列表或字典 + user_path (Any): 用户目录路径 + """ + + if ( + not kwargs + or not music_datas + or not isinstance(music_datas, (list, dict)) + or not user_path + ): + return + + if isinstance(music_datas, dict): + await self.handler_music_download(kwargs, music_datas, user_path) + else: + for music_data in music_datas: + await self.handler_music_download(kwargs, music_data, user_path) + + # 执行下载任务 + await self.execute_tasks() + + async def handler_music_download( + self, kwargs: dict, music_data_dict: dict, user_path: Any + ) -> None: + """ + 处理音乐下载任务 + + Args: + kwargs (dict): 命令行参数 + music_data_dict (dict): 音乐数据字典 + user_path (Any): 用户目录路径 + """ + + # 构建文件夹路径 + base_path = ( + user_path / music_data_dict.get("title") + if kwargs.get("folderize") + else user_path + ) + music_name = music_data_dict.get("title") + "_music" + music_url = music_data_dict.get("play_url") + lyric_name = music_data_dict.get("title") + "_lyric" + lyric_url = music_data_dict.get("lyric_url") + + if music_url != None: + await self.initiate_download( + _("音乐"), music_url, base_path, music_name, ".mp3" + ) + + if kwargs.get("lyric"): + if lyric_url is None: + return + + # 下载str格式的json歌词文件 + lyric = await self.get_fetch_data(lyric_url) + + # 如果json歌词文件下载成功,则读取并处理成lrc格式 + if 
lyric.status_code != 200: + return + + lrc_content = json_2_lrc(lyric.json()) + await self.initiate_static_download( + _("歌词"), lrc_content, base_path, lyric_name, ".lrc" + ) + async def create_stream_tasks( self, kwargs: dict, webcast_datas: Union[list, dict], user_path: Any ) -> None: diff --git a/f2/apps/douyin/filter.py b/f2/apps/douyin/filter.py index 312685f..1465ee3 100644 --- a/f2/apps/douyin/filter.py +++ b/f2/apps/douyin/filter.py @@ -1,3 +1,5 @@ +# path: f2/apps/douyin/filter.py + from f2.utils.json_filter import JSONModel from f2.utils.utils import _get_first_item_from_list, timestamp_2_str, replaceT @@ -73,6 +75,10 @@ def mplatform_followers_count(self): def nickname(self): return replaceT(self._get_attr_value("$.user.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.user.nickname") + @property def room_id(self): return self._get_attr_value("$.user.room_id") @@ -82,32 +88,747 @@ def school_name(self): return self._get_attr_value("$.user.school_name") @property - def sec_user_id(self): - return self._get_attr_value("$.user.sec_uid") + def sec_user_id(self): + return self._get_attr_value("$.user.sec_uid") + + @property + def short_id(self): + return self._get_attr_value("$.user.short_id") + + @property + def signature(self): + return replaceT(self._get_attr_value("$.user.signature")) + + @property + def signature_raw(self): + return self._get_attr_value("$.user.signature") + + @property + def total_favorited(self): + return self._get_attr_value("$.user.total_favorited") + + @property + def uid(self): + return self._get_attr_value("$.user.uid") + + @property + def unique_id(self): + return self._get_attr_value("$.user.unique_id") + + @property + def user_age(self): + return self._get_attr_value("$.user.user_age") + + def _to_raw(self) -> dict: + return self._data + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + +class UserPostFilter(JSONModel): + @property + def has_aweme(self) -> bool: + return bool( + self._get_attr_value("$.aweme_list") + ) # 如果aweme_list是空的或None,此属性返回False + + @property + def locate_item_cursor(self): + return self._get_attr_value("$.locate_item_cursor") # 定位作品用 + + @property + def aweme_id(self): + ids = self._get_list_attr_value("$.aweme_list[*].aweme_id") + return ids if isinstance(ids, list) else [ids] + + @property + def aweme_type(self): + return self._get_list_attr_value("$.aweme_list[*].aweme_type") + + @property + def create_time(self): + create_times = self._get_list_attr_value("$.aweme_list[*].create_time") + return ( + [timestamp_2_str(str(ct)) for ct in create_times] + if isinstance(create_times, list) + else timestamp_2_str(str(create_times)) + ) + + @property + def desc(self): + return replaceT(self._get_list_attr_value("$.aweme_list[*].desc")) + + @property + def desc_raw(self): + return self._get_list_attr_value("$.aweme_list[*].desc") + + @property + def uid(self): + return self._get_list_attr_value("$.aweme_list[*].author.uid") + + @property + def sec_user_id(self): + return self._get_list_attr_value("$.aweme_list[*].author.sec_uid") + + @property + def nickname(self): + return replaceT(self._get_list_attr_value("$.aweme_list[*].author.nickname")) + + @property + def nickname_raw(self): + return self._get_list_attr_value("$.aweme_list[*].author.nickname") + + @property + def author_avatar_thumb(self): + return self._get_list_attr_value( + 
"$.aweme_list[*].author.avatar_thumb.url_list[0]" + ) + + @property + def images(self): + images_list = self._get_list_attr_value("$.aweme_list[*].images") + + return [ + ( + [ + img["url_list"][0] + for img in images + if isinstance(img, dict) and "url_list" in img and img["url_list"] + ] + if images + else None + ) + for images in images_list + ] + + @property + def animated_cover(self): + # 临时办法 + # https://github.com/h2non/jsonpath-ng/issues/82 + + # 获取所有视频 + videos = self._get_list_attr_value("$.aweme_list[*].video") + + # 逐个视频判断是否存在animated_cover + animated_covers = [ + ( + video.get("animated_cover", {}).get("url_list", [None])[0] + if video.get("animated_cover") + else None + ) + for video in videos + ] + + return animated_covers + + @property + def cover(self): + return self._get_list_attr_value( + "$.aweme_list[*].video.origin_cover.url_list[0]" + ) + + @property + def video_play_addr(self): + return self._get_list_attr_value("$.aweme_list[*].video.play_addr.url_list[0]") + + @property + def video_bit_rate(self): + bit_rate_data = self._get_list_attr_value("$.aweme_list[*].video.bit_rate") + + return [ + ( + [aweme["bit_rate"]] + if isinstance(aweme, dict) + else ( + [aweme[0]["bit_rate"]] + if len(aweme) == 1 + else [item["bit_rate"] for item in aweme] + ) + ) + for aweme in bit_rate_data + ] + + @property + def video_duration(self): + return self._get_list_attr_value("$.aweme_list[*].video.duration") + + @property + def part_see(self): + return self._get_list_attr_value("$.aweme_list[*].status.part_see") + + @property + def private_status(self): + return self._get_list_attr_value("$.aweme_list[*].status.private_status") + + @property + def is_prohibited(self): + # true 代表视频侵权 false代表视频未侵权 + return self._get_list_attr_value("$.aweme_list[*].status.is_prohibited") + + @property + def author_deleted(self): + # true 代表作者删除 false 代表作者未删除 + return self._get_list_attr_value("$.aweme_list[*].music.author_deleted") + + @property + def music_status(self): + # 1 代表正常 0 代表异常 + return self._get_list_attr_value("$.aweme_list[*].music.status") + + @property + def music_title(self): + return replaceT(self._get_list_attr_value("$.aweme_list[*].music.title")) + + @property + def music_title_raw(self): + return self._get_list_attr_value("$.aweme_list[*].music.title") + + @property + def music_play_url(self): + url_list = self._get_list_attr_value("$.aweme_list[*].music.play_url.url_list") + return _get_first_item_from_list(url_list) + + @property + def has_more(self) -> bool: + return bool(self._get_attr_value("$.has_more")) + + @property + def max_cursor(self): + return self._get_attr_value("$.max_cursor") + + @property + def min_cursor(self): + return self._get_attr_value("$.min_cursor") + + def _to_raw(self) -> dict: + return self._data + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + def _to_list(self): + exclude_list = [ + "has_more", + "max_cursor", + "min_cursor", + "has_aweme", + "locate_item_cursor", + ] + + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] + + aweme_entries = self._get_attr_value("$.aweme_list") or [] + + list_dicts = [] + for entry in aweme_entries: + d = { + "has_more": self.has_more, + "max_cursor": self.max_cursor, + "min_cursor": self.min_cursor, + } + for key in keys: + attr_values = getattr(self, key) + index = 
aweme_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts + + +class UserCollectionFilter(UserPostFilter): + def __init__(self, data): + super().__init__(data) + + @property + def max_cursor(self): + return self._get_attr_value("$.cursor") + + +class UserCollectsFilter(JSONModel): + + @property + def max_cursor(self): + return self._get_attr_value("$.cursor") + + @property + def status_code(self): + return self._get_attr_value("$.status_code") + + @property + def total_number(self): + return self._get_attr_value("$.total_number") + + @property + def has_more(self): + return bool(self._get_attr_value("$.has_more")) + + @property + def app_id(self): + return self._get_list_attr_value("$.collects_list[*].app_id") + + @property + def collects_cover(self): + return self._get_list_attr_value( + "$.collects_list[*].collects_cover.url_list[0]" + ) + + @property + def collects_id(self): + return self._get_list_attr_value("$.collects_list[*].collects_id") + + @property + def collects_name(self): + return replaceT(self._get_list_attr_value("$.collects_list[*].collects_name")) + + @property + def collects_name_raw(self): + return self._get_list_attr_value("$.collects_list[*].collects_name") + + @property + def create_time(self): + create_times = self._get_list_attr_value("$.collects_list[*].create_time") + return ( + [timestamp_2_str(str(ct)) for ct in create_times] + if isinstance(create_times, list) + else timestamp_2_str(str(create_times)) + ) + + @property + def follow_status(self): + return self._get_list_attr_value("$.collects_list[*].follow_status") + + @property + def followed_count(self): + return self._get_list_attr_value("$.collects_list[*].followed_count") + + @property + def is_normal_status(self): + return self._get_list_attr_value("$.collects_list[*].is_normal_status") + + @property + def item_type(self): + return self._get_list_attr_value("$.collects_list[*].item_type") + + @property + def last_collect_time(self): + create_times = self._get_list_attr_value("$.collects_list[*].last_collect_time") + return ( + [timestamp_2_str(str(ct)) for ct in create_times] + if isinstance(create_times, list) + else timestamp_2_str(str(create_times)) + ) + + @property + def play_count(self): + return self._get_list_attr_value("$.collects_list[*].play_count") + + @property + def states(self): + return self._get_list_attr_value("$.collects_list[*].states") + + @property + def status(self): + return self._get_list_attr_value("$.collects_list[*].status") + + @property + def system_type(self): + return self._get_list_attr_value("$.collects_list[*].system_type") + + @property + def total_number(self): + return self._get_list_attr_value("$.collects_list[*].total_number") + + @property + def user_id(self): + return self._get_list_attr_value("$.collects_list[*].user_id") + + # user_info + @property + def nickname(self): + return replaceT( + self._get_list_attr_value("$.collects_list[*].user_info.nickname") + ) + + @property + def nickname_raw(self): + return self._get_list_attr_value("$.collects_list[*].user_info.nickname") + + @property + def uid(self): + return self._get_list_attr_value("$.collects_list[*].user_info.uid") + + def _to_raw(self) -> dict: + return self._data + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + +class UserMusicCollectionFilter(JSONModel): + + @property + def 
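A small, hypothetical sketch of reading the new `UserCollectsFilter` (not part of the patch); it only uses the `collects_id` and `collects_name` properties defined above and mirrors the diff's own isinstance guard for single-item responses:

from f2.apps.douyin.filter import UserCollectsFilter  # filter path as in this diff

def list_collect_folders(response: dict) -> dict:
    # Pair each收藏夹 id with its (sanitized) name.
    collects = UserCollectsFilter(response)
    ids = collects.collects_id
    names = collects.collects_name
    ids = ids if isinstance(ids, list) else [ids]
    names = names if isinstance(names, list) else [names]
    return dict(zip(ids, names))
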
max_cursor(self): + return self._get_attr_value("$.cursor") + + @property + def has_more(self): + return self._get_attr_value("$.has_more") + + @property + def status_code(self): + return self._get_attr_value("$.status_code") + + @property + def msg(self): + return self._get_attr_value("$.msg") + + @property + def album(self): + return self._get_list_attr_value("$.mc_list[*].album") + + @property + def audition_duration(self): + return self._get_list_attr_value("$.mc_list[*].audition_duration") + + @property + def duration(self): + return self._get_list_attr_value("$.mc_list[*].duration") + + @property + def author(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].author")) + + @property + def author_raw(self): + return self._get_list_attr_value("$.mc_list[*].author") + + @property + def collect_status(self): + return self._get_list_attr_value("$.mc_list[*].collect_stat") + + @property + def music_status(self): + return self._get_list_attr_value("$.mc_list[*].music_status") + + @property + def cover_hd(self): + return self._get_list_attr_value("$.mc_list[*].cover_hd.url_list[0]") + + @property + def music_id(self): + return self._get_list_attr_value("$.mc_list[*].id") + + @property + def mid(self): + return self._get_list_attr_value("$.mc_list[*].mid") + + @property + def is_commerce_music(self): + return self._get_list_attr_value("$.mc_list[*].is_commerce_music") + + @property + def is_original(self): + return self._get_list_attr_value("$.mc_list[*].is_original") + + @property + def is_original_sound(self): + return self._get_list_attr_value("$.mc_list[*].is_original_sound") + + @property + def lyric_type(self): + return self._get_list_attr_value("$.mc_list[*].lyric_type") + + @property + def lyric_url(self): + # 不是每个作品都有 lyric_url,如果不存在则为 None + lyric_urls = [] + for item in self._data.get("mc_list"): + lyric_urls.append(item.get("lyric_url", None)) + + return lyric_urls + + @property + def play_url(self): + return self._get_list_attr_value("$.mc_list[*].play_url.url_list[0]") + + @property + def title(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].title")) + + @property + def title_raw(self): + return self._get_list_attr_value("$.mc_list[*].title") + + @property + def strong_beat_url(self): + return self._get_list_attr_value("$.mc_list[*].strong_beat_url.url_list[0]") + + @property + def owner_nickname(self): + return replaceT(self._get_list_attr_value("$.mc_list[*].owner_nickname")) + + @property + def owner_nickname_raw(self): + return self._get_list_attr_value("$.mc_list[*].owner_nickname") + + @property + def owner_id(self): + return self._get_list_attr_value("$.mc_list[*].owner_id") + + @property + def sec_uid(self): + return self._get_list_attr_value("$.mc_list[*].sec_uid") + + def _to_raw(self) -> dict: + return self._data + + def _to_dict(self) -> dict: + return { + prop_name: getattr(self, prop_name) + for prop_name in dir(self) + if not prop_name.startswith("__") and not prop_name.startswith("_") + } + + def _to_list(self): + exclude_list = ["has_more", "max_cursor", "status_code", "msg"] + + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] + + aweme_entries = self._get_attr_value("$.mc_list") or [] + + list_dicts = [] + for entry in aweme_entries: + d = { + "has_more": self.has_more, + "max_cursor": self.max_cursor, + "status_code": self.status_code, + "msg": self.msg, + } + for key in keys: + attr_values = getattr(self, key) + 
index = aweme_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts + + +class UserMixFilter(UserPostFilter): + def __init__(self, data): + super().__init__(data) + + @property + def max_cursor(self): + return self._get_attr_value("$.cursor") + + +class UserLikeFilter(UserPostFilter): + def __init__(self, data): + super().__init__(data) + + +class UserFollowingFilter(JSONModel): + + @property + def status_code(self): # 1 正常,2096 用户隐私设置不允许查看 + return self._get_attr_value("$.status_code") + + @property + def status_msg(self): + return self._get_attr_value("$.status_msg") + + @property + def has_more(self): + return self._get_attr_value("$.has_more") + + @property + def total(self): + return self._get_attr_value("$.total") + + @property + def mix_count(self): + return self._get_attr_value("$.mix_count") + + @property + def offset(self): + return self._get_attr_value("$.offset") + + @property + def myself_user_id(self): + return self._get_attr_value("$.myself_user_id") + + @property + def max_time(self): + return self._get_attr_value("$.max_time") + + @property + def min_time(self): + return self._get_attr_value("$.min_time") + + # following_list + @property + def avatar_larger(self): + return self._get_list_attr_value("$.followings[*].avatar_larger.url_list[0]") + + @property + def can_comment(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_comment") + + @property + def can_forward(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_forward") + + @property + def can_share(self): + return self._get_list_attr_value("$.followings[*].aweme_control.can_share") + + @property + def can_show_comment(self): + return self._get_list_attr_value( + "$.followings[*].aweme_control.can_show_comment" + ) + + @property + def aweme_count(self): + return self._get_list_attr_value("$.followings[*].aweme_count") + + @property + def back_cover(self): + return self._get_list_attr_value("$.followings[*].cover_url[0].url_list[0]") + + @property + def register_time(self): + return self._get_list_attr_value("$.followings[*].create_time") + + @property + def secondary_priority(self): + # secondary_priority 6 代表未看过的作品数量 1 代表正在直播 7 代表简介内容 + return self._get_list_attr_value( + "$.followings[*].following_list_secondary_information_struct.secondary_information_priority" + ) + + @property + def secondary_text(self): + return replaceT( + self._get_list_attr_value( + "$.followings[*].following_list_secondary_information_struct.secondary_information_text" + ) + ) + + @property + def secondary_text_raw(self): + return self._get_list_attr_value( + "$.followings[*].following_list_secondary_information_struct.secondary_information_text" + ) + + @property + def is_block(self): + return self._get_list_attr_value("$.followings[*].is_block") + + @property + def is_blocked(self): + return self._get_list_attr_value("$.followings[*].is_blocked") + + @property + def is_gov_media_vip(self): + return self._get_list_attr_value("$.followings[*].is_gov_media_vip") + + @property + def is_mix_user(self): + return self._get_list_attr_value("$.followings[*].is_mix_user") + + @property + def is_phone_binded(self): + return self._get_list_attr_value("$.followings[*].is_phone_binded") + + @property + def is_star(self): + return self._get_list_attr_value("$.followings[*].is_star") + + @property + def is_top(self): + # 超粉? 
+ return self._get_list_attr_value("$.followings[*].is_top") + + @property + def is_verified(self): + # 实名? + return self._get_list_attr_value("$.followings[*].is_verified") + + @property + def language(self): + return self._get_list_attr_value("$.followings[*].language") + + @property + def nickname(self): + return replaceT(self._get_list_attr_value("$.followings[*].nickname")) + + @property + def nickname_raw(self): + return self._get_list_attr_value("$.followings[*].nickname") + + @property + def relation_label(self): + return self._get_list_attr_value("$.followings[*].relation_label") + + @property + def room_id(self): + return self._get_list_attr_value("$.followings[*].room_id") + + @property + def sec_uid(self): + return self._get_list_attr_value("$.followings[*].sec_uid") + + @property + def secret(self): + # 私密? + return self._get_list_attr_value("$.followings[*].secret") @property def short_id(self): - return self._get_attr_value("$.user.short_id") + return self._get_list_attr_value("$.followings[*].short_id") @property def signature(self): - return replaceT(self._get_attr_value("$.user.signature")) + return replaceT(self._get_list_attr_value("$.followings[*].signature")) @property - def total_favorited(self): - return self._get_attr_value("$.user.total_favorited") + def signature_raw(self): + return self._get_list_attr_value("$.followings[*].signature") @property def uid(self): - return self._get_attr_value("$.user.uid") + return self._get_list_attr_value("$.followings[*].uid") @property def unique_id(self): - return self._get_attr_value("$.user.unique_id") + return self._get_list_attr_value("$.followings[*].unique_id") - @property - def user_age(self): - return self._get_attr_value("$.user.user_age") + def _to_raw(self) -> dict: + return self._data def _to_dict(self) -> dict: return { @@ -116,176 +837,187 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } + def _to_list(self): + exclude_list = [ + "status_code", + "status_msg", + "has_more", + "total", + "mix_count", + "offset", + "myself_user_id", + "max_time", + "min_time", + ] -class UserPostFilter(JSONModel): - @property - def has_aweme(self) -> bool: - return bool( - self._get_attr_value("$.aweme_list") - ) # 如果aweme_list是空的或None,此属性返回False + keys = [ + prop_name + for prop_name in dir(self) + if not prop_name.startswith("__") + and not prop_name.startswith("_") + and prop_name not in exclude_list + ] - @property - def locate_item_cursor(self): - return self._get_attr_value("$.locate_item_cursor") # 定位作品用 + following_entries = self._get_attr_value("$.followings") or [] - @property - def aweme_id(self): - ids = self._get_list_attr_value("$.aweme_list[*].aweme_id") - return ids if isinstance(ids, list) else [ids] + list_dicts = [] + for entry in following_entries: + d = { + "has_more": self.has_more, + "total": self.total, + "mix_count": self.mix_count, + "offset": self.offset, + "myself_user_id": self.myself_user_id, + "max_time": self.max_time, + "min_time": self.min_time, + } + for key in keys: + attr_values = getattr(self, key) + index = following_entries.index(entry) + d[key] = attr_values[index] if index < len(attr_values) else None + list_dicts.append(d) + return list_dicts - @property - def aweme_type(self): - return self._get_list_attr_value("$.aweme_list[*].aweme_type") + +class UserFollowerFilter(UserFollowingFilter): + def __init__(self, data): + super().__init__(data) @property - def create_time(self): - create_times = 
self._get_list_attr_value("$.aweme_list[*].create_time") - return ( - [timestamp_2_str(ct) for ct in create_times] - if isinstance(create_times, list) - else timestamp_2_str(create_times) - ) + def total(self): + return self._get_attr_value("$.total") + # followers @property - def desc(self): - return replaceT(self._get_list_attr_value("$.aweme_list[*].desc")) + def avatar_larger(self): + return self._get_list_attr_value("$.followers[*].avatar_larger.url_list[0]") @property - def uid(self): - return self._get_list_attr_value("$.aweme_list[*].author.uid") + def can_comment(self): + return self._get_list_attr_value("$.followers[*].aweme_control.can_comment") @property - def sec_user_id(self): - return self._get_list_attr_value("$.aweme_list[*].author.sec_uid") + def can_forward(self): + return self._get_list_attr_value("$.followers[*].aweme_control.can_forward") @property - def nickname(self): - return replaceT(self._get_list_attr_value("$.aweme_list[*].author.nickname")) + def can_share(self): + return self._get_list_attr_value( + "$.followersfollowers[*].aweme_control.can_share" + ) @property - def author_avatar_thumb(self): + def can_show_comment(self): return self._get_list_attr_value( - "$.aweme_list[*].author.avatar_thumb.url_list[0]" + "$.followers[*].aweme_control.can_show_comment" ) @property - def images(self): - images_list = self._get_list_attr_value("$.aweme_list[*].images") + def aweme_count(self): + return self._get_list_attr_value("$.followers[*].aweme_count") - return [ - [ - img["url_list"][0] - for img in images - if isinstance(img, dict) and "url_list" in img and img["url_list"] - ] - if images - else None - for images in images_list - ] + @property + def back_cover(self): + return self._get_list_attr_value("$.followers[*].cover_url[0].url_list[0]") @property - def animated_cover(self): - # 临时办法 - # https://github.com/h2non/jsonpath-ng/issues/82 + def register_time(self): + return self._get_list_attr_value("$.followers[*].create_time") - # 获取所有视频 - videos = self._get_list_attr_value("$.aweme_list[*].video") + @property + def is_block(self): + return self._get_list_attr_value("$.followers[*].is_block") - # 逐个视频判断是否存在animated_cover - animated_covers = [ - video.get("animated_cover", {}).get("url_list", [None])[0] - if video.get("animated_cover") - else None - for video in videos - ] + @property + def is_blocked(self): + return self._get_list_attr_value("$.followers[*].is_blocked") - return animated_covers + @property + def is_gov_media_vip(self): + return self._get_list_attr_value("$.followers[*].is_gov_media_vip") @property - def cover(self): - return self._get_list_attr_value( - "$.aweme_list[*].video.origin_cover.url_list[0]" - ) + def is_mix_user(self): + return self._get_list_attr_value("$.followers[*].is_mix_user") @property - def video_play_addr(self): - return self._get_list_attr_value("$.aweme_list[*].video.play_addr.url_list[0]") + def is_phone_binded(self): + return self._get_list_attr_value("$.followers[*].is_phone_binded") @property - def video_bit_rate(self): - bit_rate_data = self._get_list_attr_value("$.aweme_list[*].video.bit_rate") + def is_star(self): + return self._get_list_attr_value("$.followers[*].is_star") - return [ - [aweme["bit_rate"]] - if isinstance(aweme, dict) - else [aweme[0]["bit_rate"]] - if len(aweme) == 1 - else [item["bit_rate"] for item in aweme] - for aweme in bit_rate_data - ] + @property + def is_top(self): + # 超粉? 
+ return self._get_list_attr_value("$.followers[*].is_top") @property - def video_duration(self): - return self._get_list_attr_value("$.aweme_list[*].video.duration") + def is_verified(self): + # 实名? + return self._get_list_attr_value("$.followers[*].is_verified") @property - def part_see(self): - return self._get_list_attr_value("$.aweme_list[*].status.part_see") + def language(self): + return self._get_list_attr_value("$.followers[*].language") @property - def private_status(self): - return self._get_list_attr_value("$.aweme_list[*].status.private_status") + def nickname(self): + return replaceT(self._get_list_attr_value("$.followers[*].nickname")) @property - def is_prohibited(self): - # true 代表视频侵权 false代表视频未侵权 - return self._get_list_attr_value("$.aweme_list[*].status.is_prohibited") + def nickname_raw(self): + return self._get_list_attr_value("$.followers[*].nickname") @property - def author_deleted(self): - # true 代表作者删除 false 代表作者未删除 - return self._get_list_attr_value("$.aweme_list[*].music.author_deleted") + def relation_label(self): + return self._get_list_attr_value("$.followers[*].relation_label") @property - def music_status(self): - # 1 代表正常 0 代表异常 - return self._get_list_attr_value("$.aweme_list[*].music.status") + def room_id(self): + return self._get_list_attr_value("$.followers[*].room_id") @property - def music_title(self): - return replaceT(self._get_list_attr_value("$.aweme_list[*].music.title")) + def sec_uid(self): + return self._get_list_attr_value("$.followers[*].sec_uid") @property - def music_play_url(self): - url_list = self._get_list_attr_value("$.aweme_list[*].music.play_url.url_list") - return _get_first_item_from_list(url_list) + def secret(self): + # 私密? + return self._get_list_attr_value("$.followers[*].secret") @property - def has_more(self) -> bool: - return bool(self._get_attr_value("$.has_more")) + def short_id(self): + return self._get_list_attr_value("$.followers[*].short_id") @property - def max_cursor(self): - return self._get_attr_value("$.max_cursor") + def signature(self): + return replaceT(self._get_list_attr_value("$.followers[*].signature")) @property - def min_cursor(self): - return self._get_attr_value("$.min_cursor") + def signature_raw(self): + return self._get_list_attr_value("$.followers[*].signature") - def _to_dict(self) -> dict: - return { - prop_name: getattr(self, prop_name) - for prop_name in dir(self) - if not prop_name.startswith("__") and not prop_name.startswith("_") - } + @property + def uid(self): + return self._get_list_attr_value("$.followers[*].uid") + + @property + def unique_id(self): + return self._get_list_attr_value("$.followers[*].unique_id") def _to_list(self): exclude_list = [ + "status_code", + "status_msg", "has_more", - "max_cursor", - "min_cursor", - "has_aweme", - "locate_item_cursor", + "total", + "mix_count", + "offset", + "myself_user_id", + "max_time", + "min_time", ] keys = [ @@ -296,132 +1028,28 @@ def _to_list(self): and prop_name not in exclude_list ] - aweme_entries = self._get_attr_value("$.aweme_list") or [] + following_entries = self._get_attr_value("$.followers") or [] list_dicts = [] - for entry in aweme_entries: + for entry in following_entries: d = { "has_more": self.has_more, - "max_cursor": self.max_cursor, - "min_cursor": self.min_cursor, + "total": self.total, + "mix_count": self.mix_count, + "offset": self.offset, + "myself_user_id": self.myself_user_id, + "max_time": self.max_time, + "min_time": self.min_time, } for key in keys: attr_values = getattr(self, key) - index = 
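A hypothetical sketch of consuming the follower filter via `_to_list()` (not part of the patch), showing how the per-user JSONPath lists above flatten into one dict per follower:

from f2.apps.douyin.filter import UserFollowerFilter  # filter path as in this diff

def follower_rows(response: dict) -> list:
    # _to_list() builds one dict per follower and copies the page-level
    # fields (has_more, total, offset, ...) onto each row.
    follower = UserFollowerFilter(response)
    rows = follower._to_list()
    for row in rows:
        print(row.get("nickname"), row.get("sec_uid"), row.get("signature"))
    return rows
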
aweme_entries.index(entry) + index = following_entries.index(entry) d[key] = attr_values[index] if index < len(attr_values) else None list_dicts.append(d) return list_dicts -class UserCollectFilter(UserPostFilter): - def __init__(self, data): - super().__init__(data) - - @property - def max_cursor(self): - return self._get_attr_value("$.cursor") - - -class UserMixFilter(UserPostFilter): - def __init__(self, data): - super().__init__(data) - - @property - def max_cursor(self): - return self._get_attr_value("$.cursor") - - -class UserLikeFilter(UserPostFilter): - def __init__(self, data): - super().__init__(data) - - class PostDetailFilter(JSONModel): - # api_status_code = property(lambda self: self._get_attr_value("$.status_code")) - # # author - # nickname = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.author.nickname"))) - # sec_user_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.sec_uid")) - # short_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.short_id")) - # uid = property(lambda self: self._get_attr_value("$.aweme_detail.author.uid")) - # unique_id = property(lambda self: self._get_attr_value("$.aweme_detail.author.unique_id")) - - # can_comment = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_comment")) - # can_forward = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_forward")) - # can_share = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_share")) - # can_show_comment = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.can_show_comment")) - # aweme_type = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_control.aweme_type")) - # aweme_id = property(lambda self: self._get_attr_value("$.aweme_detail.aweme_id")) - # comment_gid = property(lambda self: self._get_attr_value("$.aweme_detail.comment_gid")) - # create_time = property(lambda self: timestamp_2_str(self._get_attr_value("$.aweme_detail.create_time"))) - # desc = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.desc"))) - # duration = property(lambda self: self._get_attr_value("$.aweme_detail.duration")) - # is_ads = property(lambda self: self._get_attr_value("$.aweme_detail.is_ads")) - # is_story = property(lambda self: self._get_attr_value("$.aweme_detail.is_story")) - # is_top = property(lambda self: self._get_attr_value("$.aweme_detail.is_top")) - # video_bit_rate = property(lambda self: [ - # [aweme['bit_rate']] if isinstance(aweme, dict) - # else [aweme[0]['bit_rate']] if len(aweme) == 1 - # else [item['bit_rate'] for item in aweme] - # for aweme in self._get_list_attr_value("$.aweme_detail.video.bit_rate") - # ]) - # video_play_addr = property(lambda self: self._get_attr_value("$.aweme_detail.video.play_addr.url_list[0]")) - # images = property(lambda self: [ - # [img['url_list'][0] for img in images if isinstance(img, dict) and 'url_list' in img and img['url_list']] - # if images else None - # for images in self._get_list_attr_value("$.aweme_detail.images") - # ]) - - # # aweme status - # is_delete = property(lambda self: self._get_attr_value("$.aweme_detail.status.is_delete")) - # is_prohibited = property(lambda self: self._get_attr_value("$.aweme_detail.status.is_prohibited")) - - # is_long_video = property(lambda self: self._get_attr_value("$.aweme_detail.long_video")) - # media_type = property(lambda self: self._get_attr_value("$.aweme_detail.media_type")) - # # mix - # mix_desc = property(lambda 
self: replaceT(self._get_attr_value("$.aweme_detail.mix_info.mix_desc"))) - # mix_create_time = property(lambda self: timestamp_2_str(self._get_attr_value("$.aweme_detail.mix_info.mix_create_time"))) - # mix_id = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_id")) - # mix_name = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_name")) - # mix_pic_type = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_pic_type")) - # mix_type = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_type")) - # mix_share_url = property(lambda self: self._get_attr_value("$.aweme_detail.mix_info.mix_share_url")) - # mix_update_time = property(lambda self: timestamp_2_str(self._get_attr_value("$.aweme_detail.mix_info.mix_update_time"))) - # # music - # is_commerce_music = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_commerce_music")) - # is_original = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_original")) - # is_original_sound = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_original_sound")) - # is_pgc = property(lambda self: self._get_attr_value("$.aweme_detail.music.is_pgc")) - # music_author = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.author"))) - # music_author_deleted = property(lambda self: self._get_attr_value("$.aweme_detail.music.author_deleted")) - # music_duration = property(lambda self: self._get_attr_value("$.aweme_detail.music.duration")) - # music_id = property(lambda self: self._get_attr_value("$.aweme_detail.music.id")) - # music_id_str = property(lambda self: self._get_attr_value("$.aweme_detail.music.id_str")) - # music_mid = property(lambda self: self._get_attr_value("$.aweme_detail.music.mid")) - # pgc_author = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author"))) - # pgc_author_title = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author_title"))) - # pgc_music_type = property(lambda self: self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_music_type")) - # music_status = property(lambda self: self._get_attr_value("$.aweme_detail.music.status")) - # music_owner_handle = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.owner_handle"))) - # music_owner_id = property(lambda self: self._get_attr_value("$.aweme_detail.music.owner_id")) - # music_owner_nickname = property(lambda self: replaceT(self._get_attr_value("$.aweme_detail.music.owner_nickname"))) - # music_play_url = property(lambda self: self._get_attr_value("$.aweme_detail.music.play_url.url_list[0]")) - - # # position - # position = property(lambda self: self._get_attr_value("$.aweme_detail.position")) - # # region = property(lambda self: self._get_attr_value("$.aweme_detail.region")) - - # # seo_ocr_content - # seo_ocr_content = property(lambda self: self._get_attr_value("$.aweme_detail.seo_info.seo_ocr_content")) - - # admire_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.admire_count")) - # collect_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.collect_count")) - # comment_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.comment_count")) - # digg_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.digg_count")) - # # play_count = property(lambda self: 
self._get_attr_value("$.aweme_detail.statistics.play_count")) - # share_count = property(lambda self: self._get_attr_value("$.aweme_detail.statistics.share_count")) - - # hashtag_ids = property(lambda self: self._get_list_attr_value("$.aweme_detail.text_extra[*].hashtag_id")) - # hashtag_names = property(lambda self: self._get_list_attr_value("$.aweme_detail.text_extra[*].hashtag_name")) @property def api_status_code(self): @@ -440,6 +1068,10 @@ def aweme_id(self): def nickname(self): return replaceT(self._get_attr_value("$.aweme_detail.author.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.aweme_detail.author.nickname") + @property def sec_user_id(self): return self._get_attr_value("$.aweme_detail.author.sec_uid") @@ -486,6 +1118,10 @@ def create_time(self): def desc(self): return replaceT(self._get_attr_value("$.aweme_detail.desc")) + @property + def desc_raw(self): + return self._get_attr_value("$.aweme_detail.desc") + @property def duration(self): return self._get_attr_value("$.aweme_detail.duration") @@ -533,6 +1169,10 @@ def media_type(self): def mix_desc(self): return replaceT(self._get_attr_value("$.aweme_detail.mix_info.mix_desc")) + @property + def mix_desc_raw(self): + return self._get_attr_value("$.aweme_detail.mix_info.mix_desc") + @property def mix_create_time(self): return timestamp_2_str( @@ -586,6 +1226,10 @@ def is_pgc(self): def music_author(self): return replaceT(self._get_attr_value("$.aweme_detail.music.author")) + @property + def music_author_raw(self): + return self._get_attr_value("$.aweme_detail.music.author") + @property def music_author_deleted(self): return self._get_attr_value("$.aweme_detail.music.author_deleted") @@ -608,6 +1252,10 @@ def pgc_author(self): self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author") ) + @property + def pgc_author_raw(self): + return self._get_attr_value("$.aweme_detail.music.matched_pgc_sound.pgc_author") + @property def pgc_author_title(self): return replaceT( @@ -616,6 +1264,12 @@ def pgc_author_title(self): ) ) + @property + def pgc_author_title_raw(self): + return self._get_attr_value( + "$.aweme_detail.music.matched_pgc_sound.pgc_author_title" + ) + @property def pgc_music_type(self): return self._get_attr_value( @@ -630,6 +1284,10 @@ def music_status(self): def music_owner_handle(self): return replaceT(self._get_attr_value("$.aweme_detail.music.owner_handle")) + @property + def music_owner_handle_raw(self): + return self._get_attr_value("$.aweme_detail.music.owner_handle") + @property def music_owner_id(self): return self._get_attr_value("$.aweme_detail.music.owner_id") @@ -638,6 +1296,10 @@ def music_owner_id(self): def music_owner_nickname(self): return replaceT(self._get_attr_value("$.aweme_detail.music.owner_nickname")) + @property + def music_owner_nickname_raw(self): + return self._get_attr_value("$.aweme_detail.music.owner_nickname") + @property def music_play_url(self): return self._get_attr_value("$.aweme_detail.music.play_url.url_list[0]") @@ -724,23 +1386,30 @@ def video_bit_rate(self): ) return [ - [aweme["bit_rate"]] - if isinstance(aweme, dict) - else [aweme[0]["bit_rate"]] - if len(aweme) == 1 - else [item["bit_rate"] for item in aweme] + ( + [aweme["bit_rate"]] + if isinstance(aweme, dict) + else ( + [aweme[0]["bit_rate"]] + if len(aweme) == 1 + else [item["bit_rate"] for item in aweme] + ) + ) for aweme in bit_rate_data ] @property def video_play_addr(self): - return self._get_attr_value("$.aweme_detail.video.play_addr.url_list[0]") + return 
self._get_attr_value("$.aweme_detail.video.play_addr.url_list") # images @property def images(self): return self._get_list_attr_value("$.aweme_detail.images[*].url_list[0]") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -748,35 +1417,6 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } - def _to_list(self): - # 不需要的属性列表 - exclude_list = ["has_more", "max_cursor", "min_cursor"] - # 生成属性名称列表,然后过滤掉不需要的属性 - keys = [ - prop_name - for prop_name in dir(self) - if not prop_name.startswith("__") - and not prop_name.startswith("_") - and prop_name not in exclude_list - ] - - aweme_entries = self._get_attr_value("$.aweme_detail") or [] - - list_dicts = [] - # 遍历每个条目并创建一个字典 - # (Iterate through each entry and create a dict) - for entry in aweme_entries: - d = {} - for key in keys: - attr_values = getattr(self, key) - # 当前aweme_entry在属性列表中的索引 - index = aweme_entries.index(entry) - # 如果属性值的长度足够则赋值,否则赋None - # (Assign value if the length of the attribute value is sufficient, otherwise assign None) - d[key] = attr_values[index] if index < len(attr_values) else None - list_dicts.append(d) - return list_dicts - class UserLiveFilter(JSONModel): # live @@ -796,6 +1436,10 @@ def live_status(self): def live_title(self): return replaceT(self._get_attr_value("$.data.data[0].title")) + @property + def live_title_raw(self): + return self._get_attr_value("$.data.data[0].title") + @property def cover(self): return self._get_attr_value("$.data.data[0].cover.url_list[0]") @@ -833,6 +1477,10 @@ def sec_user_id(self): def nickname(self): return replaceT(self._get_attr_value("$.data.data[0].owner.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.data.data[0].owner.nickname") + @property def avatar_thumb(self): return self._get_attr_value("$.data.data[0].owner.avatar_thumb.url_list[0]") @@ -881,6 +1529,9 @@ def DiggAuth(self): def ShareAuth(self): return self._get_attr_value("$.data.data[0].room_auth.Share") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -888,35 +1539,6 @@ def _to_dict(self) -> dict: if not prop_name.startswith("__") and not prop_name.startswith("_") } - def _to_list(self): - # 不需要的属性列表 - exclude_list = [] - # 生成属性名称列表,然后过滤掉不需要的属性 - keys = [ - prop_name - for prop_name in dir(self) - if not prop_name.startswith("__") - and not prop_name.startswith("_") - and prop_name not in exclude_list - ] - - aweme_entries = self._get_attr_value("$.aweme_list") or [] - - list_dicts = [] - # 遍历每个条目并创建一个字典 - # (Iterate through each entry and create a dict) - for entry in aweme_entries: - d = {} - for key in keys: - attr_values = getattr(self, key) - # 当前aweme_entry在属性列表中的索引 - index = aweme_entries.index(entry) - # 如果属性值的长度足够则赋值,否则赋None - # (Assign value if the length of the attribute value is sufficient, otherwise assign None) - d[key] = attr_values[index] if index < len(attr_values) else None - list_dicts.append(d) - return list_dicts - class UserLive2Filter(JSONModel): # live @@ -940,17 +1562,21 @@ def live_status(self): def live_title(self): return replaceT(self._get_attr_value("$.data.room.title")) + @property + def live_title_raw(self): + return self._get_attr_value("$.data.room.title") + @property def user_count(self): return self._get_attr_value("$.data.room.user_count") @property def create_time(self): - return 
timestamp_2_str(self._get_attr_value("$.data.room.create_time")) + return timestamp_2_str(str(self._get_attr_value("$.data.room.create_time"))) @property def finish_time(self): - return timestamp_2_str(self._get_attr_value("$.data.room.finish_time")) + return timestamp_2_str(str(self._get_attr_value("$.data.room.finish_time"))) @property def cover(self): @@ -977,14 +1603,26 @@ def hls_pull_url(self): def nickname(self): return replaceT(self._get_attr_value("$.data.room.owner.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.data.room.owner.nickname") + @property def gender(self): return replaceT(self._get_attr_value("$.data.room.owner.gender")) + @property + def gender_raw(self): + return self._get_attr_value("$.data.room.owner.gender") + @property def signature(self): return replaceT(self._get_attr_value("$.data.room.owner.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.data.room.owner.signature") + @property def avatar_large(self): return self._get_attr_value("$.data.room.owner.avatar_large.url_list[0]") @@ -1009,6 +1647,9 @@ def follower_count(self): def sec_uid(self): return self._get_attr_value("$.data.room.owner.sec_uid") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1074,6 +1715,9 @@ def error_code(self): def message(self): return self._get_attr_value("$.message") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -1111,6 +1755,9 @@ def message(self): def verify_ticket(self): return self._get_attr_value("$.verify_ticket") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) diff --git a/f2/apps/douyin/handler.py b/f2/apps/douyin/handler.py index 5bb8f8a..f2af93f 100644 --- a/f2/apps/douyin/handler.py +++ b/f2/apps/douyin/handler.py @@ -1,7 +1,8 @@ # path: f2/apps/douyin/handler.py import asyncio -from typing import AsyncGenerator, Dict, Any, List +from pathlib import Path +from typing import AsyncGenerator, Union, Dict, Any, List from f2.log.logger import logger from f2.i18n.translator import _ @@ -14,24 +15,33 @@ UserPost, UserProfile, UserLike, - UserCollect, + UserCollection, + UserCollects, + UserCollectsVideo, + UserMusicCollection, UserMix, PostDetail, UserLive, UserLive2, LoginGetQr, LoginCheckQr, + UserFollowing, + UserFollower, ) from f2.apps.douyin.filter import ( UserPostFilter, UserProfileFilter, - UserCollectFilter, + UserCollectionFilter, + UserCollectsFilter, + UserMusicCollectionFilter, UserMixFilter, PostDetailFilter, UserLiveFilter, UserLive2Filter, GetQrcodeFilter, CheckQrcodeFilter, + UserFollowingFilter, + UserFollowerFilter, ) from f2.apps.douyin.utils import ( SecUserIdFetcher, @@ -42,6 +52,7 @@ show_qrcode, ) from f2.cli.cli_console import RichConsoleManager +from f2.exceptions.api_exceptions import APIResponseError rich_console = RichConsoleManager().rich_console rich_prompt = RichConsoleManager().rich_prompt @@ -52,11 +63,14 @@ class DouyinHandler: # 需要忽略的字段(需过滤掉有时效性的字段) ignore_fields = ["video_play_addr", "images", "video_bit_rate", "cover"] - def __init__(self, kwargs) -> None: + def __init__(self, kwargs: dict = ...) 
-> None: self.kwargs = kwargs self.downloader = DouyinDownloader(kwargs) - async def handler_user_profile(self, sec_user_id: str) -> UserProfileFilter: + async def handler_user_profile( + self, + sec_user_id: str, + ) -> UserProfileFilter: """ 用于获取指定用户的个人信息 (Used to get personal info of specified users) @@ -71,9 +85,16 @@ async def handler_user_profile(self, sec_user_id: str) -> UserProfileFilter: async with DouyinCrawler(self.kwargs) as crawler: params = UserProfile(sec_user_id=sec_user_id) response = await crawler.fetch_user_profile(params) + user = UserProfileFilter(response) + if user.nickname is None: + raise APIResponseError(_("API内容请求失败,请更换新cookie后再试")) return UserProfileFilter(response) - async def get_user_nickname(self, sec_user_id: str, db: AsyncUserDB) -> str: + async def get_user_nickname( + self, + sec_user_id: str, + db: AsyncUserDB, + ) -> str: """ 获取指定用户的昵称,如果不存在,则从服务器获取并存储到数据库中 (Used to get personal info of specified users) @@ -93,8 +114,11 @@ async def get_user_nickname(self, sec_user_id: str, db: AsyncUserDB) -> str: return user_dict.get("nickname") async def get_or_add_user_data( - self, kwargs: dict, sec_user_id: str, db: AsyncUserDB - ) -> Any: + self, + kwargs: dict, + sec_user_id: str, + db: AsyncUserDB, + ) -> Path: """ 获取或创建用户数据同时创建用户目录 (Get or create user data and create user directory) @@ -130,7 +154,10 @@ async def get_or_add_user_data( @classmethod async def get_or_add_video_data( - cls, aweme_data: dict, db: AsyncVideoDB, ignore_fields: list = None + cls, + aweme_data: dict, + db: AsyncVideoDB, + ignore_fields: list = None, ): """ 获取或创建作品数据库数据 @@ -154,7 +181,7 @@ async def get_or_add_video_data( @mode_handler("one") async def handle_one_video(self): """ - 用于处理单个视频。 + 用于处理单个作品。 (Used to process a single video.) Args: @@ -167,44 +194,53 @@ async def handle_one_video(self): async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data( - self.kwargs, aweme_data.get("sec_user_id"), db + self.kwargs, aweme_data.sec_user_id, db ) async with AsyncVideoDB("douyin_videos.db") as db: - await self.get_or_add_video_data(aweme_data, db, self.ignore_fields) + await self.get_or_add_video_data( + aweme_data._to_dict(), db, self.ignore_fields + ) - logger.debug(_("单个视频数据: {0}".format(aweme_data))) - await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) + logger.debug(_("单个作品数据:{0}").format(aweme_data._to_dict())) + + # 创建下载任务 + await self.downloader.create_download_tasks( + self.kwargs, aweme_data._to_dict(), user_path + ) - async def fetch_one_video(self, aweme_id: str) -> dict: + async def fetch_one_video( + self, + aweme_id: str, + ) -> PostDetailFilter: """ - 用于获取单个视频。 + 用于获取单个作品。 Args: - aweme_id: str: 视频ID + aweme_id: str: 作品ID Return: - video_data: dict: 视频数据字典,包含视频ID、视频文案、作者昵称 + video: PostDetailFilter: 单个作品数据过滤器 """ - logger.debug(_("开始爬取视频: {0}").format(aweme_id)) + logger.info(_("开始爬取作品:{0}").format(aweme_id)) async with DouyinCrawler(self.kwargs) as crawler: params = PostDetail(aweme_id=aweme_id) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) - return video._to_dict() + return video @mode_handler("post") async def handle_user_post(self): """ - 用于处理用户发布的视频。 + 用于处理用户发布的作品。 (Used to process videos published by users.) 
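A hypothetical caller sketch for the reworked `fetch_one_video` (not part of the patch; the `f2.apps.douyin.handler` import path is assumed from the file path in this diff):

import asyncio

from f2.apps.douyin.handler import DouyinHandler  # handler path as in this diff

async def fetch_one(kwargs: dict, aweme_id: str) -> dict:
    handler = DouyinHandler(kwargs)
    video = await handler.fetch_one_video(aweme_id)   # now returns a PostDetailFilter
    print(video.aweme_id, video.desc, video.nickname)
    return video._to_dict()                           # or video._to_raw() for the raw payload

# asyncio.run(fetch_one({"cookie": "<your cookie>"}, "<aweme_id>"))
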
Args: @@ -225,12 +261,12 @@ async def handle_user_post(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) - # # 一次性批量插入视频数据到数据库 + # # 一次性批量插入作品数据到数据库 # async with AsyncVideoDB("douyin_videos.db") as db: - # await db.batch_insert_videos(aweme_data_list, ignore_fields) + # await db.batch_insert_videos(aweme_data_list._to_list(), ignore_fields) async def fetch_user_post_videos( self, @@ -238,35 +274,35 @@ async def fetch_user_post_videos( max_cursor: int = 0, page_counts: int = 20, max_counts: int = None, - ): + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户发布的视频列表。 + 用于获取指定用户发布的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 发布的视频").format(sec_user_id)) + logger.info(_("开始爬取用户:{0} 发布的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserPost( @@ -276,37 +312,39 @@ async def fetch_user_post_videos( ) response = await crawler.fetch_user_post(params) video = UserPostFilter(response) + yield video if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break max_cursor = video.max_cursor continue - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") - - aweme_data_list = video._to_list() - yield aweme_data_list + logger.debug("===================================") - # 更新已经处理的视频数量 (Update the number of videos processed) + # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) max_cursor = video.max_cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("like") async def handle_user_like(self): """ - 用于处理用户喜欢的视频 (Used to process videos liked by users) + 用于处理用户喜欢的作品 (Used to process videos liked by users) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -326,14 +364,14 @@ async def handle_user_like(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) # async with AsyncVideoDB("douyin_videos.db") as db: # for aweme_data in 
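A hypothetical caller sketch for the async-generator `fetch_user_post_videos` above (not part of the patch; the import path is assumed from the file path in this diff):

import asyncio

from f2.apps.douyin.handler import DouyinHandler  # handler path as in this diff

async def dump_user_posts(kwargs: dict, sec_user_id: str, max_counts: int = 40):
    handler = DouyinHandler(kwargs)
    # Each yielded page is a UserPostFilter; _to_list() gives one dict per video.
    async for page in handler.fetch_user_post_videos(
        sec_user_id, max_cursor=0, page_counts=20, max_counts=max_counts
    ):
        for row in page._to_list():
            print(row.get("aweme_id"), row.get("desc"), row.get("create_time"))

# asyncio.run(dump_user_posts({"cookie": "<your cookie>"}, "<sec_user_id>"))
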
aweme_data_list: # await get_or_add_video_data(aweme_data, db, ignore_fields) - # # 一次性批量插入视频数据到数据库 + # # 一次性批量插入作品数据到数据库 # async with AsyncVideoDB("douyin_videos.db") as db: # await db.batch_insert_videos(aweme_data_list, ignore_fields) @@ -343,35 +381,35 @@ async def fetch_user_like_videos( max_cursor: int = 0, page_counts: int = 20, max_counts: int = None, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户喜欢的视频列表。 + 用于获取指定用户喜欢的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 喜欢的视频").format(sec_user_id)) + logger.info(_("开始爬取用户:{0} 喜欢的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserLike( @@ -380,38 +418,40 @@ async def fetch_user_like_videos( sec_user_id=sec_user_id, ) response = await crawler.fetch_user_like(params) - video = UserPostFilter(response) + like = UserPostFilter(response) + yield like - if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) - if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + if not like.has_aweme: + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) + if not like.has_more: + logger.info(_("用户:{0} 所有作品采集完毕").format(sec_user_id)) break - max_cursor = video.max_cursor + max_cursor = like.max_cursor continue - logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + like.aweme_id, like.desc, like.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) + max_cursor = like.max_cursor - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = video.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个点赞作品").format(videos_collected)) - @mode_handler("collect") - async def handle_user_collect(self): + @mode_handler("music") + async def handle_user_music_collection(self): """ - 用于处理用户收藏的视频 (Used to process videos collected by users) + 用于处理用户收藏的音乐 (Used to process music collected by users) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -421,82 +461,424 @@ async def handle_user_collect(self): page_counts = self.kwargs.get("page_counts", 20) max_counts = 
self.kwargs.get("max_counts") + # Web端音乐收藏作品的接口只能通过登录的cookie获取,与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的音乐收藏作品。 + # 此外,音乐收藏作品的文件夹将根据所配置的URL主页用户名来确定。 + # 为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) - async for aweme_data_list in self.fetch_user_collect_videos( + async for aweme_data_list in self.fetch_user_music_collection( + max_cursor, page_counts, max_counts + ): + # 创建下载任务 + await self.downloader.create_music_download_tasks( + self.kwargs, aweme_data_list._to_list(), user_path + ) + + async def fetch_user_music_collection( + self, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, + ) -> AsyncGenerator[UserMusicCollectionFilter, Any]: + """ + 用于获取指定用户收藏的音乐作品列表。 + + Args: + max_cursor: int: 起始页 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 + + Return: + music: AsyncGenerator[UserMusicCollectionFilter, Any]: 音乐数据过滤器,包含音乐数据的_to_raw、_to_dict、_to_list方法 + """ + + max_counts = max_counts or float("inf") + music_collected = 0 + + logger.info(_("开始爬取用户收藏的音乐作品")) + + while music_collected < max_counts: + current_request_size = min(page_counts, max_counts - music_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量:{0} 每次请求数量:{1}").format( + max_counts, current_request_size + ) + ) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserMusicCollection( + cursor=max_cursor, count=current_request_size + ) + response = await crawler.fetch_user_music_collection(params) + music = UserMusicCollectionFilter(response) + yield music + + if not music.has_more: + logger.info(_("用户收藏的音乐作品采集完毕")) + break + + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) + logger.debug( + _("音乐ID:{0} 音乐标题:{1} 作者:{2}").format( + music.music_id, music.title, music.author + ) + ) + logger.debug("===================================") + + # 更新已经处理的音乐数量 (Update the number of music processed) + music_collected += len(music.music_id) + max_cursor = music.max_cursor + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个音乐作品").format(music_collected)) + + @mode_handler("collection") + async def handle_user_collection(self): + """ + 用于处理用户收藏的作品 (Used to process videos collected by users) + + Args: + kwargs: dict: 参数字典 (Parameter dictionary) + """ + + max_cursor = self.kwargs.get("max_cursor", 0) + page_counts = self.kwargs.get("page_counts", 20) + max_counts = self.kwargs.get("max_counts") + # 由于Web端收藏作品的接口只能通过登录的cookie获取,而与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的收藏作品。 + # 此外,收藏作品的文件夹将根据所配置的URL主页用户名来确定。 + # 为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 + sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) + + async with AsyncUserDB("douyin_users.db") as db: + user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) + + async for aweme_data_list in self.fetch_user_collection_videos( max_cursor, page_counts, max_counts ): await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) - async def fetch_user_collect_videos( - self, max_cursor: int = 0, page_counts: int = 20, max_counts: int = None - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + async def 
fetch_user_collection_videos( + self, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, + ) -> AsyncGenerator[UserCollectionFilter, Any]: """ - 用于获取指定用户收藏的视频列表。 + 用于获取指定用户收藏的作品列表。 (Used to get the list of videos collected by the specified user.) - 该接口需要用POST且只靠cookie来获取数据。 - (This interface needs to be POST and only relies on cookies to get data.) Args: max_cursor: int: 起始页 (Start page) - page_counts: int: 每页视频数 (Number of videos per page) - max_counts: int: 最大视频数 (Maximum number of videos) + page_counts: int: 每页作品数 (Number of videos per page) + max_counts: int: 最大作品数 (Maximum number of videos) Return: - aweme_data: dict: 视频数据字典, 包含视频ID列表、视频文案、作者昵称、起始页 - (Video data dictionary, including video ID list, video description, - author nickname, start page) + collection: AsyncGenerator[UserCollectionFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 + + Note: + 该接口需要用POST且只靠cookie来获取数据。 + (This interface needs to use POST and only rely on cookies to obtain data.) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户收藏的视频")) + logger.info(_("开始爬取用户收藏的作品")) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: - params = UserCollect(cursor=max_cursor, count=current_request_size) - response = await crawler.fetch_user_collect(params) - video = UserCollectFilter(response) + params = UserCollection(cursor=max_cursor, count=current_request_size) + response = await crawler.fetch_user_collection(params) + collection = UserCollectionFilter(response) + yield collection + + if not collection.has_more: + logger.info(_("用户收藏的作品采集完毕")) + break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + collection.aweme_id, collection.desc, collection.nickname + ) + ) + logger.debug("===================================") + + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collection.aweme_id) + max_cursor = collection.max_cursor + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个收藏作品").format(videos_collected)) + + @mode_handler("collects") + async def handle_user_collects(self): + """ + 用于处理用户收藏夹的作品 (Used to process videos in user collections) + + Args: + kwargs: dict: 参数字典 (Parameter dictionary) + """ + + max_cursor = self.kwargs.get("max_cursor", 0) + page_counts = self.kwargs.get("page_counts", 20) + max_counts = self.kwargs.get("max_counts") + # 由于无法在Web端获取收藏夹的URL,因此无法通过URL来获取收藏夹作品。 + # Web端收藏夹作品的接口只能通过登录的cookie获取,与配置的URL无关。 + # 因此,即使填写了其他人的URL,也只能获取到你自己的收藏夹作品。 + # 此外,收藏夹作品的文件夹将根据所配置的URL主页用户名来确定。 + # 为避免将文件下载到其他人的文件夹下,请务必确保填写的URL是你自己的主页URL。 + sec_user_id = await SecUserIdFetcher.get_sec_user_id(self.kwargs.get("url")) + + async with AsyncUserDB("douyin_users.db") as db: + user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) + + async for collects in self.fetch_user_collects( + max_cursor, page_counts, max_counts + ): + 
choose_collects_id = await self.select_user_collects(collects) + + if isinstance(choose_collects_id, str): + choose_collects_id = [choose_collects_id] + + for collects_id in choose_collects_id: + # 由于收藏夹作品包含在用户名下且存在收藏夹名,因此将额外创建收藏夹名的文件夹 + # 将会根据是否设置了 --folderize 参数来决定是否创建收藏夹名的文件夹 + # 例如: 用户名/收藏夹名/作品名.mp4 + if self.kwargs.get("folderize"): + tmp_user_path = user_path + tmp_user_path = ( + tmp_user_path + / collects.collects_name[ + collects.collects_id.index(int(collects_id)) + ] + ) + else: + tmp_user_path = user_path + + async for aweme_data_list in self.fetch_user_collects_videos( + collects_id, max_cursor, page_counts, max_counts + ): + await self.downloader.create_download_tasks( + self.kwargs, aweme_data_list._to_list(), tmp_user_path + ) + + logger.info( + _("爬取结束,共爬取 {0} 个收藏夹").format(len(choose_collects_id)) + ) + + async def select_user_collects( + self, collects: UserCollectsFilter + ) -> Union[str, List[str]]: + """ + 用于选择收藏夹 + (Used to select the collection) + + Args: + collects: UserCollectsFilter: 收藏夹列表过滤器 (Collection list Filter) + + Return: + collects_id: Union[str, List[str]]: 选择的收藏夹ID (Selected collects_id) + """ + + rich_console.print(_("0: [bold]全部下载[/bold]")) + for i in range(len(collects.collects_id)): + rich_console.print( + _( + "{0}:{1} (包含 {2} 个作品[以网页实际数量为准],收藏夹ID {3})" + ).format( + i + 1, + collects.collects_name[i], + collects.total_number[i], + collects.collects_id[i], + ) + ) + + # rich_prompt 会有字符刷新问题,暂时使用rich_print + rich_console.print(_("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]")) + selected_index = int( + rich_prompt.ask( + # _("[bold yellow]请输入希望下载的收藏夹序号:[/bold yellow]"), + choices=[str(i) for i in range(len(collects.collects_id) + 1)], + ) + ) + + if selected_index == 0: + return collects.collects_id + else: + return str(collects.collects_id[selected_index - 1]) + + async def fetch_user_collects( + self, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, + ) -> AsyncGenerator[UserCollectsFilter, Any]: + """ + 用于获取指定用户收藏夹。 + (Used to get the list of videos in the specified user's collection.) 
+ + Args: + max_cursor: int: 起始页 (Page cursor) + page_counts: int: 每页收藏夹数 (Page counts) + max_counts: int: 最大收藏夹数 (Max counts) + + Return: + collects: AsyncGenerator[UserCollectsFilter, Any]: 收藏夹数据过滤器,包含收藏夹数据的_to_raw、_to_dict、_to_list方法) + """ + + max_counts = max_counts or float("inf") + collected = 0 + + logger.info(_("开始爬取用户收藏夹")) + + while collected < max_counts: + logger.debug("===================================") + logger.debug( + _("当前请求的max_cursor:{0}, max_counts:{1}").format( + max_cursor, max_counts ) ) - logger.debug("=====================================") - aweme_data_list = video._to_list() - yield aweme_data_list + async with DouyinCrawler(self.kwargs) as crawler: + params = UserCollects(cursor=max_cursor, count=page_counts) + response = await crawler.fetch_user_collects(params) + collects = UserCollectsFilter(response) + yield collects + + # 更新已经处理的收藏夹数量 (Update the number of collections processed) + collected += len(collects.collects_id) - if not video.has_more: - logger.debug(_("用户收藏的视频采集完毕")) + if not collects.has_more: break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = video.max_cursor + logger.debug( + _("收藏夹ID:{0} 收藏夹标题:{1}").format( + collects.collects_id, collects.collects_name + ) + ) + logger.debug("===================================") + + max_cursor = collects.max_cursor + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共找到 {0} 个收藏夹").format(collected)) + + async def fetch_user_collects_videos( + self, + collects_id: str, + max_cursor: int = 0, + page_counts: int = 20, + max_counts: int = None, + ) -> AsyncGenerator[UserCollectionFilter, Any]: + """ + 用于获取指定用户收藏夹的作品列表。 + (Used to get the list of videos in the specified user's collection.) 
+ + Args: + collects_id: str: 收藏夹ID (Collection ID) + max_cursor: int: 起始页 (Page cursor) + page_counts: int: 每页作品数 (Number of videos per page) + max_counts: int: 最大作品数 (Maximum number of videos) + + Return: + video: AsyncGenerator[UserCollectionFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 + """ + + max_counts = max_counts or float("inf") + videos_collected = 0 + + logger.info(_("开始爬取收藏夹:{0} 的作品").format(collects_id)) + + while videos_collected < max_counts: + current_request_size = min(page_counts, max_counts - videos_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量:{0} 每次请求数量:{1}").format( + max_counts, current_request_size + ) + ) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserCollectsVideo( + cursor=max_cursor, + count=current_request_size, + collects_id=collects_id, + ) + response = await crawler.fetch_user_collects_video(params) + video = UserCollectionFilter(response) + + # 更新已处理视频数量 + videos_collected += len(video.aweme_id) + + if video.has_aweme: + if not video.has_more: + yield video + break + + logger.debug(_("当前请求的max_cursor:{0}").format(max_cursor)) + logger.debug( + _("视频ID:{0} 视频文案:{1} 作者:{2}").format( + video.aweme_id, video.desc, video.nickname + ) + ) + logger.debug("=====================================") + + yield video + max_cursor = video.max_cursor + else: + logger.info(_("{0} 页没有找到作品").format(max_cursor)) + + if not video.has_more: + break + + max_cursor = video.max_cursor + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info( + _("收藏夹:{0} 所有作品采集完毕,共爬取 {1} 个作品").format( + collects_id, videos_collected + ) + ) @mode_handler("mix") async def handle_user_mix(self): """ - 用于处理用户合集的视频 (Used to process videos of users' collections) + 用于处理用户合集的作品 (Used to process videos of users' mix) Args: kwargs: dict: 参数字典 (Parameter dictionary) @@ -519,11 +901,11 @@ async def handle_user_mix(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) # async with AsyncVideoDB("douyin_videos.db") as db: - # for aweme_data in aweme_data_list: + # for aweme_data in aweme_data_list._to_list(): # await get_or_add_video_data(aweme_data, db, ignore_fields) async def fetch_user_mix_videos( @@ -532,63 +914,65 @@ async def fetch_user_mix_videos( max_cursor: int = 0, page_counts: int = 20, max_counts: int = None, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserMixFilter, Any]: """ - 用于获取指定用户合集的视频列表。 + 用于获取指定用户合集的作品列表。 Args: mix_id: str: 合集ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + mix: AsyncGenerator[UserMixFilter, Any]: 合集作品数据过滤器,包含合集作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取合集: {0} 的视频").format(mix_id)) + logger.info(_("开始爬取合集: {0} 的作品").format(mix_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 
{0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserMix( cursor=max_cursor, count=current_request_size, mix_id=mix_id ) response = await crawler.fetch_user_mix(params) - video = UserMixFilter(response) + mix = UserMixFilter(response) + yield mix + + if not mix.has_more: + logger.info(_("合集: {0} 所有作品采集完毕").format(mix_id)) + break logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + mix.aweme_id, mix.desc, mix.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(mix.aweme_id) + max_cursor = mix.max_cursor - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(aweme_data_list) - max_cursor = video.max_cursor - - if not video.has_more: - logger.debug(_("合集: {0} 所有作品采集完毕").format(mix_id)) - break + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个合集作品").format(videos_collected)) @mode_handler("live") async def handle_user_live(self): @@ -604,18 +988,26 @@ async def handle_user_live(self): # 然后下载直播推流 webcast_data = await self.fetch_user_live_videos(webcast_id) - live_status = webcast_data.get("live_status") + + live_status = webcast_data.live_status + sec_user_id = webcast_data.sec_user_id + # 是否正在直播 if live_status != 2: - logger.debug(_("直播已结束")) + logger.info(_("当前 {0} 直播已结束").format(webcast_id)) return - sec_user_id = webcast_data.get("sec_user_id") async with AsyncUserDB("douyin_users.db") as db: user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db) - await self.downloader.create_stream_tasks(self.kwargs, webcast_data, user_path) - async def fetch_user_live_videos(self, webcast_id: str): + await self.downloader.create_stream_tasks( + self.kwargs, webcast_data._to_dict(), user_path + ) + + async def fetch_user_live_videos( + self, + webcast_id: str, + ) -> UserLiveFilter: """ 用于获取指定用户直播列表。 (Used to get the list of videos collected by the specified user.) 
@@ -629,8 +1021,8 @@ async def fetch_user_live_videos(self, webcast_id: str): sub-partition, anchor nickname) """ - logger.debug(_("开始爬取直播: {0} 的数据").format(webcast_id)) - logger.debug("=====================================") + logger.info(_("开始爬取直播: {0} 的数据").format(webcast_id)) + logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: params = UserLive(web_rid=webcast_id, room_id_str="") @@ -647,13 +1039,15 @@ async def fetch_user_live_videos(self, webcast_id: str): live.sub_partition_title, live.nickname ) ) - logger.debug("=====================================") - logger.debug(_("直播信息爬取结束")) + logger.debug("===================================") + logger.info(_("直播信息爬取结束")) - webcast_data = live._to_dict() - return webcast_data + return live - async def fetch_user_live_videos_by_room_id(self, room_id: str): + async def fetch_user_live_videos_by_room_id( + self, + room_id: str, + ) -> UserLive2Filter: """ 使用room_id获取指定用户直播列表。 (Used to get the list of videos collected by the specified user) @@ -667,8 +1061,8 @@ async def fetch_user_live_videos_by_room_id(self, room_id: str): anchor nickname) """ - logger.debug(_("开始爬取房间号: {0} 的数据").format(room_id)) - logger.debug("=====================================") + logger.info(_("开始爬取房间号: {0} 的数据").format(room_id)) + logger.debug("===================================") async with DouyinCrawler(self.kwargs) as crawler: params = UserLive2(room_id=room_id) @@ -689,11 +1083,10 @@ async def fetch_user_live_videos_by_room_id(self, room_id: str): ), ) ) - logger.debug("=====================================") - logger.debug(_("直播信息爬取结束")) + logger.debug("===================================") + logger.info(_("直播信息爬取结束")) - webcast_data = live._to_dict() - return webcast_data + return live @mode_handler("feed") async def handle_user_feed(self): @@ -718,7 +1111,7 @@ async def handle_user_feed(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_feed_videos( @@ -727,35 +1120,35 @@ async def fetch_user_feed_videos( max_cursor: int = 0, page_counts: int = 20, max_counts: int = None, - ) -> AsyncGenerator[List[Dict[str, Any]], None]: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户feed的视频列表。 + 用于获取指定用户feed的作品列表。 Args: sec_user_id: str: 用户ID max_cursor: int: 起始页 - page_counts: int: 每页视频数 - max_counts: int: 最大视频数 + page_counts: int: 每页作品数 + max_counts: int: 最大作品数 Return: - aweme_data: dict: 视频数据字典,包含视频ID列表、视频文案、作者昵称、起始页 + video: AsyncGenerator[UserPostFilter, Any]: 作品数据过滤器,包含作品数据的_to_raw、_to_dict、_to_list方法 """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} feed的视频").format(sec_user_id)) + logger.info(_("开始爬取用户: {0} feed的作品").format(sec_user_id)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size ) ) - logger.debug(_("开始爬取第 {0} 页").format(max_cursor)) + logger.info(_("开始爬取第 {0} 页").format(max_cursor)) async with DouyinCrawler(self.kwargs) as crawler: params = UserPost( @@ -764,33 +1157,201 @@ async def fetch_user_feed_videos( sec_user_id=sec_user_id, ) response = await crawler.fetch_user_post(params) - video = UserPostFilter(response) + feed = UserPostFilter(response) + yield feed - if not 
video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(max_cursor))) - if not video.has_more: - logger.debug(_("用户: {0} 所有作品采集完毕".format(sec_user_id))) + if not feed.has_aweme: + logger.info(_("第 {0} 页没有找到作品").format(max_cursor)) + if not feed.has_more: + logger.info(_("用户: {0} 所有作品采集完毕").format(sec_user_id)) break - max_cursor = video.max_cursor + max_cursor = feed.max_cursor continue logger.debug(_("当前请求的max_cursor: {0}").format(max_cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + feed.aweme_id, feed.desc, feed.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(feed.aweme_id) + max_cursor = feed.max_cursor - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - max_cursor = video.max_cursor + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个首页推荐作品").format(videos_collected)) + + async def fetch_user_following( + self, + user_id: str = "", + sec_user_id: str = "", + offset: int = 0, + count: int = 20, + source_type: int = 4, + min_time: int = 0, + max_time: int = 0, + max_counts: float = float("inf"), + ) -> AsyncGenerator[UserFollowingFilter, Any]: + """ + 用于获取指定用户关注的用户的作品列表。 + + Args: + user_id: str: 用户ID + sec_user_id: str: 用户ID + offset: int: 起始页 + count: int: 每页关注用户数 + source_type: int: 排序类型 + min_time: int: 最小时间戳 + max_time: int: 最大时间戳 + Return: + following: AsyncGenerator[UserFollowingFilter, Any]: 关注用户数据过滤器,包含关注用户数据的_to_raw、_to_dict、_to_list方法 + """ + + if not user_id and not sec_user_id: + raise ValueError(_("至少提供 user_id 或 sec_user_id 中的一个参数")) + + max_counts = max_counts or float("inf") + users_collected = 0 + + logger.info(_("开始爬取用户:{0} 的关注用户").format(sec_user_id)) + + while users_collected < max_counts: + current_request_size = min(count, max_counts - users_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量:{0} 每次请求数量:{1}").format(count, current_request_size) + ) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserFollowing( + offset=offset, + count=current_request_size, + user_id=user_id, + sec_user_id=sec_user_id, + source_type=source_type, + min_time=min_time, + max_time=max_time, + ) + response = await crawler.fetch_user_following(params) + following = UserFollowingFilter(response) + yield following + + if not following.has_more: + logger.info(_("用户:{0} 所有关注用户采集完毕").format(sec_user_id)) + break + + logger.info(_("当前请求的offset:{0}").format(offset)) + logger.info(_("爬取了 {0} 个关注用户").format(offset + 1)) + logger.debug( + _("用户ID:{0} 用户昵称:{1} 用户作品数:{2} 额外内容:{3}").format( + following.sec_uid, + following.nickname, + following.aweme_count, + following.secondary_text, + ) + ) + logger.debug("===================================") + + # 更新已经处理的用户数量 (Update the number of users processed) + users_collected += len(following.sec_uid) + offset = following.offset + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) + + logger.info(_("爬取结束,共爬取 {0} 个用户").format(users_collected)) + + async def fetch_user_follower( + self, + user_id: str = "", + sec_user_id: str = "", + 
offset: int = 0, + count: int = 20, + source_type: int = 1, + min_time: int = 0, + max_time: int = 0, + max_counts: float = float("inf"), + ) -> AsyncGenerator[UserFollowerFilter, Any]: + """ + 用于获取指定用户的粉丝列表。 + + Args: + user_id: str: 用户ID + sec_user_id: str: 用户ID + offset: int: 起始页 + count: int: 每页粉丝数 + source_type: int: 排序类型 + min_time: int: 最小时间戳 + max_time: int: 最大时间戳 + Return: + follower: AsyncGenerator[UserFollowerFilter, Any]: 粉丝数据过滤器,包含用户ID列表、用户昵称、用户头像、起始页 + """ + + if not user_id and not sec_user_id: + raise ValueError(_("至少提供 user_id 或 sec_user_id 中的一个参数")) + + max_counts = max_counts or float("inf") + users_collected = 0 + + logger.info(_("开始爬取用户:{0} 的粉丝").format(sec_user_id)) + + while users_collected < max_counts: + current_request_size = min(count, max_counts - users_collected) + + logger.debug("===================================") + logger.debug( + _("最大数量:{0} 每次请求数量:{1}").format(count, current_request_size) + ) + + async with DouyinCrawler(self.kwargs) as crawler: + params = UserFollower( + offset=offset, + count=current_request_size, + user_id=user_id, + sec_user_id=sec_user_id, + source_type=source_type, + min_time=min_time, + max_time=max_time, + ) + response = await crawler.fetch_user_follower(params) + follower = UserFollowerFilter(response) + yield follower + + if not follower.has_more: + logger.info(_("用户:{0} 所有粉丝采集完毕").format(sec_user_id)) + break + + logger.info( + _("当前请求的offset:{0} max_time:{1}").format(offset, max_time) + ) + logger.info(_("爬取了 {0} 个粉丝用户").format(users_collected + 1)) + logger.debug( + _("用户ID:{0} 用户昵称:{1} 用户作品数:{2}").format( + follower.sec_uid, follower.nickname, follower.aweme_count + ) + ) + logger.debug("===================================") + + # 更新已经处理的用户数量 (Update the number of users processed) + users_collected += len(follower.sec_uid) + offset = follower.offset + + # 更新最大(最早)时间戳,避免重复返回相同的用户 + max_time = follower.min_time + + # 避免请求过于频繁 + logger.info(_("等待 {0} 秒后继续").format(self.kwargs.get("timeout", 5))) + await asyncio.sleep(self.kwargs.get("timeout", 5)) - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.info(_("爬取结束,共爬取 {0} 个用户").format(users_collected)) async def handle_sso_login(): diff --git a/f2/apps/douyin/help.py b/f2/apps/douyin/help.py index f225451..8929832 100644 --- a/f2/apps/douyin/help.py +++ b/f2/apps/douyin/help.py @@ -24,20 +24,20 @@ def help() -> None: "根据模式提供相应的链接。例如:主页、点赞、收藏作品填入主页链接,单作品填入作品链接,合辑与直播同上" ), ), - ("-m --music", "[dark_cyan]Choice", _("是否保存视频原声。可选:'yes'、'no'")), - ("-v --cover", "[dark_cyan]Choice", _("是否保存视频封面。可选:'yes'、'no'")), - ("-d --desc", "[dark_cyan]Choice", _("是否保存视频文案。可选:'yes'、'no'")), + ("-m --music", "[dark_cyan]Bool", _("是否保存视频原声")), + ("-v --cover", "[dark_cyan]Bool", _("是否保存视频封面")), + ("-d --desc", "[dark_cyan]Bool", _("是否保存视频文案")), ("-p --path", "[dark_cyan]str", _("作品保存位置,支持绝对与相对路径。")), ( "-f --folderize", - "[dark_cyan]Choice", - _("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + "[dark_cyan]Bool", + _("是否将作品保存到单独的文件夹"), ), ( "-M --mode", "[dark_cyan]Choice", _( - "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collect),合辑(mix),直播(live)" + "下载模式:单个作品(one),主页作品(post),点赞作品(like),收藏作品(collection),收藏夹作品(collects),合辑(mix),直播(live)" ), ), ( @@ -59,15 +59,15 @@ def help() -> None: "下载日期区间发布的作品,格式:2022-01-01|2023-01-01,'all' 为下载所有作品" ), ), - ("-e --timeout", "[dark_cyan]int", _("网络请求超时时间。")), - ("-r --max-retries", "[dark_cyan]int", _("网络请求超时重试数。")), - ("-x --max-connections", "[dark_cyan]int", _("网络请求并发连接数。")), - ("-t --max-tasks", "[dark_cyan]int", _("异步的任务数。")), + ("-e --timeout", 
"[dark_cyan]int", _("网络请求超时时间")), + ("-r --max-retries", "[dark_cyan]int", _("网络请求超时重试数")), + ("-x --max-connections", "[dark_cyan]int", _("网络请求并发连接数")), + ("-t --max-tasks", "[dark_cyan]int", _("异步的任务数")), ("-o --max-counts", "[dark_cyan]int", _("最大作品下载数。0 表示无限制")), ( "-s --page-counts", "[dark_cyan]int", - _("从接口每页可获取作品数,不建议超过20。"), + _("从接口每页可获取作品数,不建议超过20"), ), ( "-l --languages", @@ -81,6 +81,7 @@ def help() -> None: "代理服务器,最多 2 个参数,http与https。空格区分 2 个参数 http://x.x.x.x https://x.x.x.x" ), ), + ("-L --lyric", "[dark_cyan]Bool", _("是否保存视频歌词")), ( "--update-config", "[dark_cyan]Flag", @@ -95,7 +96,7 @@ def help() -> None: "--auto-cookie", "[dark_cyan]Choice", _( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取[yellow]cookie[/yellow],使用该命令前请确保关闭所选的浏览器" ), ), ( diff --git a/f2/apps/douyin/model.py b/f2/apps/douyin/model.py index da02197..8714a99 100644 --- a/f2/apps/douyin/model.py +++ b/f2/apps/douyin/model.py @@ -12,18 +12,18 @@ class BaseRequestModel(BaseModel): aid: str = "6383" channel: str = "channel_pc_web" pc_client_type: int = 1 - version_code: str = "170400" - version_name: str = "17.4.0" + version_code: str = "190500" + version_name: str = "19.5.0" cookie_enabled: str = "true" screen_width: int = 1920 screen_height: int = 1080 browser_language: str = "zh-CN" browser_platform: str = "Win32" browser_name: str = "Edge" - browser_version: str = "117.0.2045.47" + browser_version: str = "122.0.0.0" browser_online: str = "true" engine_name: str = "Blink" - engine_version: str = "117.0.0.0" + engine_version: str = "122.0.0.0" os_name: str = "Windows" os_version: str = "10" cpu_core_num: int = 12 @@ -63,6 +63,7 @@ class BaseLiveModel2(BaseModel): app_id: str = "1128" msToken: str = TokenManager.gen_real_msToken() + class BaseLoginModel(BaseModel): service: str = "https://www.douyin.com" need_logo: str = "false" @@ -91,12 +92,31 @@ class UserLike(BaseRequestModel): sec_user_id: str -class UserCollect(BaseRequestModel): +class UserCollection(BaseRequestModel): # POST cursor: int count: int +class UserCollects(BaseRequestModel): + # GET + cursor: int + count: int + + +class UserCollectsVideo(BaseRequestModel): + # GET + cursor: int + count: int + collects_id: str + + +class UserMusicCollection(BaseRequestModel): + # GET + cursor: int + count: int + + class UserMix(BaseRequestModel): cursor: int count: int @@ -175,6 +195,7 @@ class UserLive(BaseLiveModel): web_rid: str room_id_str: str + class UserLive2(BaseLiveModel2): room_id: str @@ -210,8 +231,37 @@ class LoginGetQr(BaseLoginModel): fp: str = "" # msToken: str = TokenManager.gen_real_msToken() + class LoginCheckQr(BaseLoginModel): token: str = "" verifyFp: str = "" fp: str = "" - # msToken: str = TokenManager.gen_real_msToken() \ No newline at end of file + # msToken: str = TokenManager.gen_real_msToken() + + +class UserFollowing(BaseRequestModel): + user_id: str = "" + sec_user_id: str = "" + offset: int = 0 # 相当于cursor + min_time: int = 0 + max_time: int = 0 + count: int = 20 + # source_type = 1: 最近关注 需要指定max_time(s) 3: 最早关注 需要指定min_time(s) 4: 综合排序 + source_type: int = 4 + gps_access: int = 0 + address_book_access: int = 0 + is_top: int = 1 + + +class UserFollower(BaseRequestModel): + user_id: str + sec_user_id: str + offset: int = 0 # 相当于cursor 但只对source_type: = 2 有效,其他情况为 0 即可 + min_time: int = 0 + max_time: int = 0 + count: int = 20 + # source_type = 1: 最近关注 需要指定max_time(s) 2: 综合关注(意义不明) + source_type: int = 1 + gps_access: int = 0 + address_book_access: int = 0 + is_top: int = 1 diff 
--git a/f2/apps/douyin/test/test_lrc.py b/f2/apps/douyin/test/test_lrc.py new file mode 100644 index 0000000..3fa49c3 --- /dev/null +++ b/f2/apps/douyin/test/test_lrc.py @@ -0,0 +1,97 @@ +from f2.apps.douyin.utils import json_2_lrc + + +def test_gen_lrc_from_json(): + data = [ + {"text": "CB on the beat,ho", "timeId": "5.700"}, + {"text": "Wasted CTA lovees wasted", "timeId": "10.210"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "12.760"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "15.350"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "17.740"}, + {"text": "Wasted CTA lovees wasted", "timeId": "20.790"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "22.900"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "25.850"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "28.210"}, + {"text": "Wasted", "timeId": "30.830"}, + {"text": "Damn why is she so demonic", "timeId": "31.320"}, + {"text": "She medusa with a little pocahontas", "timeId": "33.510"}, + {"text": "She been lacin all my drugs or sosomethin", "timeId": "36.150"}, + { + "text": "Cause every time that we're together I'm unconscious", + "timeId": "38.560", + }, + {"text": "Hold upuhlet me be honest", "timeId": "41.100"}, + {"text": "I know l saw her put the percs in my chronic", "timeId": "43.760"}, + {"text": "Smokintil my eyes roll back like the omen", "timeId": "46.370"}, + {"text": "Just another funeral for hergod damn", "timeId": "48.370"}, + {"text": "Wasted CTA lovees wasted", "timeId": "61.320"}, + {"text": "Wasted I'm on these drugsI feel wasted", "timeId": "63.890"}, + {"text": "Wasted get her off my mind when I'm wasted", "timeId": "66.400"}, + {"text": "Wasted I'm waste all my time when I'm wasted", "timeId": "68.970"}, + {"text": "Wasted", "timeId": "71.160"}, + {"text": "She do cocaine in my basement", "timeId": "72.170"}, + {"text": "I'm a doctorsbut I'm runninout of patience", "timeId": "74.270"}, + {"text": "She told me that she tryna get closer to satan", "timeId": "76.760"}, + {"text": "She be talkin to him when she in the matrix", "timeId": "79.450"}, + { + "text": "Rockstarthat's our stylethere boys can't take it", + "timeId": "81.770", + }, + {"text": "Hatin but they're still tryna take our cadence", "timeId": "83.930"}, + {"text": "No basicbrand new rari when I'm racin", "timeId": "86.870"}, + { + "text": "Take itlet you roll my weedplease don't lace ityeah", + "timeId": "89.340", + }, + {"text": "That's a bum that you chasinayy", "timeId": "92.330"}, + {"text": "Foreign with meshe a dominatrix", "timeId": "95.220"}, + {"text": "I love that girls and I do like her body", "timeId": "97.270"}, + {"text": "I don't what the moneyI just want the molly", "timeId": "98.820"}, + { + "text": "That's what she say when she livesd in the valley", + "timeId": "100.160", + }, + {"text": "Lil boyI'm your fatherhakuna matata", "timeId": "101.380"}, + {"text": "I made that girl girls all of that top up", "timeId": "102.220"}, + { + "text": "Got dreadrs in my headused to pray for the lock up", + "timeId": "103.360", + }, + { + "text": "I htit from the back and my legs start to lock up", + "timeId": "104.850", + }, + {"text": "Jacuzzi thar bootyI gave that girl flakka", "timeId": "106.540"}, + {"text": "I'm talkinblue caps that keep tweakinmy chakra", "timeId": "107.520"}, + {"text": "Rose on my chainthere's no hint like no copper", "timeId": "108.860"}, + {"text": "Take in the middle my head like I'm 
avatar", "timeId": "110.190"}, + {"text": "That's the reason that I ride on my appas", "timeId": "111.510"}, + {"text": "Wasted", "timeId": "112.710"}, + {"text": "WastedGTA lovees wasted", "timeId": "122.290"}, + {"text": "WastedI'm on these drugsI feel wasted", "timeId": "124.800"}, + {"text": "Wastedget her off my mind when I'm wasted", "timeId": "127.380"}, + {"text": "WastedI waste all my time when I'm wasted", "timeId": "130.120"}, + {"text": "My eyes closedhopinthis ain't makebelieve", "timeId": "132.850"}, + { + "text": "And she don't know hate all her demons like in me", + "timeId": "135.150", + }, + {"text": "L don't know l don't know", "timeId": "137.730"}, + {"text": "Don't know what she been onI don't know", "timeId": "143.870"}, + {"text": "All that lean l ain't have to let her in", "timeId": "146.470"}, + { + "text": "She ain't take my heart,but she took my medicine", + "timeId": "148.580", + }, + {"text": "Least somebody gon'take lthate to waste it", "timeId": "151.330"}, + {"text": "WastedGTA lovees wasted", "timeId": "152.980"}, + {"text": "WastedI'm on these drugsI feel wasted", "timeId": "155.610"}, + {"text": "Wastedget her off my mind when I'm wasted", "timeId": "158.070"}, + {"text": "WastedI waste all my time when I'm wasted", "timeId": "160.820"}, + ] + + print(json_2_lrc(data)) + + +if __name__ == "__main__": + test_gen_lrc_from_json() diff --git a/f2/apps/douyin/utils.py b/f2/apps/douyin/utils.py index 0ad9702..fad2ed8 100644 --- a/f2/apps/douyin/utils.py +++ b/f2/apps/douyin/utils.py @@ -67,40 +67,45 @@ def gen_real_msToken(cls) -> str: with httpx.Client(transport=transport, proxies=cls.proxies) as client: try: response = client.post( - cls.token_conf["url"], headers=headers, content=payload + cls.token_conf["url"], content=payload, headers=headers ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取msToken")) - elif response.status_code == 404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() msToken = str(httpx.Cookies(response.cookies).get("msToken")) - if len(msToken) not in [120, 128]: - raise APIResponseError( - _( - "msToken: 请检查并更新 f2 中 conf.yaml 配置文件中的 msToken,以匹配 douyin 新规则。" - ) - ) + raise APIResponseError(_("{0} 内容不符合要求").format("msToken")) return msToken - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.token_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.token_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if e.response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("msToken", "douyin") + ) + + elif e.response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点").format("msToken")) + else: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) + ) except APIError as e: + # 返回虚假的msToken (Return a fake msToken) + logger.error(_("msToken API错误:{0}").format(e)) logger.info(_("生成虚假的msToken")) return cls.gen_false_msToken() @@ -122,28 +127,36 @@ def gen_ttwid(cls) -> str: response = client.post( cls.ttwid_conf["url"], 
content=cls.ttwid_conf["data"] ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() ttwid = str(httpx.Cookies(response.cookies).get("ttwid")) return ttwid - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if e.response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("ttwid", "douyin") + ) + + elif e.response.status_code == 404: + raise APINotFoundError(_("ttwid无法找到API端点")) + else: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) + ) class VerifyFpManager: @@ -184,23 +197,32 @@ def gen_s_v_web_id(cls) -> str: class XBogusManager: @classmethod - def str_2_endpoint(cls, endpoint: str) -> str: + def str_2_endpoint( + cls, + user_agent: str, + endpoint: str, + ) -> str: try: - final_endpoint = XB().getXBogus(endpoint) + final_endpoint = XB(user_agent).getXBogus(endpoint) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) return final_endpoint[0] @classmethod - def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: + def model_2_endpoint( + cls, + user_agent: str, + base_endpoint: str, + params: dict, + ) -> str: if not isinstance(params, dict): raise TypeError(_("参数必须是字典类型")) param_str = "&".join([f"{k}={v}" for k, v in params.items()]) try: - xb_value = XB().getXBogus(param_str) + xb_value = XB(user_agent).getXBogus(param_str) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) @@ -237,7 +259,7 @@ async def get_sec_user_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) pattern = ( @@ -252,7 +274,7 @@ async def get_sec_user_id(cls, url: str) -> str: transport=transport, proxies=TokenManager.proxies, timeout=10 ) as client: response = await client.get(url, follow_redirects=True) - + # 444一般为Nginx拦截,不返回状态 (444 is generally intercepted by Nginx and does not return status) if response.status_code in {200, 444}: match = pattern.search(str(response.url)) if match: @@ -260,39 +282,34 @@ async def get_sec_user_id(cls, url: str) -> str: else: raise APIResponseError( _( - "未在响应的地址中找到sec_user_id, 检查链接是否为用户主页类名: {0}".format( - cls.__name__ - ) - ) + "未在响应的地址中找到sec_user_id,检查链接是否为用户主页类名:{0}" + ).format(cls.__name__) ) elif response.status_code == 401: raise APIUnauthorizedError( - _("未授权的请求。类名: {0}".format(cls.__name__)) + _("未授权的请求。类名:{0}").format(cls.__name__) ) elif response.status_code == 404: raise APINotFoundError( - _("未找到API端点。类名: {0}".format(cls.__name__)) + _("未找到API端点。类名:{0}").format(cls.__name__) ) elif response.status_code == 503: raise APIUnavailableError( - _("API服务不可用。类名: {0}".format(cls.__name__)) + _("API服务不可用。类名:{0}").format(cls.__name__) ) else: - raise APIError( - _("API错误码:{0}。类名: {1}").format( 
- response.status_code, cls.__name__ + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + response.url, response.status_code, response.text ) ) - except httpx.RequestError: + except httpx.RequestError as exc: raise APIConnectionError( _( - "连接到API时发生错误,请检查URL或网络情况。类名: {0}".format( - cls.__name__ - ) - ), - url, + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod @@ -316,7 +333,7 @@ async def get_all_sec_user_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -349,7 +366,7 @@ async def get_aweme_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) # 重定向到完整链接 @@ -359,6 +376,7 @@ async def get_aweme_id(cls, url: str) -> str: ) as client: try: response = await client.get(url, follow_redirects=True) + response.raise_for_status() video_pattern = cls._DOUYIN_VIDEO_URL_PATTERN note_pattern = cls._DOUYIN_NOTE_URL_PATTERN @@ -372,18 +390,22 @@ async def get_aweme_id(cls, url: str) -> str: aweme_id = match.group(1) else: raise APIResponseError( - _("未在响应的地址中找到aweme_id, 检查链接是否为作品页") + _("未在响应的地址中找到aweme_id,检查链接是否为作品页") ) return aweme_id - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format( - url, - TokenManager.proxies, - cls.__name__, + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) + ) + + except httpx.HTTPStatusError as e: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text ) ) @@ -408,7 +430,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -417,6 +439,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: class MixIdFetcher: + # 获取方法同AwemeIdFetcher @classmethod async def get_mix_id(cls, url: str) -> str: return @@ -427,7 +450,7 @@ class WebCastIdFetcher: _DOUYIN_LIVE_URL_PATTERN = re.compile(r"live/([^/?]*)") # https://live.douyin.com/766545142636?cover_type=0&enter_from_merge=web_live&enter_method=web_card&game_name=&is_recommend=1&live_type=game&more_detail=&request_id=20231110224012D47CD00C18B4AE4BFF9B&room_id=7299828646049827596&stream_type=vertical&title_type=1&web_live_page=hot_live&web_live_tab=all # https://live.douyin.com/766545142636 - _DOUYIN_LIVE_URL_PATTERN2 = re.compile(r"https://live.douyin.com/(\d+)") + _DOUYIN_LIVE_URL_PATTERN2 = re.compile(r"http[s]?://live.douyin.com/(\d+)") # https://webcast.amemv.com/douyin/webcast/reflow/7318296342189919011?u_code=l1j9bkbd&did=MS4wLjABAAAAEs86TBQPNwAo-RGrcxWyCdwKhI66AK3Pqf3ieo6HaxI&iid=MS4wLjABAAAA0ptpM-zzoliLEeyvWOCUt-_dQza4uSjlIvbtIazXnCY&with_sec_did=1&use_link_command=1&ecom_share_track_params=&extra_params={"from_request_id":"20231230162057EC005772A8EAA0199906","im_channel_invite_id":"0"}&user_id=3644207898042206&liveId=7318296342189919011&from=share&style=share&enter_method=click_share&roomId=7318296342189919011&activity_info={} _DOUYIN_LIVE_URL_PATTERN3 = re.compile(r"reflow/([^/?]*)") @@ -451,39 +474,55 @@ async def get_webcast_id(cls, url: str) -> str: if 
url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) + try: + # 重定向到完整链接 + transport = httpx.AsyncHTTPTransport(retries=5) + async with httpx.AsyncClient( + transport=transport, proxies=TokenManager.proxies, timeout=10 + ) as client: + response = await client.get(url, follow_redirects=True) + response.raise_for_status() + url = str(response.url) + + live_pattern = cls._DOUYIN_LIVE_URL_PATTERN + live_pattern2 = cls._DOUYIN_LIVE_URL_PATTERN2 + live_pattern3 = cls._DOUYIN_LIVE_URL_PATTERN3 + + if live_pattern.search(url): + match = live_pattern.search(url) + elif live_pattern2.search(url): + match = live_pattern2.search(url) + elif live_pattern3.search(url): + match = live_pattern3.search(url) + logger.warning( + _( + "该链接返回的是room_id,请使用`fetch_user_live_videos_by_room_id`接口" + ) + ) + else: + raise APIResponseError( + _("未在响应的地址中找到webcast_id,检查链接是否为直播页") + ) - # 重定向到完整链接 - transport = httpx.AsyncHTTPTransport(retries=5) - async with httpx.AsyncClient( - transport=transport, proxies=TokenManager.proxies, timeout=10 - ) as client: - response = await client.get(url, follow_redirects=True) - url = str(response.url) - - live_pattern = cls._DOUYIN_LIVE_URL_PATTERN - live_pattern2 = cls._DOUYIN_LIVE_URL_PATTERN2 - live_pattern3 = cls._DOUYIN_LIVE_URL_PATTERN3 - - if live_pattern.search(url): - match = live_pattern.search(url) - elif live_pattern2.search(url): - match = live_pattern2.search(url) - elif live_pattern3.search(url): - match = live_pattern3.search(url) - logger.debug( + return match.group(1) + + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) + raise APIConnectionError( _( - "该链接返回的是room_id,请使用`fetch_user_live_videos_by_room_id`接口" - ) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) - else: + + except httpx.HTTPStatusError as e: raise APIResponseError( - _("未在响应的地址中找到webcast_id, 检查链接是否为直播页") + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) ) - return match.group(1) - @classmethod async def get_all_webcast_id(cls, urls: list) -> list: """ @@ -505,7 +544,7 @@ async def get_all_webcast_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -564,7 +603,7 @@ def format_file_name( try: return naming_template.format(**fields) except KeyError as e: - raise KeyError(_("文件名模板字段 {0} 不存在,请检查".format(e))) + raise KeyError(_("文件名模板字段 {0} 不存在,请检查").format(e)) def create_user_folder(kwargs: dict, nickname: Union[str, int]) -> Path: @@ -656,11 +695,12 @@ def create_or_rename_user_folder( def show_qrcode(qrcode_url: str, show_image: bool = False) -> None: """ - 显示二维码 + 显示二维码 (Show QR code) Args: - qrcode_url (str): 登录二维码链接 + qrcode_url (str): 登录二维码链接 (Login QR code link) show_image (bool): 是否显示图像,True 表示显示,False 表示在控制台显示 + (Whether to display the image, True means display, False means display in the console) """ if show_image: # 创建并显示QR码图像 @@ -673,3 +713,33 @@ def show_qrcode(qrcode_url: str, show_image: bool = False) -> None: qr.make(fit=True) # 在控制台以 ASCII 形式打印二维码 qr.print_ascii(invert=True) + + +def json_2_lrc(data: Union[str, list, dict]) -> str: + """ + 从抖音原声json格式歌词生成lrc格式歌词 + (Generate lrc lyrics format from Douyin original json lyrics format) + + Args: + data (Union[str, list, dict]): 抖音原声json格式歌词 (Douyin 
original json lyrics format) + + Returns: + str: 生成的lrc格式歌词 (Generated lrc format lyrics) + """ + try: + lrc_lines = [] + for item in data: + text = item["text"] + time_seconds = float(item["timeId"]) + minutes = int(time_seconds // 60) + seconds = int(time_seconds % 60) + milliseconds = int((time_seconds % 1) * 1000) + time_str = f"{minutes:02}:{seconds:02}.{milliseconds:03}" + lrc_lines.append(f"[{time_str}] {text}") + except KeyError as e: + raise KeyError(_("歌词数据字段错误:{0}").format(e)) + except RuntimeError as e: + raise RuntimeError(_("生成歌词文件失败:{0},请检查歌词 `data` 内容").format(e)) + except TypeError as e: + raise TypeError(_("歌词数据类型错误:{0}").format(e)) + return "\n".join(lrc_lines) diff --git a/f2/apps/tiktok/__init__.py b/f2/apps/tiktok/__init__.py deleted file mode 100644 index 256d3a8..0000000 --- a/f2/apps/tiktok/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# path: f2/apps/tiktok/__init__.py - -from f2.apps.tiktok.help import help diff --git a/f2/apps/tiktok/cli.py b/f2/apps/tiktok/cli.py index 8122a6e..35b1ee3 100644 --- a/f2/apps/tiktok/cli.py +++ b/f2/apps/tiktok/cli.py @@ -3,19 +3,24 @@ import f2 import click import typing -import browser_cookie3 from pathlib import Path from f2 import helps from f2.cli.cli_commands import set_cli_config from f2.log.logger import logger -from f2.utils.utils import split_dict_cookie, get_resource_path +from f2.utils.utils import ( + split_dict_cookie, + get_resource_path, + get_cookie_from_browser, + check_invalid_naming, + merge_config, +) from f2.utils.conf_manager import ConfigManager from f2.i18n.translator import TranslationManager, _ -def handle_help( +def handler_help( ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, @@ -50,61 +55,28 @@ def handler_auto_cookie( param: 提供的参数或选项 (The provided parameter or option) value: 参数或选项的值 (The value of the parameter or option) """ - if not value or ctx.resilient_parsing: - return - - # 如果用户明确设置了 --cookie,那么跳过自动获取过程 - if ctx.params.get("cookie"): + # 如果用户没有提供值或者设置了 resilient_parsing 或者设置了 --cookie,那么跳过自动获取过程 + if not value or ctx.resilient_parsing or ctx.params.get("cookie"): return # 根据浏览器选择获取cookie - if value in ["chrome", "firefox", "edge", "opera"]: - try: - cookie_value = split_dict_cookie(get_cookie_from_browser(value)) - manager = ConfigManager(ctx.params.get("config", "conf/app.yaml")) - manager.update_config_with_args("tiktok", cookie=cookie_value) - except PermissionError: - message = _("请关闭所有已打开的浏览器重试, 并且你有适当的权限访问浏览器 !") - logger.error(message) - click.echo(message) - ctx.abort() - except Exception as e: - message = _("自动获取Cookie失败: {0}".format(str(e))) - logger.error(message) - click.echo(message) - ctx.abort() - - -def get_cookie_from_browser(browser_choice: str): - """ - 根据用户选择的浏览器获取tiktok.com的cookie。 + try: + cookie_value = split_dict_cookie(get_cookie_from_browser(value, "tiktok.com")) - Args: - browser_choice (str): 用户选择的浏览器名称 + if not cookie_value: + raise ValueError(_("无法从 {0} 浏览器中获取cookie").format(value)) - Returns: - str: *.tiktok.com的cookie值 - """ - - BROWSER_FUNCTIONS = { - "chrome": browser_cookie3.chrome, - "firefox": browser_cookie3.firefox, - "edge": browser_cookie3.edge, - "opera": browser_cookie3.opera, - } - cj_function = BROWSER_FUNCTIONS.get(browser_choice) - if not cj_function: - raise ValueError(_("不支持的浏览器选项, 输入f2 dy --help查看更多帮助!")) - - cj = cj_function(domain_name="tiktok.com") - - # cookie_value = next((c.value for c in cj if c.name == 'ttwid'), None) - cookie_value = {c.name: c.value for c in cj if c.domain.endswith("tiktok.com")} 
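Stepping back to the `json_2_lrc` helper added in `f2/apps/douyin/utils.py` above (and exercised by the new `test_lrc.py`): it converts each `{"text", "timeId"}` entry into an `[mm:ss.mmm]` LRC line. A small worked example, reusing two entries from the test data and assuming the function is importable from that module:

```python
from f2.apps.douyin.utils import json_2_lrc

# Two entries borrowed from the test data in test_lrc.py above.
data = [
    {"text": "CB on the beat,ho", "timeId": "5.700"},
    {"text": "Wasted CTA lovees wasted", "timeId": "10.210"},
]

print(json_2_lrc(data))
# Expected output, per the formatting logic in the hunk above:
# [00:05.700] CB on the beat,ho
# [00:10.210] Wasted CTA lovees wasted
```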
- - if not cookie_value: - raise ValueError(_("无法从{0}浏览器中获取cookie").format(browser_choice)) - - return cookie_value + # 如果没有提供配置文件,那么使用高频配置文件 + manager = ConfigManager( + ctx.params.get("config", get_resource_path(f2.APP_CONFIG_FILE_PATH)) + ) + manager.update_config_with_args("tiktok", cookie=cookie_value) + except PermissionError: + logger.error(_("请关闭所有已打开的浏览器重试,并且你有适当的权限访问浏览器!")) + ctx.abort() + except Exception as e: + logger.error(_("自动获取Cookie失败:{0}").format(str(e))) + ctx.abort() def handler_language( @@ -112,8 +84,16 @@ def handler_language( param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> typing.Any: - """用于设置语言 (For setting the language)""" + """用于设置语言 (For setting the language) + Args: + ctx: click的上下文对象 (Click's context object) + param: 提供的参数或选项 (The provided parameter or option) + value: 参数或选项的值 (The value of the parameter or option) + """ + + if not value or ctx.resilient_parsing: + return TranslationManager.get_instance().set_language(value) global _ _ = TranslationManager.get_instance().gettext @@ -139,77 +119,26 @@ def handler_naming( value: 命名模式模板 (Naming pattern template) """ # 避免和配置文件参数冲突 - if value is None: + if not value or ctx.resilient_parsing: return # 允许的模式和分隔符 ALLOWED_PATTERNS = ["{nickname}", "{create}", "{aweme_id}", "{desc}", "{uid}"] ALLOWED_SEPARATORS = ["-", "_"] - temp_naming = value - invalid_patterns = [] - - # 检查提供的模式是否有效 - for pattern in ALLOWED_PATTERNS: - if pattern in temp_naming: - temp_naming = temp_naming.replace(pattern, "") - - # 此时,temp_naming应只包含分隔符 - for char in temp_naming: - if char not in ALLOWED_SEPARATORS: - invalid_patterns.append(char) - - # 检查连续的无效模式或分隔符 - for pattern in ALLOWED_PATTERNS: - # 检查像"{aweme_id}{aweme_id}"这样的模式 - if pattern + pattern in value: - invalid_patterns.append(pattern + pattern) - for sep in ALLOWED_SEPARATORS: - # 检查像"{aweme_id}-{aweme_id}"这样的模式 - if pattern + sep + pattern in value: - invalid_patterns.append(pattern + sep + pattern) + # 检查命名是否符合命名规范 + invalid_patterns = check_invalid_naming(value, ALLOWED_PATTERNS, ALLOWED_SEPARATORS) if invalid_patterns: raise click.BadParameter( - _( - "`{0}` 中的 `{1}` 不符合命名模式".format( - value, "".join(invalid_patterns) - ) + _("`{0}` 中的 `{1}` 不符合命名模式").format( + value, "".join(invalid_patterns) ) ) return value -def merge_config(main_conf, custom_conf, **kwargs): - """ - 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 - Args: - main_conf (dict): 主配置参数字典 - custom_conf (dict): 自定义配置参数字典 - **kwargs: CLI 参数和其他额外的配置参数 - - Returns: - dict: 合并后的配置参数字典 - """ - # 合并主配置和自定义配置 - merged_conf = {} - for key, value in main_conf.items(): - merged_conf[key] = value # 将主配置复制到合并后的配置中 - for key, value in custom_conf.items(): - if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 - merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 - - # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 - for key, value in kwargs.items(): - if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 - merged_conf[key] = value - elif value is not None and value != "": # 如果值不为 None 和 空值,则进行合并 - merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 - - return merged_conf - - @click.command(name="tiktok", help=_("TikTok无水印解析")) @click.option( "--config", @@ -231,40 +160,40 @@ def merge_config(main_conf, custom_conf, **kwargs): "-m", type=bool, # default="yes", - help=_("是否保存视频原声。可选:'yes'、'no'"), + help=_("是否保存视频原声"), ) @click.option( "--cover", "-v", type=bool, # default="yes", - help=_("是否保存视频封面。可选:'yes'、'no'"), + help=_("是否保存视频封面"), ) @click.option( "--desc", "-d", type=bool, # default="yes", - 
help=_("是否保存视频文案。可选:'yes'、'no'"), + help=_("是否保存视频文案"), ) @click.option( "--path", "-p", type=str, # default="Download", - help=_("作品保存位置,支持绝对与相对路径。"), + help=_("作品保存位置,支持绝对与相对路径"), ) @click.option( "--folderize", "-f", type=bool, # default="yes", - help=_("是否将作品保存到单独的文件夹。可选:'yes'、'no'"), + help=_("是否将作品保存到单独的文件夹"), ) @click.option( "--mode", "-M", - type=click.Choice(["one", "post", "like", "collect", "mix"]), + type=click.Choice(f2.TIKTOK_MODE_LIST), # default="post", # required=True, help=_( @@ -367,11 +296,9 @@ def merge_config(main_conf, custom_conf, **kwargs): # @click.confirmation_option(prompt='是否要使用命令行的参数更新配置文件?') @click.option( "--auto-cookie", - type=click.Choice(["none", "chrome", "firefox", "edge", "opera"]), + type=click.Choice(f2.BROWSER_LIST), # default="none", - help=_( - "自动从浏览器获取cookie。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" - ), + help=_("自动从浏览器获取cookie,使用该命令前请确保关闭所选的浏览器"), callback=handler_auto_cookie, ) @click.option( @@ -379,11 +306,17 @@ def merge_config(main_conf, custom_conf, **kwargs): is_flag=True, is_eager=True, expose_value=False, - help="显示富文本帮助", - callback=handle_help, + help=_("显示富文本帮助"), + callback=handler_help, ) @click.pass_context -def tiktok(ctx, config, init_config, update_config, **kwargs): +def tiktok( + ctx: click.Context, + config: str, + init_config: str, + update_config: bool, + **kwargs, +) -> None: ################## # f2 存在2个主配置文件,分别是app低频配置(app.yaml)和f2低频配置(conf.yaml) # app低频配置存放app相关的参数 @@ -453,16 +386,16 @@ def tiktok(ctx, config, init_config, update_config, **kwargs): # 从低频配置开始到高频配置再到cli参数,逐级覆盖,如果键值不存在使用父级的键值 kwargs = merge_config(main_conf, custom_conf, **kwargs) - logger.info(_("主配置路径: {0}".format(main_conf_path))) - logger.info(_("自定义配置路径: {0}".format(Path.cwd() / config))) - logger.debug(_("主配置参数:{0}".format(main_conf))) - logger.debug(_("自定义配置参数:{0}".format(custom_conf))) - logger.debug(_("CLI参数:{0}".format(kwargs))) + logger.info(_("主配置路径:{0}").format(main_conf_path)) + logger.info(_("自定义配置路径:{0}").format(Path.cwd() / config)) + logger.debug(_("主配置参数:{0}").format(main_conf)) + logger.debug(_("自定义配置参数:{0}").format(custom_conf)) + logger.debug(_("CLI参数:{0}").format(kwargs)) # 尝试从命令行参数或kwargs中获取URL if not kwargs.get("url"): logger.error("缺乏URL参数,详情看命令帮助") - handle_help(ctx, None, True) + handler_help(ctx, None, True) # 添加app_name到kwargs kwargs["app_name"] = "tiktok" diff --git a/f2/apps/tiktok/crawler.py b/f2/apps/tiktok/crawler.py index 1e84c96..cb65c29 100644 --- a/f2/apps/tiktok/crawler.py +++ b/f2/apps/tiktok/crawler.py @@ -21,7 +21,10 @@ class TiktokCrawler(BaseCrawler): - def __init__(self, kwargs: dict = {}): + def __init__( + self, + kwargs: dict = ..., + ): f2_manager = ConfigManager(f2.F2_CONFIG_FILE_PATH) f2_conf = f2_manager.get_config("f2").get("tiktok") proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) @@ -40,65 +43,83 @@ def __init__(self, kwargs: dict = {}): async def fetch_user_profile(self, params: UserProfile): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_DETAIL, params.dict() - ) # fmt: off - logger.debug(_("用户信息接口地址:" + endpoint)) + self.headers.get("User-Agent"), + tkendpoint.USER_DETAIL, + params.dict(), + ) + logger.debug(_("用户信息接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_post(self, params: UserPost): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_POST, params.dict() - ) # fmt: off - logger.debug(_("主页作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + tkendpoint.USER_POST, + params.dict(), + ) + 
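# Every endpoint above is now signed with the crawler's own User-Agent instead of a built-in one
# (see the extra self.headers.get("User-Agent") argument). A hedged usage sketch; the endpoint URL
# and parameters below are placeholders, not the real tkendpoint constants:
from f2.apps.tiktok.utils import XBogusManager

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"          # whatever the session headers carry
signed = XBogusManager.model_2_endpoint(
    user_agent,
    "https://www.tiktok.com/api/user/detail/",                    # placeholder base endpoint
    {"secUid": "MS4wLjABAAAA_placeholder", "device_platform": "web_pc"},
)
# `signed` is the base endpoint plus the query string and an X-Bogus value bound to user_agent.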
logger.debug(_("主页作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_like(self, params: UserLike): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_LIKE, params.dict() - ) # fmt: off - logger.debug(_("喜欢作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + tkendpoint.USER_LIKE, + params.dict(), + ) + logger.debug(_("喜欢作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_collect(self, params: UserCollect): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_COLLECT, params.dict() + self.headers.get("User-Agent"), + tkendpoint.USER_COLLECT, + params.dict(), ) - logger.debug(_("收藏作品接口地址:" + endpoint)) + logger.debug(_("收藏作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_play_list(self, params: UserPlayList): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_PLAY_LIST, params.dict() + self.headers.get("User-Agent"), + tkendpoint.USER_PLAY_LIST, + params.dict(), ) - logger.debug(_("合辑列表接口地址:" + endpoint)) + logger.debug(_("合辑列表接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_user_mix(self, params: UserMix): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.USER_MIX, params.dict() - ) # fmt: off - logger.debug(_("合辑作品接口地址:" + endpoint)) + self.headers.get("User-Agent"), + tkendpoint.USER_MIX, + params.dict(), + ) + logger.debug(_("合辑作品接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_detail(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.AWEME_DETAIL, params.dict() + self.headers.get("User-Agent"), + tkendpoint.AWEME_DETAIL, + params.dict(), ) - logger.debug(_("作品详情接口地址:" + endpoint)) + logger.debug(_("作品详情接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_comment(self, params: PostComment): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.POST_COMMENT, params.dict() + self.headers.get("User-Agent"), + tkendpoint.POST_COMMENT, + params.dict(), ) - logger.debug(_("作品评论接口地址:" + endpoint)) + logger.debug(_("作品评论接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def fetch_post_recommend(self, params: PostDetail): endpoint = XBogusManager.model_2_endpoint( - tkendpoint.HOME_RECOMMEND, params.dict() + self.headers.get("User-Agent"), + tkendpoint.HOME_RECOMMEND, + params.dict(), ) - logger.debug(_("首页推荐接口地址:" + endpoint)) + logger.debug(_("首页推荐接口地址:{0}").format(endpoint)) return await self._fetch_get_json(endpoint) async def __aenter__(self): diff --git a/f2/apps/tiktok/db.py b/f2/apps/tiktok/db.py index 3d28679..89690b3 100644 --- a/f2/apps/tiktok/db.py +++ b/f2/apps/tiktok/db.py @@ -1,6 +1,5 @@ # path: f2/apps/tiktok/db.py -import aiosqlite from f2.db.base_db import BaseDB @@ -24,6 +23,7 @@ async def _create_table(self) -> None: "videoCount INTEGER", "uid TEXT", "nickname TEXT", + "nickname_raw TEXT", "uniqueId TEXT", "commentSetting BOOLEAN", "followingVisibility BOOLEAN", @@ -32,6 +32,7 @@ async def _create_table(self) -> None: "showPlayListTab BOOLEAN", "relation BOOLEAN", "signature TEXT", + "signature_raw TEXT", "ttSeller BOOLEAN", "verified BOOLEAN", "last_aweme_id TEXT", diff --git a/f2/apps/tiktok/dl.py b/f2/apps/tiktok/dl.py index 3b7fe3e..8555967 100644 --- a/f2/apps/tiktok/dl.py +++ b/f2/apps/tiktok/dl.py @@ -17,8 +17,7 @@ def __init__(self, kwargs: dict = {}): if kwargs["cookie"] is None: raise ValueError( _( - 
"cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取 f2 -d dy --help,如扫码登录请保留双引号cookie: " - ",再使用--sso-login命令。" + "cookie不能为空。请提供有效的 cookie 参数,或自动从浏览器获取。如 `--auto-cookie edge`" ) ) @@ -82,7 +81,9 @@ async def filter_aweme_datas_by_interval( ) return aweme_datas else: - logger.warning(_("作品发布时间不在指定区间内:{0}").format(aweme_date_str)) + logger.warning( + _("作品发布时间不在指定区间内:{0}").format(aweme_date_str) + ) return None elif isinstance(aweme_datas, list): @@ -159,17 +160,19 @@ async def handler_download( ) secUid = str(aweme_data_dict.get("secUid")) # 用户ID - aweme_privateItem = aweme_data_dict.get("privateItem") # 作品权限 false公开, true私密 + aweme_privateItem = aweme_data_dict.get( + "privateItem" + ) # 作品权限 false公开, true私密 aweme_secret = aweme_data_dict.get("secret") # 作品权限 false公开, true私密 aweme_id = str(aweme_data_dict.get("aweme_id")) # 视频ID logger.debug(f"========{aweme_id}========") logger.debug(aweme_data_dict) - logger.debug("================") + logger.debug("===================================") # 检查作品是否被屏蔽 if aweme_privateItem: - logger.warning(_("{0} 该作品已被屏蔽,无法下载").format(aweme_id)) + logger.warning(_("该 {0} 作品已被屏蔽,无法下载").format(aweme_id)) return # 检查作品是否可见 diff --git a/f2/apps/tiktok/filter.py b/f2/apps/tiktok/filter.py index 58192a7..1f68492 100644 --- a/f2/apps/tiktok/filter.py +++ b/f2/apps/tiktok/filter.py @@ -1,7 +1,5 @@ # path: f2/apps/tiktok/filter.py -from typing import List, Union - from f2.utils.json_filter import JSONModel from f2.utils.utils import _get_first_item_from_list, timestamp_2_str, replaceT @@ -45,6 +43,10 @@ def uid(self): def nickname(self): return replaceT(self._get_attr_value("$.userInfo.user.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.userInfo.user.nickname") + @property def secUid(self): return self._get_attr_value("$.userInfo.user.secUid") @@ -81,6 +83,10 @@ def relation(self) -> bool: # follow 1, no follow 0 def signature(self): return replaceT(self._get_attr_value("$.userInfo.user.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.userInfo.user.signature") + @property def ttSeller(self) -> bool: return bool(self._get_attr_value("$.userInfo.user.ttSeller")) @@ -89,6 +95,9 @@ def ttSeller(self) -> bool: def verified(self) -> bool: return bool(self._get_attr_value("$.userInfo.user.verified")) + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -132,6 +141,10 @@ def createTime(self): def desc(self): return replaceT(self._get_list_attr_value("$.itemList[*].desc")) + @property + def desc_raw(self): + return self._get_list_attr_value("$.itemList[*].desc") + @property def textExtra(self): return self._get_list_attr_value("$.itemList[*].textExtra") @@ -142,6 +155,10 @@ def textExtra(self): def nickname(self): return replaceT(self._get_list_attr_value("$.itemList[*].author.nickname")) + @property + def nickname_raw(self): + return self._get_list_attr_value("$.itemList[*].author.nickname") + @property def uid(self): return self._get_list_attr_value("$.itemList[*].author.id") @@ -221,6 +238,10 @@ def music_album(self): def music_authorName(self): return replaceT(self._get_list_attr_value("$.itemList[*].music.authorName")) + @property + def music_authorName_raw(self): + return self._get_list_attr_value("$.itemList[*].music.authorName") + @property def music_coverLarge(self): return self._get_list_attr_value("$.itemList[*].music.coverLarge") @@ -245,6 +266,10 @@ def music_playUrl(self): def music_title(self): return 
replaceT(self._get_list_attr_value("$.itemList[*].music.title")) + @property + def music_title_raw(self): + return self._get_list_attr_value("$.itemList[*].music.title") + # video @property def video_bitrate(self): @@ -266,11 +291,17 @@ def video_bitrate(self): def video_bitrateInfo(self): bit_rate_data = self._get_list_attr_value("$.itemList[*].video.bitrateInfo") return [ - [aweme.get("Bitrate", "")] # 使用 get 方法以处理字典中没有 "Bitrate" 键的情况 - if isinstance(aweme, dict) - else [aweme[0].get("Bitrate", "")] - if len(aweme) == 1 - else [item.get("Bitrate", "") for item in aweme] + ( + [ + aweme.get("Bitrate", "") + ] # 使用 get 方法以处理字典中没有 "Bitrate" 键的情况 + if isinstance(aweme, dict) + else ( + [aweme[0].get("Bitrate", "")] + if len(aweme) == 1 + else [item.get("Bitrate", "") for item in aweme] + ) + ) for aweme in bit_rate_data ] @@ -306,6 +337,9 @@ def video_height(self): def video_width(self): return self._get_list_attr_value("$.itemList[*].video.width") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -383,6 +417,9 @@ def mixName(self): def videoCount(self): return self._get_attr_value("$.playList[*].videoCount") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) @@ -417,6 +454,10 @@ def uid(self): def nickname(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.author.nickname")) + @property + def nickname_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.author.nickname") + @property def secUid(self): return self._get_attr_value("$.itemInfo.itemStruct.author.secUid") @@ -429,6 +470,10 @@ def uniqueId(self): def signature(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.author.signature")) + @property + def signature_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.author.signature") + @property def openFavorite(self): return self._get_attr_value("$.itemInfo.itemStruct.author.openFavorite") @@ -461,6 +506,10 @@ def createTime(self): def desc(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.desc")) + @property + def desc_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.desc") + @property def textExtra(self): return self._get_attr_value("$.itemInfo.itemStruct.textExtra") @@ -531,6 +580,10 @@ def videoSuggestWordsList(self): def music_authorName(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.music.authorName")) + @property + def music_authorName_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.music.authorName") + @property def music_coverLarge(self): return self._get_attr_value("$.itemInfo.itemStruct.music.coverLarge") @@ -555,6 +608,10 @@ def music_playUrl(self): def music_title(self): return replaceT(self._get_attr_value("$.itemInfo.itemStruct.music.title")) + @property + def music_title_raw(self): + return self._get_attr_value("$.itemInfo.itemStruct.music.title") + # video @property def video_bitrate(self): @@ -599,6 +656,9 @@ def video_height(self): def video_width(self): return self._get_attr_value("$.itemInfo.itemStruct.video.width") + def _to_raw(self) -> dict: + return self._data + def _to_dict(self) -> dict: return { prop_name: getattr(self, prop_name) diff --git a/f2/apps/tiktok/handler.py b/f2/apps/tiktok/handler.py index 8caf343..9cd3705 100644 --- a/f2/apps/tiktok/handler.py +++ b/f2/apps/tiktok/handler.py @@ -1,7 +1,9 @@ # path: f2/apps/tiktok/handler.py +import sys + from pathlib import Path -from typing 
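# The new *_raw properties keep the untouched API value alongside the sanitized one (replaceT is
# applied to fields that end up in file names), and _to_raw() exposes the whole response. A hedged
# sketch; the payload below is fake and only shaped like the fields the filter reads:
from f2.apps.tiktok.filter import UserProfileFilter

payload = {"userInfo": {"user": {"nickname": "nick/name?", "signature": "hi"}}}
user = UserProfileFilter(payload)
safe_name = user.nickname          # sanitized form, intended for paths/file names
display_name = user.nickname_raw   # exactly what the API returned
raw_response = user._to_raw()      # the unfiltered response, handy for debugging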
import AsyncGenerator +from typing import AsyncGenerator, Union, List, Any from f2.i18n.translator import _ from f2.log.logger import logger @@ -31,6 +33,7 @@ create_or_rename_user_folder, ) from f2.cli.cli_console import RichConsoleManager +from f2.exceptions.api_exceptions import APIResponseError rich_console = RichConsoleManager().rich_console rich_prompt = RichConsoleManager().rich_prompt @@ -41,12 +44,14 @@ class TiktokHandler: # 需要忽略的字段(需过滤掉有时效性的字段) ignore_fields = ["video_play_addr", "images", "video_bit_rate", "cover"] - def __init__(self, kwargs) -> None: + def __init__(self, kwargs: dict = ...) -> None: self.kwargs = kwargs self.downloader = TiktokDownloader(kwargs) async def handler_user_profile( - self, secUid: str, uniqueId: str = "" + self, + secUid: str = "", + uniqueId: str = "", ) -> UserProfileFilter: """ 用于获取指定用户的个人信息 @@ -54,6 +59,7 @@ async def handler_user_profile( Args: secUid: str: 用户ID (User ID) + uniqueId: str: 用户唯一ID (User unique ID) Return: user: UserProfileFilter: 用户信息过滤器 (User info filter) @@ -63,17 +69,25 @@ async def handler_user_profile( raise ValueError(_("至少提供 secUid 或 uniqueId 中的一个参数")) async with TiktokCrawler(self.kwargs) as crawler: - params = UserProfile(region="SG", secUid=secUid, uniqueId=uniqueId) + params = UserProfile(secUid=secUid, uniqueId=uniqueId) response = await crawler.fetch_user_profile(params) + user = UserProfileFilter(response) + if user.nickname is None: + raise APIResponseError(_("API内容请求失败,请更换新cookie后再试")) return UserProfileFilter(response) - async def get_user_nickname(self, secUid: str, db: AsyncUserDB) -> str: + async def get_user_nickname( + self, + secUid: str, + db: AsyncUserDB, + ) -> str: """ 用于获取指定用户的昵称 (Used to get nickname of specified users) Args: secUid: str: 用户ID (User ID) + db: AsyncUserDB: 用户数据库 (User database) Return: nick_name: str: 用户昵称 (User nickname) @@ -85,7 +99,11 @@ async def get_user_nickname(self, secUid: str, db: AsyncUserDB) -> str: await db.add_user_info(**user_dict._to_dict()) return user_dict.get("nickname", "") - async def get_or_add_user_data(self, secUid: str, db: AsyncUserDB) -> Path: + async def get_or_add_user_data( + self, + secUid: str, + db: AsyncUserDB, + ) -> Path: """ 获取或创建用户数据同时创建用户目录 (Get or create user data and create user directory) @@ -121,7 +139,10 @@ async def get_or_add_user_data(self, secUid: str, db: AsyncUserDB) -> Path: @classmethod async def get_or_add_video_data( - cls, aweme_data: dict, db: AsyncVideoDB, ignore_fields: list = [] + cls, + aweme_data: dict, + db: AsyncVideoDB, + ignore_fields: list = None, ): """ 获取或创建作品数据同时创建用户目录 @@ -147,9 +168,9 @@ async def fetch_play_list( secUid: str, cursor: int, page_counts: int, - ) -> dict: + ) -> UserPlayListFilter: """ - 用于获取指定用户的视频合集列表 + 用于获取指定用户的作品合集列表 (Used to get video mix list of specified user) Args: @@ -158,10 +179,10 @@ async def fetch_play_list( page_counts: int: 分页数量 (Page counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + playlist: UserPlayListFilter: 作品合集列表 (Video mix list) """ - logger.debug(_("开始爬取用户: {0} 的视频合集列表").format(secUid)) + logger.debug(_("开始爬取用户:{0} 的作品合集列表").format(secUid)) async with TiktokCrawler(self.kwargs) as crawler: params = UserPlayList(secUid=secUid, cursor=cursor, count=page_counts) @@ -169,52 +190,66 @@ async def fetch_play_list( playlist = UserPlayListFilter(response) if not playlist.hasPlayList: - logger.debug(_("用户: {0} 没有视频合集").format(secUid)) + logger.info(_("用户:{0} 没有作品合集").format(secUid)) return {} - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + 
logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频合集ID: {0} 视频合集标题: {1}").format( + _("作品合集ID:{0} 作品合集标题:{1}").format( playlist.mixId, playlist.mixName ) ) - logger.debug("=====================================") - return playlist._to_dict() + logger.debug("===================================") + return playlist - async def select_playlist(playlists: dict) -> int: + async def select_playlist( + self, playlists: Union[dict, UserPlayListFilter] + ) -> Union[str, List[str]]: """ - 用于选择要下载的视频合辑 + 用于选择要下载的作品合辑 (Used to select the video mix to download) Args: - playlists: dict: 视频合辑列表 (Video mix list) + playlists: Union[dict, UserPlayListFilter]: 作品合辑列表 (Video mix list) Return: - selected_index: str: 选择的视频合辑序号 (Selected video mix index) + selected_index: Union[str, List[str]]: 选择的作品合辑序号 (Selected video mix index) """ - rich_console.print("[bold]请选择要下载的合辑:[/bold]") + if playlists == {}: + sys.exit(_("用户没有作品合辑")) + + rich_console.print("[bold]请选择要下载的合辑:[/bold]") + rich_console.print("0: [bold]全部下载[/bold]") - for i, mix_id in enumerate(playlists.get("mixId", [])): - mix_name = playlists.get("mixName", [""])[i] - video_count = int(playlists.get("videoCount", [""])[i]) + for i in range(len(playlists.mixId)): rich_console.print( - f"[cyan]{i + 1}[/cyan]: {mix_name} ({video_count} videos)" + _("{0}: {1} (包含 {2} 个作品,收藏夹ID {3})").format( + i + 1, + playlists.mixName[i], + playlists.videoCount[i], + playlists.mixId[i], + ) ) - rich_console.print(f"[cyan]0[/cyan]: [bold]全部下载[/bold]") - - selected_index = rich_prompt.ask( - "[bold yellow]请输入希望下载的合辑序号:[/bold yellow]", - choices=[str(i) for i in range(len(playlists) + 1)], + # rich_prompt 会有字符刷新问题,暂时使用rich_print + rich_console.print(_("[bold yellow]请输入希望下载的合辑序号:[/bold yellow]")) + selected_index = int( + rich_prompt.ask( + # _("[bold yellow]请输入希望下载的合辑序号:[/bold yellow]"), + choices=[str(i) for i in range(len(playlists) + 1)], + ) ) - return int(selected_index) + if selected_index == 0: + return playlists.mixId + else: + return playlists.mixId[selected_index - 1] @mode_handler("one") async def handler_one_video(self): """ - 用于获取指定视频的信息 + 用于获取指定作品的信息 (Used to get video info of specified video) Args: @@ -226,48 +261,50 @@ async def handler_one_video(self): aweme_data = await self.fetch_one_video(aweme_id) async with AsyncUserDB("tiktok_users.db") as udb: - user_path = await self.get_or_add_user_data( - str(aweme_data.get("secUid")), udb - ) + user_path = await self.get_or_add_user_data(aweme_data.secUid, udb) async with AsyncVideoDB("tiktok_videos.db") as vdb: - await self.get_or_add_video_data(aweme_data, vdb) + await self.get_or_add_video_data( + aweme_data._to_dict(), vdb, self.ignore_fields + ) - logger.debug(_("单个视频数据: {0}".format(aweme_data))) + logger.debug(_("单个作品数据:{0}").format(aweme_data._to_dict())) # 创建下载任务 - await self.downloader.create_download_tasks(self.kwargs, aweme_data, user_path) + await self.downloader.create_download_tasks( + self.kwargs, aweme_data._to_dict(), user_path + ) - async def fetch_one_video(self, itemId: str) -> dict: + async def fetch_one_video(self, itemId: str) -> PostDetailFilter: """ - 用于获取指定视频的详细信息 + 用于获取指定作品的详细信息 (Used to get detailed information of specified video) Args: - itemId: str: 视频ID (Video ID) + itemId: str: 作品ID (Video ID) Return: - post: dict: 视频信息 (Video info) + video: PostDetailFilter: 作品信息过滤器 (Video info filter) """ - logger.debug(_("开始爬取视频: {0}").format(itemId)) + logger.debug(_("开始爬取作品:{0}").format(itemId)) async with TiktokCrawler(self.kwargs) as crawler: params = 
PostDetail(itemId=itemId) response = await crawler.fetch_post_detail(params) video = PostDetailFilter(response) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) - return video._to_dict() + return video @mode_handler("post") async def handler_user_post(self): """ - 用于获取指定用户的视频信息 + 用于获取指定用户的作品信息 (Used to get video info of specified user) Args: @@ -288,14 +325,14 @@ async def handler_user_post(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_post_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户发布的视频列表 + 用于获取指定用户发布的作品列表 (Used to get video list of specified user) Args: @@ -305,18 +342,18 @@ async def fetch_user_post_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + video: AsyncGenerator[UserPostFilter, Any]: 用户发布作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 发布的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 发布的作品").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -330,34 +367,34 @@ async def fetch_user_post_videos( video = UserPostFilter(response) if not video.has_aweme: - logger.debug(_("{0} 页没有找到作品".format(cursor))) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕".format(secUid))) + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: cursor = video.cursor continue - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( video.aweme_id, video.desc, video.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - yield video._to_list() + yield video - # 更新已经处理的视频数量 (Update the number of videos processed) + # 更新已经处理的作品数量 (Update the number of videos processed) videos_collected += len(video.aweme_id) cursor = video.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("like") async def handler_user_like(self): """ - 用于获取指定用户的点赞视频信息 + 用于获取指定用户的点赞作品信息 (Used to get liked video info of specified user) Args: @@ -378,14 +415,14 @@ async def handler_user_like(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_like_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户点赞的视频列表 + 用于获取指定用户点赞的作品列表 (Used to get liked video list of specified user) Args: @@ -395,20 +432,20 @@ async def fetch_user_like_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + like: AsyncGenerator[UserPostFilter, Any]: 用户点赞作品信息过滤器 
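# The fetch_* list methods now yield filter objects instead of plain dicts; callers pick the
# representation they need (attributes, _to_dict(), _to_list()). A hedged consumption sketch; the
# secUid and counts are placeholders and `kwargs` must be the merged CLI config
# (cookie, headers, proxies, ...):
import asyncio
from f2.apps.tiktok.handler import TiktokHandler

async def dump_post_ids(kwargs: dict, sec_uid: str) -> None:
    handler = TiktokHandler(kwargs)
    async for posts in handler.fetch_user_post_videos(
        secUid=sec_uid, cursor=0, page_counts=20, max_counts=40
    ):
        for item in posts._to_list():      # one dict per aweme in this page
            print(item.get("aweme_id"))

# asyncio.run(dump_post_ids(merged_kwargs, "MS4wLjABAAAA_placeholder"))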
(Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 点赞的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 点赞的作品").format(secUid)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -417,44 +454,43 @@ async def fetch_user_like_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserLike(secUid=secUid, cursor=cursor, count=page_counts) response = await crawler.fetch_user_like(params) - video = UserPostFilter(response) + like = UserPostFilter(response) - if video.has_aweme: - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + if like.has_aweme: + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + like.aweme_id, like.desc, like.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield like - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + if not like.hasMore and str(like.api_status_code) == "0": + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + if not like.hasMore and str(like.api_status_code) == "0": + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(like.aweme_id) + cursor = like.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("collect") async def handler_user_collect(self): """ - 用于获取指定用户的收藏视频信息 + 用于获取指定用户的收藏作品信息 (Used to get collected video info of specified user) Args: @@ -475,14 +511,14 @@ async def handler_user_collect(self): ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def fetch_user_collect_videos( self, secUid: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserPostFilter, Any]: """ - 用于获取指定用户收藏的视频列表 + 用于获取指定用户收藏的作品列表 (Used to get collected video list of specified user) Args: @@ -492,20 +528,20 @@ async def fetch_user_collect_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + collect: AsyncGenerator[UserPostFilter, Any]: 收藏作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 收藏的视频").format(secUid)) + logger.debug(_("开始爬取用户:{0} 收藏的作品").format(secUid)) while videos_collected < 
max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( - _("最大数量: {0} 每次请求数量: {1}").format( + _("最大数量:{0} 每次请求数量:{1}").format( max_counts, current_request_size ) ) @@ -514,44 +550,43 @@ async def fetch_user_collect_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserCollect(secUid=secUid, cursor=cursor, count=page_counts) response = await crawler.fetch_user_collect(params) - video = UserPostFilter(response) + collect = UserPostFilter(response) - if video.has_aweme: - logger.debug(_("当前请求的cursor: {0}").format(cursor)) + if collect.has_aweme: + logger.debug(_("当前请求的cursor:{0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID:{0} 作品文案:{1} 作者:{2}").format( + collect.aweme_id, collect.desc, collect.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield collect - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collect.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + if not collect.hasMore and str(collect.api_status_code) == "0": + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": - logger.debug(_("用户: {0} 所有作品采集完毕").format(secUid)) + if not collect.hasMore and str(collect.api_status_code) == "0": + logger.debug(_("用户:{0} 所有作品采集完毕").format(secUid)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(collect.aweme_id) + cursor = collect.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) @mode_handler("mix") async def handler_user_mix(self): """ - 用于获取指定用户的合集视频信息 + 用于获取指定用户的合集作品信息 (Used to get mix video info of specified user) Args: @@ -564,35 +599,28 @@ async def handler_user_mix(self): secUid = await SecUserIdFetcher.get_secuid(self.kwargs.get("url")) playlist = await self.fetch_play_list(secUid, cursor, page_counts) - selected_index = await self.select_playlist(playlist) + mixId = await self.select_playlist(playlist) async with AsyncUserDB("tiktok_users.db") as audb: user_path = await self.get_or_add_user_data(secUid, audb) - if selected_index == 0: - for mixId in playlist.get("mixId", []): - async for aweme_data_list in self.fetch_user_mix_videos( - mixId, cursor, page_counts, max_counts - ): - # 创建下载任务 - await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path - ) - else: - mixId = playlist.get("mixId", [])[selected_index - 1] + if isinstance(mixId, str): + mixId = [mixId] + + for mixId in playlist.get("mixId", []): async for aweme_data_list in self.fetch_user_mix_videos( mixId, cursor, page_counts, max_counts ): # 创建下载任务 await self.downloader.create_download_tasks( - self.kwargs, aweme_data_list, user_path + self.kwargs, aweme_data_list._to_list(), user_path ) async def 
fetch_user_mix_videos( self, mixId: str, cursor: int, page_counts: int, max_counts: float - ) -> AsyncGenerator: + ) -> AsyncGenerator[UserMixFilter, Any]: """ - 用于获取指定用户合集的视频列表 + 用于获取指定用户合集的作品列表 (Used to get mix video list of specified user) Args: @@ -602,18 +630,18 @@ async def fetch_user_mix_videos( max_counts: float: 最大数量 (Max counts) Return: - aweme_data: dict: 视频数据字典 (Video data dict) + mix: AsyncGenerator[UserMixFilter, Any]: 合集作品信息过滤器 (Video info filter) """ max_counts = max_counts or float("inf") videos_collected = 0 - logger.debug(_("开始爬取用户: {0} 合集的视频").format(mixId)) + logger.debug(_("开始爬取用户: {0} 合集的作品").format(mixId)) while videos_collected < max_counts: current_request_size = min(page_counts, max_counts - videos_collected) - logger.debug("=====================================") + logger.debug("===================================") logger.debug( _("最大数量: {0} 每次请求数量: {1}").format( max_counts, current_request_size @@ -624,39 +652,38 @@ async def fetch_user_mix_videos( async with TiktokCrawler(self.kwargs) as crawler: params = UserMix(mixId=mixId, cursor=cursor, count=page_counts) response = await crawler.fetch_user_mix(params) - video = UserMixFilter(response) + mix = UserMixFilter(response) - if video.has_aweme: + if mix.has_aweme: logger.debug(_("当前请求的cursor: {0}").format(cursor)) logger.debug( - _("视频ID: {0} 视频文案: {1} 作者: {2}").format( - video.aweme_id, video.desc, video.nickname + _("作品ID: {0} 作品文案: {1} 作者: {2}").format( + mix.aweme_id, mix.desc, mix.nickname ) ) - logger.debug("=====================================") + logger.debug("===================================") - aweme_data_list = video._to_list() - yield aweme_data_list + yield mix - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(mix.aweme_id) - if not video.hasMore and str(video.api_status_code) == "0": + if not mix.hasMore and str(mix.api_status_code) == "0": logger.debug(_("合辑: {0} 所有作品采集完毕").format(mixId)) break else: - logger.debug(_("{0} 页没有找到作品").format(cursor)) + logger.debug(_("第 {0} 页没有找到作品").format(cursor)) - if not video.hasMore and str(video.api_status_code) == "0": + if not mix.hasMore and str(mix.api_status_code) == "0": logger.debug(_("合辑: {0} 所有作品采集完毕").format(mixId)) break - # 更新已经处理的视频数量 (Update the number of videos processed) - videos_collected += len(video.aweme_id) - cursor = video.cursor + # 更新已经处理的作品数量 (Update the number of videos processed) + videos_collected += len(mix.aweme_id) + cursor = mix.cursor - logger.debug(_("爬取结束,共爬取{0}个视频").format(videos_collected)) + logger.debug(_("爬取结束,共爬取 {0} 个作品").format(videos_collected)) async def main(kwargs): diff --git a/f2/apps/tiktok/help.py b/f2/apps/tiktok/help.py index 664088c..c862894 100644 --- a/f2/apps/tiktok/help.py +++ b/f2/apps/tiktok/help.py @@ -99,7 +99,7 @@ def help() -> None: "--auto-cookie", "[dark_cyan]Choice", _( - "自动从浏览器获取[yellow]cookie[/yellow]。可选项:chrome、firefox、edge、opera。使用该命令前请确保关闭所选的浏览器" + "自动从浏览器获取[yellow]cookie[/yellow],使用该命令前请确保关闭所选的浏览器" ), ), ("--help", "[dark_cyan]Flag", _("显示经典帮助信息")), diff --git a/f2/apps/tiktok/utils.py b/f2/apps/tiktok/utils.py index f625b73..2688313 100644 --- a/f2/apps/tiktok/utils.py +++ b/f2/apps/tiktok/utils.py @@ -67,39 +67,44 @@ def gen_real_msToken(cls) -> str: response = client.post( cls.token_conf["url"], headers=headers, content=payload ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("由于某些错误, 无法获取msToken")) - elif response.status_code == 
404: - raise APINotFoundError(_("无法找到API端点")) + response.raise_for_status() msToken = str(httpx.Cookies(response.cookies).get("msToken")) if len(msToken) not in [148]: - raise APIResponseError( - _( - "msToken: 请检查并更新 f2 中 conf.yaml 配置文件中的 msToken,以匹配 tiktok 新规则。" - ) - ) + raise APIResponseError(_("{0} 内容不符合要求").format("msToken")) return msToken - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.token_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.token_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("msToken", "tiktok") + ) + + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点").format("msToken")) + else: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) + ) except APIError as e: # 返回虚假的msToken (Return a fake msToken) + logger.error(_("msToken API错误:{0}").format(e)) logger.info(_("生成虚假的msToken")) return cls.gen_false_msToken() @@ -118,17 +123,13 @@ def gen_ttwid(cls) -> str: try: response = client.post( cls.ttwid_conf["url"], + content=cls.ttwid_conf["data"], headers={ "Cookie": cls.ttwid_conf.get("cookie"), "Content-Type": "text/plain", }, - content=cls.ttwid_conf["data"], ) - - if response.status_code == 401: - raise APIUnauthorizedError(_("401 由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("404 无法找到API端点")) + response.raise_for_status() ttwid = httpx.Cookies(response.cookies).get("ttwid") @@ -139,19 +140,31 @@ def gen_ttwid(cls) -> str: return ttwid - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.ttwid_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("ttwid", "tiktok") + ) + + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点").format("ttwid")) + else: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) + ) @classmethod def gen_odin_tt(cls): @@ -162,48 +175,63 @@ def gen_odin_tt(cls): with httpx.Client(transport=transport, proxies=cls.proxies) as client: try: response = client.get(cls.odin_tt_conf["url"]) - - if response.status_code == 401: - raise APIUnauthorizedError(_("401 由于某些错误, 无法获取ttwid")) - elif response.status_code == 404: - raise APINotFoundError(_("404 无法找到API端点")) + response.raise_for_status() odin_tt = httpx.Cookies(response.cookies).get("odin_tt") if odin_tt is None: - raise APIResponseError( - 
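# The token helpers now call response.raise_for_status() and translate the resulting
# HTTPStatusError into the project's API exceptions (401 -> unauthorized, 404 -> not found,
# otherwise a response error), falling back to a fake msToken when anything fails. A generic
# sketch of that pattern with placeholder URL/cookie names, not the shipped code:
import httpx

def fetch_cookie_value(url: str, cookie_name: str) -> str:
    try:
        with httpx.Client() as client:
            response = client.post(url)
            response.raise_for_status()              # 4xx/5xx -> httpx.HTTPStatusError
            value = response.cookies.get(cookie_name)
            if not value:
                raise ValueError(f"{cookie_name} missing from response cookies")
            return value
    except httpx.HTTPStatusError as e:
        # map e.response.status_code to APIUnauthorizedError / APINotFoundError / APIResponseError here
        raise RuntimeError(f"{e.response.status_code}: {e.response.text}") from e
    except httpx.RequestError as e:
        # network-level failure: report the URL, proxy and exception details
        raise RuntimeError(f"network error contacting {url}: {e}") from e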
_("odin_tt: 检查没有通过, 请更新配置文件中的odin_tt") - ) + raise APIResponseError(_("{0} 内容不符合要求").format("odin_tt")) return odin_tt - except httpx.RequestError: + except httpx.RequestError as exc: # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(cls.odin_tt_conf["url"], cls.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(cls.odin_tt_conf["url"], cls.proxies, cls.__name__, exc) ) except httpx.HTTPStatusError as e: # 捕获 httpx 的状态代码错误 (captures specific status code errors from httpx) - raise APIResponseError( - f"HTTP Status Code {e.response.status_code}: {e.response.text}" - ) + if response.status_code == 401: + raise APIUnauthorizedError( + _( + "参数验证失败,请更新 F2 配置文件中的 {0},以匹配 {1} 新规则" + ).format("odin_tt", "tiktok") + ) + + elif response.status_code == 404: + raise APINotFoundError(_("{0} 无法找到API端点").format("odin_tt")) + else: + raise APIResponseError( + _("链接:{0},状态码 {1}:{2} ").format( + e.response.url, e.response.status_code, e.response.text + ) + ) class XBogusManager: @classmethod - def str_2_endpoint(cls, endpoint: str) -> str: + def str_2_endpoint( + cls, + user_agent: str, + endpoint: str, + ) -> str: try: - final_endpoint = XB().getXBogus(endpoint) + final_endpoint = XB(user_agent).getXBogus(endpoint) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) return final_endpoint[0] @classmethod - def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: + def model_2_endpoint( + cls, + user_agent: str, + base_endpoint: str, + params: dict, + ) -> str: # 检查params是否是一个字典 (Check if params is a dict) if not isinstance(params, dict): raise TypeError(_("参数必须是字典类型")) @@ -211,7 +239,7 @@ def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: param_str = "&".join([f"{k}={v}" for k, v in params.items()]) try: - xb_value = XB().getXBogus(param_str) + xb_value = XB(user_agent).getXBogus(param_str) except Exception as e: raise RuntimeError(_("生成X-Bogus失败: {0})").format(e)) @@ -224,6 +252,7 @@ def model_2_endpoint(cls, base_endpoint: str, params: dict) -> str: class SecUserIdFetcher: + # 预编译正则表达式 _TIKTOK_SECUID_PARREN = re.compile( r"" ) @@ -249,7 +278,7 @@ async def get_secuid(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -258,24 +287,21 @@ async def get_secuid(cls, url: str) -> str: ) as client: try: response = await client.get(url, follow_redirects=True) - + # 444一般为Nginx拦截,不返回状态 (444 is generally intercepted by Nginx and does not return status) if response.status_code in {200, 444}: if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): raise APINotFoundError( _( - "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}".format( - cls.__name__ - ) - ) + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) ) + match = cls._TIKTOK_SECUID_PARREN.search(str(response.text)) if not match: raise APIResponseError( _( - "未在响应的地址中找到sec_uid, 检查链接是否为用户主页类名: {0}".format( - cls.__name__ - ) - ) + "未在响应中找到 {0},检查链接是否为用户主页。类名: {1}" + ).format("sec_uid", cls.__name__) ) # 提取SIGI_STATE对象中的sec_uid @@ -286,17 +312,20 @@ async def get_secuid(cls, url: str) -> str: sec_uid = user_info.get("secUid") if sec_uid is None: - raise RuntimeError(_("获取sec_uid失败, {0}".format(user_info))) + raise RuntimeError( + _("获取 {0} 失败,{1}").format(sec_uid, user_info) + ) return sec_uid else: raise 
ConnectionError(_("接口状态码异常, 请检查重试")) - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format(url, TokenManager.proxies, cls.__name__) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod @@ -320,7 +349,7 @@ async def get_all_secuid(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -346,7 +375,7 @@ async def get_uniqueid(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -357,15 +386,24 @@ async def get_uniqueid(cls, url: str) -> str: response = await client.get(url, follow_redirects=True) if response.status_code in {200, 444}: + if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): + raise APINotFoundError( + _( + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) + ) + match = cls._TIKTOK_UNIQUEID_PARREN.search(str(response.url)) if not match: - raise APIResponseError(_("未在响应中找到unique_id")) + raise APIResponseError( + _("未在响应中找到 {0}").format("unique_id") + ) unique_id = match.group(1) if unique_id is None: raise RuntimeError( - _("获取unique_id失败, {0}".format(response.url)) + _("获取 {0} 失败,{1}").format("unique_id", response.url) ) return unique_id @@ -402,7 +440,7 @@ async def get_all_uniqueid(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -416,6 +454,7 @@ class AwemeIdFetcher: # 预编译正则表达式 _TIKTOK_AWEMEID_PARREN = re.compile(r"video/(\d*)") + _TIKTOK_NOTFOUND_PARREN = re.compile(r"notfound") @classmethod async def get_aweme_id(cls, url: str) -> str: @@ -436,7 +475,7 @@ async def get_aweme_id(cls, url: str) -> str: if url is None: raise ( - APINotFoundError(_("输入的URL不合法。类名:{0}".format(cls.__name__))) + APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__)) ) transport = httpx.AsyncHTTPTransport(retries=5) @@ -447,32 +486,38 @@ async def get_aweme_id(cls, url: str) -> str: response = await client.get(url, follow_redirects=True) if response.status_code in {200, 444}: + if cls._TIKTOK_NOTFOUND_PARREN.search(str(response.url)): + raise APINotFoundError( + _( + "页面不可用,可能是由于区域限制(代理)造成的。类名: {0}" + ).format(cls.__name__) + ) + match = cls._TIKTOK_AWEMEID_PARREN.search(str(response.url)) if not match: - raise APIResponseError(_("未在响应中找到aweme_id")) + raise APIResponseError( + _("未在响应中找到 {0}").format("aweme_id") + ) aweme_id = match.group(1) if aweme_id is None: raise RuntimeError( - _("获取aweme_id失败, {0}".format(response.url)) + _("获取 {0} 失败,{1}").format("aweme_id", response.url) ) return aweme_id else: raise ConnectionError( - _("接口状态码异常 {0}, 请检查重试").format(response.status_code) + _("接口状态码异常 {0},请检查重试").format(response.status_code) ) - except httpx.RequestError: + except httpx.RequestError as exc: + # 捕获所有与 httpx 请求相关的异常情况 (Captures all httpx request-related exceptions) raise APIConnectionError( _( - "连接端点失败,检查网络环境或代理:{0} 代理:{1} 类名:{2}" - ).format( - url, - TokenManager.proxies, - cls.__name__, - ) + "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}" + ).format(url, TokenManager.proxies, cls.__name__, exc) ) @classmethod @@ -496,7 
+541,7 @@ async def get_all_aweme_id(cls, urls: list) -> list: if urls == []: raise ( APINotFoundError( - _("输入的URL List不合法。类名:{0}".format(cls.__name__)) + _("输入的URL List不合法。类名:{0}").format(cls.__name__) ) ) @@ -555,7 +600,7 @@ def format_file_name( try: return naming_template.format(**fields) except KeyError as e: - raise KeyError(_("文件名模板字段 {0} 不存在,请检查".format(e))) + raise KeyError(_("文件名模板字段 {0} 不存在,请检查").format(e)) def create_user_folder(kwargs: dict, nickname: Union[str, int]) -> Path: diff --git a/f2/cli/cli_commands.py b/f2/cli/cli_commands.py index 73b8134..a1da310 100644 --- a/f2/cli/cli_commands.py +++ b/f2/cli/cli_commands.py @@ -1,5 +1,6 @@ # path: f2/cli/cli_command.py +import f2 import click import typing import asyncio @@ -7,7 +8,6 @@ from f2 import helps from f2.apps import __apps__ as apps_module -from f2.utils import __version__ from f2.exceptions import APIError from f2.cli.cli_console import RichConsoleManager from f2.utils._signal import SignalManager @@ -23,7 +23,7 @@ def handle_help( ) -> None: if not value or ctx.resilient_parsing: return - helps.f2() + helps.main() ctx.exit() @@ -35,8 +35,8 @@ def handle_version( ) -> None: if not value or ctx.resilient_parsing: return - logger.debug(f"Version {__version__._version}") - print(f"Version {__version__._version}") + + click.echo(f"Version {f2.__version__}") ctx.exit() @@ -134,10 +134,11 @@ def set_cli_config(ctx, **kwargs): try: asyncio.run(run_app(kwargs)) except APIError as e: - logger.error(e.display_error()) + logger.error(e) async def run_app(kwargs): + logger.info(f"Version {f2.__version__}") app_name = kwargs["app_name"] app_module = importlib.import_module(f"f2.apps.{app_name}.handler") await app_module.main(kwargs) diff --git a/f2/conf/app.yaml b/f2/conf/app.yaml index 941dca4..fae14ec 100644 --- a/f2/conf/app.yaml +++ b/f2/conf/app.yaml @@ -5,6 +5,7 @@ douyin: path: Download timeout: 10 max_retries: 5 + lyric: yes max_connections: 5 max_counts: 0 max_tasks: 10 diff --git a/f2/conf/defaults.yaml b/f2/conf/defaults.yaml index 106b169..2f7c0c1 100644 --- a/f2/conf/defaults.yaml +++ b/f2/conf/defaults.yaml @@ -1,6 +1,7 @@ douyin: url: music: + lyric: cover: desc: path: diff --git a/f2/crawlers/base_crawler.py b/f2/crawlers/base_crawler.py index 791f7b0..bd8cb1b 100644 --- a/f2/crawlers/base_crawler.py +++ b/f2/crawlers/base_crawler.py @@ -175,7 +175,7 @@ async def get_fetch_data(self, url: str): self.handle_http_status_error(http_error, url, attempt + 1) except APIError as e: - e.display_error() + logger.error(e) async def post_fetch_data(self, url: str, params: dict = {}): """ @@ -223,7 +223,7 @@ async def post_fetch_data(self, url: str, params: dict = {}): self.handle_http_status_error(http_error, url, attempt + 1) except APIError as e: - e.display_error() + logger.error(e) async def head_fetch_data(self, url: str): """ @@ -252,7 +252,7 @@ async def head_fetch_data(self, url: str): self.handle_http_status_error(http_error, url, 1) except APIError as e: - e.display_error() + logger.error(e) def handle_http_status_error(self, http_error, url: str, attempt): """ @@ -277,31 +277,33 @@ def handle_http_status_error(self, http_error, url: str, attempt): if response is None or status_code is None: logger.error( - _("HTTP状态错误: {0}, URL: {1}, 尝试次数: {2}").format( + _("HTTP状态错误:{0}, URL:{1}, 尝试次数:{2}").format( http_error, url, attempt ) ) - raise APIResponseError(f"处理HTTP错误时遇到意外情况: {http_error}") + raise APIResponseError( + _("处理HTTP错误时遇到意外情况:{0}").format(http_error) + ) if status_code == 302: pass elif status_code == 404: 
- raise APINotFoundError(f"HTTP Status Code {status_code}") + raise APINotFoundError(_("HTTP状态码错误:"), status_code) elif status_code == 503: - raise APIUnavailableError(f"HTTP Status Code {status_code}") + raise APIUnavailableError(_("HTTP状态码错误:"), status_code) elif status_code == 408: - raise APITimeoutError(f"HTTP Status Code {status_code}") + raise APITimeoutError(_("HTTP状态码错误:"), status_code) elif status_code == 401: - raise APIUnauthorizedError(f"HTTP Status Code {status_code}") + raise APIUnauthorizedError(_("HTTP状态码错误:"), status_code) elif status_code == 429: - raise APIRateLimitError(f"HTTP Status Code {status_code}") + raise APIRateLimitError(_("HTTP状态码错误:"), status_code) else: logger.error( - _("HTTP状态错误: {0}, URL: {1}, 尝试次数: {2}").format( - status_code, url, attempt + _("HTTP状态错误:{0}, URL:{1}, 尝试次数:{2}").format( + http_error, url, attempt ) ) - raise APIResponseError(f"HTTP状态错误: {status_code}") + raise APIResponseError(_("HTTP状态码错误:"), status_code) async def close(self): await self.aclient.aclose() diff --git a/f2/dl/base_downloader.py b/f2/dl/base_downloader.py index fb4957a..ec12d48 100644 --- a/f2/dl/base_downloader.py +++ b/f2/dl/base_downloader.py @@ -7,7 +7,7 @@ import traceback from pathlib import Path from rich.progress import TaskID -from typing import Union, Optional, Any +from typing import Union, Optional, Any, List from f2.log.logger import logger from f2.i18n.translator import _ @@ -26,7 +26,7 @@ class BaseDownloader(BaseCrawler): """基础下载器 (Base Downloader Class)""" - def __init__(self, kwargs: dict = {}): + def __init__(self, kwargs: dict = ...): proxies_conf = kwargs.get("proxies", {"http": None, "https": None}) proxies = { "http://": proxies_conf.get("http", None), @@ -42,7 +42,6 @@ def __init__(self, kwargs: dict = {}): super().__init__(proxies=proxies, crawler_headers=self.headers) self.progress = RichConsoleManager().progress self.download_tasks = [] - logger.debug(_("BaseDownloader 请求头headers:{0}".format(self.headers))) @staticmethod def _ensure_path(path: Union[str, Path]) -> Path: @@ -77,89 +76,143 @@ async def _download_chunks( task_id, advance=len(chunk), total=int(content_length) ) except httpx.ReadTimeout as e: - logger.warning(_("文件区块下载超时: {0}".format(e))) + logger.warning(_("文件区块下载超时:{0}").format(e)) except Exception as e: - logger.error(_("文件区块下载失败: {0}".format(e))) + logger.error(_("文件区块下载失败:{0}").format(e)) async def download_file( - self, task_id: TaskID, url: str, full_path: Union[str, Path] + self, + task_id: TaskID, + urls: Union[str, List[str]], + full_path: Union[str, Path], ) -> None: """ 下载文件 (Download file) Args: task_id (TaskID): 任务ID (Task ID) - url (str): 文件URL (File URL) + urls (Union[str, List[str]]): 文件URL (File URL) full_path (Union[str, Path]): 保存路径 (Save path) + + Note: + url仅代表一个文件的链接,当url为列表时,表示该文件的多个链接 + (url represents only a link to a file, when url is a list, + it represents multiple links to the file) """ async with self.semaphore: + # 如果urls是单个链接,则转换为列表以便统一处理 + if isinstance(urls, str): + urls = [urls] + # 确保目标路径存在 (Ensure target path exists) full_path = self._ensure_path(full_path) - # 获取文件内容大小 (Get the size of the file content) - content_length = await get_content_length(url, self.headers, self.proxies) - logger.debug( - _("{0}在服务器上的总内容长度为:{1} 字节".format(url, content_length)) - ) + # 遍历所有链接 (Iterate over all links) + for link in urls: + # 获取文件内容大小 (Get the size of the file content) + content_length = await get_content_length( + link, self.headers, self.proxies + ) - # 如果文件内容大小为0, 则不下载 (If file content size is 0, skip 
download) - if content_length == 0: - logger.warning(_("内容长度为0,跳过下载")) - await self.progress.update( - task_id, - description=_("[ 丢失 ]:"), - filename=trim_filename(full_path.name, 45), - state="completed", + logger.debug( + _("{0} 在服务器上的总内容长度为:{1} 字节").format( + link, content_length + ) ) - return - # 确保目标路径存在 (Ensure target path exists) - full_path.parent.mkdir(parents=True, exist_ok=True) - # 寻找未下载完的临时文件 (Find unfinished temporary files) - tmp_path = full_path.with_suffix(".tmp") - # 获取临时文件的大小 (Get the size of the temporary file) - start_byte = 0 if not tmp_path.exists() else tmp_path.stat().st_size - - logger.debug( - _( - "找到了未下载完的文件 {0}, 大小为 {1} 字节".format( + # 如果文件内容大小为0, 则尝试下一个链接 (If file content size is 0, try the next link) + if content_length == 0: + logger.warning( + _("链接 {0} 内容长度为0,尝试下一个链接是否可用").format(link) + ) + continue + + # 确保目标路径存在 (Ensure target path exists) + full_path.parent.mkdir(parents=True, exist_ok=True) + # 寻找未下载完的临时文件 (Find unfinished temporary files) + tmp_path = full_path.with_suffix(".tmp") + # 获取临时文件的大小 (Get the size of the temporary file) + start_byte = 0 if not tmp_path.exists() else tmp_path.stat().st_size + + logger.debug( + _("找到了未下载完的文件 {0}, 大小为 {1} 字节").format( tmp_path, start_byte ) ) - ) - if start_byte in [0, content_length]: - if start_byte: + if start_byte in [0, content_length]: + if start_byte: + tmp_path.rename(full_path) + logger.debug(_("临时文件已完全下载")) + return + + # 构建range请求头 (Build range request header) + range_headers = ( + {"Range": "bytes={}-".format(start_byte)} if start_byte else {} + ) + range_headers.update(self.headers) + range_request = self.aclient.build_request( + "GET", link, headers=range_headers + ) + async with aiofiles.open( + tmp_path, "ab" if start_byte else "wb" + ) as file: + await self._download_chunks( + self.aclient, range_request, file, content_length, task_id + ) + + # 下载完成后重命名文件 (Rename file after download is complete) + try: tmp_path.rename(full_path) - logger.debug(_("临时文件已完全下载")) - return + except FileExistsError: + logger.warning(_("{0} 已存在,将覆盖").format(full_path)) + tmp_path.replace(full_path) + except PermissionError: + logger.error( + _( + "另一个程序正在使用此文件或受异步调度影响,该任务需要重新下载" + ) + ) + # 尝试删除临时文件 (Try to delete the temporary file) + try: + tmp_path.unlink() + tmp_path.rename(full_path) + except Exception as e: + logger.error(_("尝试删除临时文件失败:{0}").format(e)) - # 构建range请求头 (Build range request header) - range_headers = ( - {"Range": "bytes={}-".format(start_byte)} if start_byte else {} - ) - range_headers.update(self.headers) - range_request = self.aclient.build_request( - "GET", url, headers=range_headers - ) - async with aiofiles.open(tmp_path, "ab" if start_byte else "wb") as file: - await self._download_chunks( - self.aclient, range_request, file, content_length, task_id + await self.progress.update( + task_id, + description=_("[ 失败 ]:"), + filename=trim_filename(full_path.name, 45), + state="error", + ) + + await self.progress.update( + task_id, + description=_("[ 完成 ]:"), + filename=trim_filename(full_path.name, 45), + state="completed", ) + logger.debug(_("下载完成, 文件已保存为 {0}").format(full_path)) - # 下载完成后重命名文件 (Rename file after download is complete) - tmp_path.rename(full_path) + # 如果下载成功,则跳出循环 (If download is successful, break the loop) + break - await self.progress.update( - task_id, - description=_("[ 完成 ]:"), - filename=trim_filename(full_path.name, 45), - state="completed", - ) - logger.debug(_("下载完成, 文件已保存为 {0}".format(full_path))) + else: + # 如果遍历完所有链接仍然无法成功下载,则记录警告 + logger.warning("所有链接都无法下载") + 
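# download_file now walks an ordered list of candidate links for one file, skips links whose
# Content-Length is 0, and only marks the file as lost when no link succeeds. A stripped-down,
# self-contained sketch of that fallback flow (no resume/.tmp handling, unlike the real code):
import aiofiles
import httpx

async def download_with_fallback(urls, dest: str) -> bool:
    if isinstance(urls, str):
        urls = [urls]                                  # normalize a single link to a list
    async with httpx.AsyncClient(follow_redirects=True) as client:
        for link in urls:
            head = await client.head(link)
            if int(head.headers.get("Content-Length", 0)) == 0:
                continue                               # dead mirror, try the next link
            async with aiofiles.open(dest, "wb") as f:
                async with client.stream("GET", link) as resp:
                    async for chunk in resp.aiter_bytes():
                        await f.write(chunk)
            return True                                # first working link wins
    return False                                       # every link failed -> caller marks it lost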
await self.progress.update( + task_id, + description=_("[ 丢失 ]:所有链接都无法下载"), + filename=trim_filename(full_path.name, 45), + state="error", + ) async def save_file( - self, task_id: TaskID, content: Any, full_path: Union[str, Path] + self, + task_id: TaskID, + content: Any, + full_path: Union[str, Path], ): """ 保存文件 (Save file) @@ -188,10 +241,13 @@ async def save_file( filename=trim_filename(full_path.name, 45), state="completed", ) - logger.debug(_("下载完成, 文件已保存为 {0}".format(full_path))) + logger.debug(_("下载完成, 文件已保存为 {0}").format(full_path)) async def download_m3u8_stream( - self, task_id: TaskID, url: str, full_path: Union[str, Path] + self, + task_id: TaskID, + url: str, + full_path: Union[str, Path], ) -> None: """ 下载m3u8流视频 (Download m3u8 stream video) @@ -263,9 +319,9 @@ async def download_m3u8_stream( ) except httpx.ReadTimeout as e: - logger.warning(_("TS文件下载超时: {0}".format(e))) + logger.warning(_("TS文件下载超时: {0}").format(e)) except Exception as e: - logger.error(_("TS文件下载失败: {0}".format(e))) + logger.error(_("TS文件下载失败: {0}").format(e)) logger.error(traceback.format_exc()) finally: await ts_response.aclose() @@ -283,7 +339,7 @@ async def download_m3u8_stream( ) return else: - logger.error(_("HTTP错误: {0}".format(e))) + logger.error(_("HTTP错误: {0}").format(e)) await self.progress.update( task_id, description=_("[ 失败 ]:"), @@ -293,7 +349,7 @@ async def download_m3u8_stream( return except Exception as e: - logger.error(_("m3u8文件解析失败: {0}".format(e))) + logger.error(_("m3u8文件解析失败: {0}").format(e)) logger.error(traceback.format_exc()) await self.progress.update( task_id, @@ -306,7 +362,7 @@ async def download_m3u8_stream( async def initiate_download( self, file_type: str, - file_url: str, + file_url: Union[str, List[str]], base_path: Union[str, Path], file_name: str, file_suffix: Optional[str], @@ -318,10 +374,15 @@ async def initiate_download( Args: file_type (str): 文件类型描述 (File type description) - file_url (str): 文件URL (File URL) + file_url (Union[str, List[str]]): 文件URL (File URL) file_name (str): 文件名称 (File name) base_path (Union[str, Path]): 基础路径 (Base path) file_suffix (Optional[str]): 文件后缀 (File suffix) + + Note: + file_url仅代表一个文件的链接,当file_url为列表时,表示该文件的多个链接 + (file_url represents only a link to a file, when file_url is a list, + it represents multiple links to the file) """ # 文件路径 @@ -340,7 +401,7 @@ async def initiate_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -387,7 +448,7 @@ async def initiate_static_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -433,7 +494,7 @@ async def initiate_m3u8_download( await self.progress.update(task_id, state="completed") else: task_id = await self.progress.add_task( - description=_("[ {0} ]:".format(file_type)), + description=_("[ {0} ]:").format(file_type), filename=trim_filename(file_path, 45), start=True, ) @@ -446,7 +507,7 @@ async def initiate_m3u8_download( async def execute_tasks(self): """执行所有下载任务 (Execute all download tasks)""" logger.debug( - _("开始执行下载任务,本次共有 {0} 个任务".format(len(self.download_tasks))) + _("开始执行下载任务,本次共有 {0} 个任务").format(len(self.download_tasks)) ) await asyncio.gather(*self.download_tasks) 
self.download_tasks.clear() diff --git a/f2/exceptions/api_exceptions.py b/f2/exceptions/api_exceptions.py index 069677e..9241f79 100644 --- a/f2/exceptions/api_exceptions.py +++ b/f2/exceptions/api_exceptions.py @@ -8,70 +8,71 @@ class APIError(Exception): """基本API异常类,其他API异常都会继承这个类""" - def __init__(self, status_code=None): - self.status_code = status_code + def __init__(self, message=None, status_code=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.status_code = status_code + super().__init__(message) - def display_error(self): - """显示错误信息和状态码(如果有的话)""" - return f"Error: {self.args[0]}." + ( - f" Status Code: {self.status_code}." if self.status_code else "" + def __str__(self): + """返回错误信息和文件路径(如果有的话)""" + return f"{super().__str__()}" + ( + f" Status Code: {self.status_code}" if self.status_code else "" ) class APIConnectionError(APIError): """当与API的连接出现问题时抛出""" - def display_error(self): - return f"API Connection Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIUnavailableError(APIError): """当API服务不可用时抛出,例如维护或超时""" - def display_error(self): - return f"API Unavailable Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APINotFoundError(APIError): """当API端点不存在时抛出""" - def display_error(self): - return f"API Not Found Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIResponseError(APIError): """当API返回的响应与预期不符时抛出""" - def display_error(self): - return f"API Response Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIRateLimitError(APIError): """当达到API的请求速率限制时抛出""" - def display_error(self): - return f"API Rate Limit Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APITimeoutError(APIError): """当API请求超时时抛出""" - def display_error(self): - return f"API Timeout Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIUnauthorizedError(APIError): """当API请求由于授权失败而被拒绝时抛出""" - def display_error(self): - return f"API Unauthorized Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) class APIRetryExhaustedError(APIError): """当API请求重试次数用尽时抛出""" - def display_error(self): - return f"API Retry Exhausted Error: {self.args[0]}." + def __init__(self, message=None, status_code=None): + super().__init__(message, status_code) diff --git a/f2/exceptions/db_exceptions.py b/f2/exceptions/db_exceptions.py index 38456a6..1edb871 100644 --- a/f2/exceptions/db_exceptions.py +++ b/f2/exceptions/db_exceptions.py @@ -8,46 +8,48 @@ class DatabaseError(Exception): """基本数据库异常类,其他数据库异常都会继承这个类""" - def __init__(self, *args, **kwargs): + def __init__(self, message=None, db=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.db = db + super().__init__(message) - def display_error(self): - """显示错误信息""" - return f"Database Error: {self.args[0]}." 
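The exception modules are reworked around one pattern, shown in `api_exceptions.py` above and continued in `db_exceptions.py` and `file_exceptions.py` below: the constructor takes a message plus optional context (status code, database, file path) and `__str__` appends that context, so call sites can log `str(exc)` directly instead of calling the removed `display_error()`. A condensed sketch of the pattern, using a subset of the API error classes:

```python
# Condensed sketch of the refactored exception pattern: context is stored on the
# instance and folded into __str__, replacing the old display_error() helpers.
from typing import Optional


class APIError(Exception):
    """Base API exception; concrete API errors inherit from it."""

    def __init__(self, message: Optional[str] = None, status_code: Optional[int] = None):
        self.status_code = status_code
        super().__init__(message)

    def __str__(self) -> str:
        base = super().__str__()
        return f"{base} Status Code: {self.status_code}" if self.status_code else base


class APIResponseError(APIError):
    """Raised when an API response does not match expectations."""


try:
    raise APIResponseError("unexpected payload", status_code=502)
except APIError as e:
    print(e)  # -> unexpected payload Status Code: 502
```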
+ def __str__(self): + """返回错误信息和db(如果有的话)""" + return f"{super().__str__()}" + (f" Database: {self.db}" if self.db else "") class DatabaseConnectionError(DatabaseError): """当与数据库的连接出现问题时抛出""" - def display_error(self): - return f"Database Connection Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class RecordNotFoundError(DatabaseError): """当在数据库中找不到预期的记录时抛出""" - def display_error(self): - return f"Record Not Found Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class MultipleRecordsFoundError(DatabaseError): """当期望找到一个记录但实际找到多个时抛出""" - def display_error(self): - return f"Multiple Records Found Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class DatabaseTimeoutError(DatabaseError): """当数据库操作超时时抛出""" - def display_error(self): - return f"Database Timeout Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) class DatabaseConstraintError(DatabaseError): """当违反数据库约束时抛出,例如唯一性约束""" - def display_error(self): - return f"Database Constraint Error: {self.args[0]}." + def __init__(self, message=None, db=None): + super().__init__(message, db) diff --git a/f2/exceptions/file_exceptions.py b/f2/exceptions/file_exceptions.py index 3bbd5ef..6edb2b8 100644 --- a/f2/exceptions/file_exceptions.py +++ b/f2/exceptions/file_exceptions.py @@ -8,50 +8,41 @@ class FileError(Exception): """基本的文件错误异常类,其他文件异常都会继承这个类""" - def __init__(self, filepath=None): - self.filepath = filepath + def __init__(self, message, filepath=None): exception_console.print( "请前往QA文档 https://johnserf-seed.github.io/f2/question-answer/qa.html 查看相关帮助" ) + self.filepath = filepath + super().__init__(message) - def display_error(self): - """显示错误信息和文件路径(如果有的话)""" - return f"File Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __str__(self): + """返回错误信息和文件路径(如果有的话)""" + return f"{super().__str__()} Filepath: {self.filepath}" if self.filepath else "" -class FileNotFound(FileError, FileNotFoundError): +class FileNotFound(FileError): """文件不存在错误""" - def display_error(self): - return f"File Not Found Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message=None, filepath=None): + super().__init__(message, filepath) -class FilePermissionError(FileError, PermissionError): +class FilePermissionError(FileError): """文件权限错误""" - def display_error(self): - return f"File Permission Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) class FileReadError(FileError): """文件读取错误""" - def display_error(self): - return f"File Read Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) class FileWriteError(FileError): """文件写入错误""" - def display_error(self): - return f"File Write Error: {self.args[0]}." + ( - f" Filepath: {self.filepath}." 
if self.filepath else "" - ) + def __init__(self, message, filepath=None): + super().__init__(message, filepath) diff --git a/f2/helps.py b/f2/helps.py index 4c2ae61..ece0bc3 100644 --- a/f2/helps.py +++ b/f2/helps.py @@ -4,28 +4,29 @@ @Description:helps.py @Date :2023/02/06 17:36:41 @Author :JohnserfSeed -@version :0.0.1.4 +@version :0.0.1.5 @License :Apache License 2.0 @Github :https://github.com/johnserf-seed @Mail :johnserf-seed@foxmail.com ------------------------------------------------- Change Log : 2023/02/06 17:36:41 - create output help +2024/03/11 18:23:30 - change get_help @ importlib path ------------------------------------------------- """ +import f2 import importlib from rich.console import Console from rich.panel import Panel from rich.table import Table from f2.i18n.translator import _ -from f2.utils import __version__ def get_help(app_name: str) -> None: try: - module = importlib.import_module(f"f2.apps.{app_name}") + module = importlib.import_module(f"f2.apps.{app_name}.help") if hasattr(module, "help"): module.help() else: @@ -34,54 +35,47 @@ def get_help(app_name: str) -> None: print(_("没有找到 {0} 应用").format(app_name)) -def f2() -> None: +def main() -> None: # 真彩 console = Console(color_system="truecolor") - console.print( - f"\n:rocket: [bold]f2 {__version__._version} :rocket:", justify="center" - ) - console.print(f"\n[i]{__version__._description_cn}", justify="center") - console.print(f"[i]{__version__._description_en}", justify="center") - console.print(f"[i]GitHub {__version__._repourl}\n", justify="center") - - table = Table.grid(padding=1, pad_edge=True, expand=True) - table.add_column("Website", no_wrap=True, justify="left", style="bold") - table.add_column("Description", no_wrap=True, justify="left", style="bold") - - # 分割 - # console.rule("[b]已适配[/b]", align="center") - # table.add_row( - # _("抖音"), _(" 单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,原声。后续更新:推荐作品,朋友作品,好友作品,搜索作品") - # ) - # table.add_row( - # _("TikTok"), _(" 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,原声。后续更新:推荐作品,朋友作品,好友作品,搜索作品") - # ) - # # 待适配 - # console.print(table) - # 分割 - # console.rule() + console.print(f"\n:rocket: [bold]f2 {f2.__version__} :rocket:", justify="center") + console.print(f"\n[i]{f2.__description_cn__}", justify="center") + console.print(f"[i]{f2.__description_en__}", justify="center") + console.print(f"[i]GitHub {f2.__repourl__}\n", justify="center") # 使用方法 table = Table.grid(padding=1, pad_edge=True) table.add_column("Usage", no_wrap=True, justify="left", style="bold") table.add_row("[b]f2[/b] [magenta] [/magenta][cyan][COMMAND]") - table.add_row(_("例: f2 dy -h 来获取douyin的命令帮助")) + table.add_row(_("例:f2 dy -h/--help 获取douyin的命令帮助")) + table.add_row( + "[b]f2[/b] [magenta][Option] [/magenta][cyan][Args][/cyan] [magenta] [/magenta][cyan][COMMAND]" + ) + table.add_row(_("例:f2 -d DEBUG dy 日志级别为调试运行")) console.print( Panel(table, border_style="bold", title="使用方法 | Usage", title_align="left") ) - table = Table.grid(padding=1, pad_edge=True, expand=True) + # 应用列表 table = Table(show_header=True, header_style="bold magenta") - table.add_column("Parameter", no_wrap=True, justify="left", style="bold") - table.add_column("Description", no_wrap=True, style="bold") - table.add_column("Status", no_wrap=True, justify="left", style="bold") + table.add_column(_("参数"), no_wrap=True, justify="left", style="bold") + table.add_column(_("描述"), no_wrap=True, style="bold") + table.add_column(_("状态"), no_wrap=True, justify="left", style="bold") table.add_row(_("weibo 或 wb"), _("- 获取微博")) table.add_row( - _("douyin 或 dy"), _("- 
单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,文案,封面,直播,原声。"), _("✔") + _("douyin 或 dy"), + _( + "- 单个作品,主页作品,点赞作品,收藏作品,合辑作品,图文,文案,封面,直播,原声。" + ), + _("✔"), ) table.add_row( - _("tiktok 或 tk"), _("- 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,文案,封面,原声。"), _("✔") + _("tiktok 或 tk"), + _( + "- 单个作品,主页作品,点赞作品,收藏作品,播放列表(合辑)作品,文案,封面,原声。" + ), + _("✔"), ) table.add_row(_("instagram 或 ig"), _("- 获取ig的作品"), _("⏳")) table.add_row(_("twitch 或 tv"), _("- 获取Twitch直播")) @@ -92,8 +86,10 @@ def f2() -> None: table.add_row(_("little_red_book 或 lrb"), _("- 获取小红书的作品")) table.add_row("\n") table.add_row( - "f2 -d [magenta] [/magenta][cyan][COMMAND]", - _("- 记录app的debug到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]"), + "f2 -d DEBUG", + _( + "- 记录app的调试日志到/logs下,如遇BUG提交Issue时请附带该文件并[red]删除个人敏感信息[/red]" + ), _("⚠"), ) table.add_row( @@ -106,7 +102,7 @@ def f2() -> None: Panel( table, border_style="bold", - title="", + title="应用 | apps", title_align="left", subtitle=_("欢迎提交PR适配更多网站"), ) diff --git a/f2/languages/en_US/LC_MESSAGES/en_US.mo b/f2/languages/en_US/LC_MESSAGES/en_US.mo index e263a60..55aebf4 100644 Binary files a/f2/languages/en_US/LC_MESSAGES/en_US.mo and b/f2/languages/en_US/LC_MESSAGES/en_US.mo differ diff --git a/f2/languages/zh_CN/LC_MESSAGES/zh_CN.mo b/f2/languages/zh_CN/LC_MESSAGES/zh_CN.mo index dd754cf..4aa11a1 100644 Binary files a/f2/languages/zh_CN/LC_MESSAGES/zh_CN.mo and b/f2/languages/zh_CN/LC_MESSAGES/zh_CN.mo differ diff --git a/f2/log/logger.py b/f2/log/logger.py index 39f2d9d..6b982f5 100644 --- a/f2/log/logger.py +++ b/f2/log/logger.py @@ -68,7 +68,9 @@ def clean_logs(self, keep_last_n=10): try: log_file.unlink() except PermissionError: - self.logger.warning(f"无法删除日志文件 {log_file}, 它正被另一个进程使用") + self.logger.warning( + f"无法删除日志文件 {log_file}, 它正被另一个进程使用" + ) def shutdown(self): for handler in self.logger.handlers: diff --git a/f2/utils/__version__.py b/f2/utils/__version__.py deleted file mode 100644 index 7006cd0..0000000 --- a/f2/utils/__version__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- -""" -@Description:__version__.py -@Date :2023/01/15 23:42:17 -@Author :JohnserfSeed -@version :0.0.1.4 -@License :(C)Copyright 2019-2022, Liugroup-NLPR-CASIA -@Github :https://github.com/johnserf-seed -@Mail :johnserf-seed@foxmail.com -------------------------------------------------- -Change Log : -2023/01/15 23:43:07 - Create __version__.py -------------------------------------------------- -""" - -_author = "JohnserfSeed" -_description_cn = "基于[red]异步[/red]的[green]全平台下载工具." -_description_en = "[yellow]Asynchronous based [/yellow]full-platform download tool." 
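In the `f2/helps.py` hunk above, `get_help` now resolves each app's help screen through `importlib`, expecting an `f2.apps.<name>.help` module that exposes a `help()` callable. A hedged sketch of that dispatch (function name and error text simplified here; the real code prints a localized message):

```python
# Sketch of the importlib-based help dispatch used in f2/helps.py after this change.
import importlib


def get_app_help(app_name: str) -> None:
    try:
        # each app ships an f2.apps.<name>.help module with a help() entry point
        module = importlib.import_module(f"f2.apps.{app_name}.help")
    except ImportError:
        print(f"app not found: {app_name}")
        return
    help_fn = getattr(module, "help", None)
    if callable(help_fn):
        help_fn()
    else:
        print(f"{app_name} does not expose a help() entry point")
```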
-_reponame = "f2" -_repourl = "https://github.com/Johnserf-Seed/f2" -_version = "0.0.1.4" - -__all__ = [ - "_author", - "_description_cn", - "_description_en", - "_reponame", - "_repourl", - "_version", -] diff --git a/f2/utils/_dl.py b/f2/utils/_dl.py index 0283888..d38d9c3 100644 --- a/f2/utils/_dl.py +++ b/f2/utils/_dl.py @@ -25,13 +25,23 @@ async def get_content_length(url: str, headers: dict = {}, proxies: dict = {}) - ) as client: try: response = await client.head(url, headers=headers, follow_redirects=True) + # 当head请求被禁止时,释放status异常被捕获 (When head requests are forbidden, release status exceptions are caught) response.raise_for_status() + + if ( + response.headers.get("Content-Length") != None + and int(response.headers.get("Content-Length")) == 0 + ): + # 如果head请求无法获取Content-Length, 则使用GET请求再次尝试获取 + response = await client.get(url, headers=headers, follow_redirects=True) + response.raise_for_status() + except httpx.ConnectTimeout: # 连接超时错误处理 (Handling connection timeout errors) logger.error(_("连接超时错误: {0}".format(url))) - logger.error("==========================") + logger.error("===================================") logger.error(f"headers:{headers}, proxies:{proxies}") - logger.error("==========================") + logger.error("===================================") return 0 # 对HTTP状态错误进行处理 (Handling HTTP status errors) except httpx.HTTPStatusError as exc: diff --git a/f2/utils/conf_manager.py b/f2/utils/conf_manager.py index f7cff44..c1dca20 100644 --- a/f2/utils/conf_manager.py +++ b/f2/utils/conf_manager.py @@ -1,7 +1,6 @@ # path: f2/utils/conf_manager.py import f2 -import time import yaml import click @@ -27,14 +26,10 @@ def __init__(self, filepath: str = f2.F2_CONFIG_FILE_PATH): def load_config(self) -> dict: """从文件中加载配置 (Load the conf from the file)""" - try: - if not self.filepath.exists(): - raise FileNotFound(_("'{0}' 配置文件路径不存在").format(self.filepath)) - return yaml.safe_load(self.filepath.read_text(encoding="utf-8")) or {} - except FileNotFound as e: - e.display_error() - time.sleep(2) - exit(0) + if not self.filepath.exists(): + raise FileNotFound(_("配置文件不存在"), self.filepath) + + return yaml.safe_load(self.filepath.read_text(encoding="utf-8")) or {} def get_config(self, app_name: str, default=None) -> dict: """ @@ -58,9 +53,7 @@ def save_config(self, config: dict): try: self.filepath.write_text(yaml.dump(config), encoding="utf-8") except PermissionError: - raise FilePermissionError( - _("'{0}' 配置文件路径无写权限").format(self.filepath) - ) + raise FilePermissionError(_("配置文件路径无写权限"), self.filepath) def backup_config(self): """在进行更改前备份配置文件 (Backup the conf file before making changes)""" diff --git a/f2/utils/utils.py b/f2/utils/utils.py index b0cfe5e..25f37c7 100644 --- a/f2/utils/utils.py +++ b/f2/utils/utils.py @@ -5,6 +5,7 @@ import random import secrets import datetime +import browser_cookie3 import importlib_resources from typing import Union, Any @@ -77,6 +78,10 @@ def timestamp_2_str( if timestamp is None or timestamp == "None": return "" + if isinstance(timestamp, str): + if len(timestamp) == 30: + return datetime.datetime.strptime(timestamp, "%a %b %d %H:%M:%S %z %Y") + return datetime.datetime.fromtimestamp(float(timestamp)).strftime(format) @@ -196,6 +201,7 @@ def replaceT(obj: Union[str, Any]) -> Union[str, Any]: if isinstance(obj, str): return re.sub(reSub, "_", obj) + return obj # raise TypeError("输入应为字符串或字符串列表") @@ -236,3 +242,111 @@ def split_filename(text: str, os_limit: dict) -> str: def ensure_path(path: Union[str, Path]) -> Path: """确保路径是一个Path对象 (Ensure the 
path is a Path object)""" return Path(path) if isinstance(path, str) else path + + +def get_cookie_from_browser(browser_choice: str, domain: str = "") -> dict: + """ + 根据用户选择的浏览器获取domain的cookie。 + + Args: + browser_choice (str): 用户选择的浏览器名称 + + Returns: + str: *.domain的cookie值 + """ + + if not browser_choice or not domain: + return "" + + BROWSER_FUNCTIONS = { + "chrome": browser_cookie3.chrome, + "firefox": browser_cookie3.firefox, + "edge": browser_cookie3.edge, + "opera": browser_cookie3.opera, + "opera_gx": browser_cookie3.opera_gx, + "safari": browser_cookie3.safari, + "chromium": browser_cookie3.chromium, + "brave": browser_cookie3.brave, + "vivaldi": browser_cookie3.vivaldi, + "librewolf": browser_cookie3.librewolf, + } + cj_function = BROWSER_FUNCTIONS.get(browser_choice) + cj = cj_function(domain_name=domain) + cookie_value = {c.name: c.value for c in cj if c.domain.endswith(domain)} + return cookie_value + + +def check_invalid_naming( + naming: str, allowed_patterns: list, allowed_separators: list +) -> list: + """ + 检查命名是否符合命名模板 (Check if the naming conforms to the naming template) + + Args: + naming (str): 命名字符串 (Naming string) + allowed_patterns (list): 允许的模式列表 (List of allowed patterns) + allowed_separators (list): 允许的分隔符列表 (List of allowed separators) + Returns: + list: 无效的模式列表 (List of invalid patterns) + """ + if not naming or not allowed_patterns or not allowed_separators: + return [] + + temp_naming = naming + invalid_patterns = [] + + # 检查提供的模式是否有效 + for pattern in allowed_patterns: + if pattern in temp_naming: + temp_naming = temp_naming.replace(pattern, "") + + # 此时,temp_naming应只包含分隔符 + for char in temp_naming: + if char not in allowed_separators: + invalid_patterns.append(char) + + # 检查连续的无效模式或分隔符 + for pattern in allowed_patterns: + # 检查像"{xxx}{xxx}"这样的模式 + if pattern + pattern in naming: + invalid_patterns.append(pattern + pattern) + for sep in allowed_patterns: + # 检查像"{xxx}-{xxx}"这样的模式 + if pattern + sep + pattern in naming: + invalid_patterns.append(pattern + sep + pattern) + + return invalid_patterns + + +def merge_config( + main_conf: dict = ..., + custom_conf: dict = ..., + **kwargs, +): + """ + 合并配置参数,使 CLI 参数优先级高于自定义配置,自定义配置优先级高于主配置,最终生成完整配置参数字典。 + + Args: + main_conf (dict): 主配置参数字典 + custom_conf (dict): 自定义配置参数字典 + **kwargs: CLI 参数和其他额外的配置参数 + + Returns: + dict: 合并后的配置参数字典 + """ + # 合并主配置和自定义配置 + merged_conf = {} + for key, value in main_conf.items(): + merged_conf[key] = value # 将主配置复制到合并后的配置中 + for key, value in custom_conf.items(): + if value is not None and value != "": # 只有值不为 None 和 空值,才进行合并 + merged_conf[key] = value # 自定义配置参数会覆盖主配置中的同名参数 + + # 合并 CLI 参数与合并后的配置,确保 CLI 参数的优先级最高 + for key, value in kwargs.items(): + if key not in merged_conf: # 如果合并后的配置中没有这个键,则直接添加 + merged_conf[key] = value + elif value is not None and value != "": # 如果值不为 None 和 空值,则进行合并 + merged_conf[key] = value # CLI 参数会覆盖自定义配置和主配置中的同名参数 + + return merged_conf diff --git a/f2/utils/xbogus.py b/f2/utils/xbogus.py index 815c9a8..d23aac7 100644 --- a/f2/utils/xbogus.py +++ b/f2/utils/xbogus.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- -''' +""" @Description:xbogus.py @Date :2023/02/09 00:29:30 @Author :JohnserfSeed -@version :0.0.1 +@version :0.0.2 @License :Apache License 2.0 @Github :https://github.com/johnserf-seed @Mail :johnserf-seed@foxmail.com @@ -12,15 +12,18 @@ Change Log : 2023/02/09 00:29:30 - Create XBogus class 2023/06/07 17:26:02 - Refactor the XB algorithm using Python. 
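`merge_config`, added to `f2/utils/utils.py` above, layers the three configuration sources so that CLI arguments override the custom config, which overrides the main config, and `None` or empty values never clobber an existing entry. A usage sketch with illustrative keys and values (the import path follows the file path in this diff):

```python
# Usage sketch for merge_config; keys and values below are illustrative only.
from f2.utils.utils import merge_config

main_conf = {"cookie": "from-main", "path": "Download", "lyric": "no"}
custom_conf = {"cookie": "from-custom", "path": ""}   # empty value is ignored
cli_kwargs = {"lyric": "yes", "interval": "all"}      # CLI wins over both configs

merged = merge_config(main_conf, custom_conf, **cli_kwargs)
# merged == {"cookie": "from-custom", "path": "Download",
#            "lyric": "yes", "interval": "all"}
```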
+2024/04/01 00:32:30 - Black Code Style & Support custom ua ------------------------------------------------- -''' +""" import time +import base64 import hashlib -class XBogus: - def __init__(self) -> None: +class XBogus: + def __init__(self, user_agent: str = None) -> None: + # fmt: off self.Array = [ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, @@ -30,7 +33,13 @@ def __init__(self) -> None: None, None, None, None, None, None, None, None, None, None, None, None, 10, 11, 12, 13, 14, 15 ] self.character = "Dkdpgh4ZKsQB80/Mfvw36XI1R25-WUAlEi7NLboqYTOPuzmFjJnryx9HVGcaStCe=" - + # fmt: on + self.ua_key = b"\x00\x01\x0c" + self.user_agent = ( + user_agent + if user_agent is not None and user_agent != "" + else "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0" + ) def md5_str_to_array(self, md5_str): """ @@ -43,20 +52,23 @@ def md5_str_to_array(self, md5_str): array = [] idx = 0 while idx < len(md5_str): - array.append((self.Array[ord(md5_str[idx])] << 4) | self.Array[ord(md5_str[idx + 1])]) + array.append( + (self.Array[ord(md5_str[idx])] << 4) + | self.Array[ord(md5_str[idx + 1])] + ) idx += 2 return array - def md5_encrypt(self, url_path): """ 使用多轮md5哈希算法对URL路径进行加密。 Encrypt the URL path using multiple rounds of md5 hashing. """ - hashed_url_path = self.md5_str_to_array(self.md5(self.md5_str_to_array(self.md5(url_path)))) + hashed_url_path = self.md5_str_to_array( + self.md5(self.md5_str_to_array(self.md5(url_path))) + ) return hashed_url_path - def md5(self, input_data): """ 计算输入数据的md5哈希值。 @@ -73,8 +85,9 @@ def md5(self, input_data): md5_hash.update(bytes(array)) return md5_hash.hexdigest() - - def encoding_conversion(self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, h, p): + def encoding_conversion( + self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, h, p + ): """ 第一次编码转换。 Perform encoding conversion. @@ -82,10 +95,9 @@ def encoding_conversion(self, a, b, c, e, d, t, f, r, n, o, i, _, x, u, s, l, v, y = [a] y.append(int(i)) y.extend([b, _, c, x, e, u, d, s, t, l, f, v, r, h, n, p, o]) - re = bytes(y).decode('ISO-8859-1') + re = bytes(y).decode("ISO-8859-1") return re - def encoding_conversion2(self, a, b, c): """ 第二次编码转换。 @@ -93,7 +105,6 @@ def encoding_conversion2(self, a, b, c): """ return chr(a) + chr(b) + c - def rc4_encrypt(self, key, data): """ 使用RC4算法对数据进行加密。 @@ -121,7 +132,6 @@ def rc4_encrypt(self, key, data): return encrypted_data - def calculation(self, a1, a2, a3): """ 对给定的输入值执行位运算计算,并返回结果。 @@ -130,18 +140,30 @@ def calculation(self, a1, a2, a3): x1 = (a1 & 255) << 16 x2 = (a2 & 255) << 8 x3 = x1 | x2 | a3 - return self.character[(x3 & 16515072) >> 18] + self.character[(x3 & 258048) >> 12] + self.character[(x3 & 4032) >> 6] + self.character[ - x3 & 63] - + return ( + self.character[(x3 & 16515072) >> 18] + + self.character[(x3 & 258048) >> 12] + + self.character[(x3 & 4032) >> 6] + + self.character[x3 & 63] + ) def getXBogus(self, url_path): """ 获取 X-Bogus 值。 Get the X-Bogus value. 
""" - array1 = self.md5_str_to_array("d88201c9344707acde7261b158656c0e") + + array1 = self.md5_str_to_array( + self.md5( + base64.b64encode( + self.rc4_encrypt(self.ua_key, self.user_agent.encode("ISO-8859-1")) + ).decode("ISO-8859-1") + ) + ) + array2 = self.md5_str_to_array( - self.md5(self.md5_str_to_array("d41d8cd98f00b204e9800998ecf8427e"))) + self.md5(self.md5_str_to_array("d41d8cd98f00b204e9800998ecf8427e")) + ) url_path_array = self.md5_encrypt(url_path) timer = int(time.time()) @@ -149,17 +171,16 @@ def getXBogus(self, url_path): array3 = [] array4 = [] xb_ = "" - + # fmt: off new_array = [ - 64, 0.00390625, 1, 8, + 64, 0.00390625, 1, 12, url_path_array[14], url_path_array[15], array2[14], array2[15], array1[14], array1[15], timer >> 24 & 255, timer >> 16 & 255, timer >> 8 & 255, timer & 255, ct >> 24 & 255, ct >> 16 & 255, ct >> 8 & 255, ct & 255 ] - + # fmt: on xor_result = new_array[0] for i in range(1, len(new_array)): - # a = xor_result b = new_array[i] if isinstance(b, float): b = int(b) @@ -179,21 +200,32 @@ def getXBogus(self, url_path): merge_array = array3 + array4 garbled_code = self.encoding_conversion2( - 2, 255, self.rc4_encrypt("ÿ".encode('ISO-8859-1'), self.encoding_conversion(*merge_array).encode('ISO-8859-1')).decode('ISO-8859-1')) + 2, + 255, + self.rc4_encrypt( + "ÿ".encode("ISO-8859-1"), + self.encoding_conversion(*merge_array).encode("ISO-8859-1"), + ).decode("ISO-8859-1"), + ) idx = 0 while idx < len(garbled_code): - xb_ += self.calculation(ord(garbled_code[idx]), ord( - garbled_code[idx + 1]), ord(garbled_code[idx + 2])) + xb_ += self.calculation( + ord(garbled_code[idx]), + ord(garbled_code[idx + 1]), + ord(garbled_code[idx + 2]), + ) idx += 3 - self.params = '%s&X-Bogus=%s' % (url_path, xb_) + self.params = "%s&X-Bogus=%s" % (url_path, xb_) self.xb = xb_ - return (self.params, self.xb) + return (self.params, self.xb, self.user_agent) + +if __name__ == "__main__": + url_path = "https://www.douyin.com/aweme/v1/web/aweme/post/?device_platform=webapp&aid=6383&channel=channel_pc_web&sec_user_id=MS4wLjABAAAAW9FWcqS7RdQAWPd2AA5fL_ilmqsIFUCQ_Iym6Yh9_cUa6ZRqVLjVQSUjlHrfXY1Y&max_cursor=0&locate_query=false&show_live_replay_strategy=1&need_time_list=1&time_list_query=0&whale_cut_token=&cut_version=1&count=18&publish_video_strategy_type=2&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Edge&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7335414539335222835&msToken=p9Y7fUBuq9DKvAuN27Peml6JbaMqG2ZcXfFiyDv1jcHrCN00uidYqUgSuLsKl1onC-E_n82m-aKKYE0QGEmxIWZx9iueQ6WLbvzPfqnMk4GBAlQIHcDzxb38FLXXQxAm" + # ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0" + ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" -if __name__ == '__main__': - url_path = "aweme_id=7196239141472980280&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333" - print("url:", url_path) - XB = XBogus() + XB = XBogus(user_agent=ua) xbogus = XB.getXBogus(url_path) - print("xbogus:", xbogus[1]) \ No newline at end of file + print(f"url: {xbogus[0]}, xbogus:{xbogus[1]}, ua: {xbogus[2]}") diff --git a/pyproject.toml b/pyproject.toml index dc31bfd..b5c7ed3 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ dependencies = [ "click==8.1.7", - "rich==13.6.0", + "rich==13.7.1", "httpx==0.25.0", "aiofiles==22.1.0", "aiosqlite==0.19.0",
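Further back, the `timestamp_2_str` change in the `f2/utils/utils.py` hunk adds a branch for 30-character timestamp strings parsed with `%a %b %d %H:%M:%S %z %Y` (the layout used by Weibo/Twitter-style `created_at` fields). A minimal sketch of that parse with an illustrative value; formatting the result here is only for display:

```python
# Sketch of the 30-character timestamp branch added to timestamp_2_str above.
import datetime

raw = "Tue Apr 02 12:34:56 +0800 2024"  # 30 characters, illustrative value
if len(raw) == 30:
    parsed = datetime.datetime.strptime(raw, "%a %b %d %H:%M:%S %z %Y")
    print(parsed.strftime("%Y-%m-%d %H:%M:%S"))  # -> 2024-04-02 12:34:56
```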