Skip to content

Commit

Permalink
Merge pull request #71 from Johnserf-Seed/v0.0.1.6-pw2
Browse files Browse the repository at this point in the history
fix douyin mix & f2 ci yml from v0.0.1.6 pw2 on 24/Apr/07
  • Loading branch information
Johnserf-Seed authored Apr 7, 2024
2 parents 4debf16 + 1c815fe commit 3c3f38a
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 51 deletions.
1 change: 1 addition & 0 deletions f2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
APP_CONFIG_FILE_PATH = "conf/app.yaml"
F2_CONFIG_FILE_PATH = "conf/conf.yaml"
F2_DEFAULTS_FILE_PATH = "conf/defaults.yaml"
TEST_CONFIG_FILE_PATH = "conf/test.yaml"

BROWSER_LIST = [
"chrome",
Expand Down
20 changes: 16 additions & 4 deletions f2/apps/douyin/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from f2.apps.douyin.utils import (
SecUserIdFetcher,
AwemeIdFetcher,
MixIdFetcher,
WebCastIdFetcher,
VerifyFpManager,
create_or_rename_user_folder,
Expand Down Expand Up @@ -888,10 +889,21 @@ async def handle_user_mix(self):
page_counts = self.kwargs.get("page_counts", 20)
max_counts = self.kwargs.get("max_counts")

aweme_id = await AwemeIdFetcher.get_aweme_id(self.kwargs.get("url"))
mix_data = await self.fetch_one_video(aweme_id)
sec_user_id = mix_data.get("sec_user_id")
mix_id = mix_data.get("mix_id")
# 先假定合集链接获取合集ID
try:
mix_id = await MixIdFetcher.get_mix_id(self.kwargs.get("url"))
async for aweme_data in self.fetch_user_mix_videos(mix_id, 0, 20, 1):
logger.info(_("正在从合集链接获取合集ID"))
sec_user_id = aweme_data.sec_user_id[0] # 注意这里是一个列表
except Exception as e:
logger.warning(
_("获取合集ID失败,尝试解析作品链接。错误信息:{0}").format(e)
)
# 如果获取失败,则假定作品链接获取作品ID
aweme_id = await AwemeIdFetcher.get_aweme_id(self.kwargs.get("url"))
aweme_data = await self.fetch_one_video(aweme_id)
sec_user_id = aweme_data.sec_user_id
mix_id = aweme_data.mix_id

async with AsyncUserDB("douyin_users.db") as db:
user_path = await self.get_or_add_user_data(self.kwargs, sec_user_id, db)
Expand Down
4 changes: 3 additions & 1 deletion f2/apps/douyin/test/test_apps_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def test_xbogus_manager():
)

final_endpoint = XBogusManager.model_2_endpoint(
dyendpoint.USER_DETAIL, params.dict()
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0",
base_endpoint=dyendpoint.USER_DETAIL,
params=params.dict(),
)

assert final_endpoint, "Failed to get a final endpoint."
15 changes: 6 additions & 9 deletions f2/apps/douyin/test/test_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,17 @@
from f2.apps.douyin.model import UserPost
from f2.apps.douyin.filter import UserPostFilter
from f2.apps.douyin.crawler import DouyinCrawler
from f2.utils.conf_manager import TestConfigManager


kwargs = {
"headers": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
"Referer": "https://www.douyin.com/",
},
"cookie": "YOUR_COOKIE_HERE",
}
@pytest.fixture
def cookie_fixture():
    """Provide the "douyin" section of the test configuration to a test."""
    douyin_conf = TestConfigManager.get_test_config("douyin")
    return douyin_conf


@pytest.mark.asyncio
async def test_crawler_fetcher():
async with DouyinCrawler(kwargs) as crawler:
async def test_crawler_fetcher(cookie_fixture):
async with DouyinCrawler(cookie_fixture) as crawler:
params = UserPost(
max_cursor=0,
count=1,
Expand Down
27 changes: 12 additions & 15 deletions f2/apps/douyin/test/test_handler.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
import pytest
from f2.apps.douyin.handler import DouyinHandler
from f2.utils.conf_manager import TestConfigManager


kwargs = {
"headers": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
"Referer": "https://www.douyin.com/",
},
"cookie": "YOUR_COOKIE_HERE",
}
user_sec_id = "MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE"
@pytest.fixture
def cookie_fixture():
    """Provide the "douyin" section of the test configuration to a test."""
    douyin_conf = TestConfigManager.get_test_config("douyin")
    return douyin_conf


@pytest.mark.asyncio
async def test_fetch_user_post_videos():
results = [
async def test_fetch_user_post_videos(cookie_fixture):
async for aweme_data_list in DouyinHandler(cookie_fixture).fetch_user_post_videos(
sec_user_id="MS4wLjABAAAANXSltcLCzDGmdNFI2Q_QixVTr67NiYzjKOIP5s03CAE",
max_cursor=0,
page_counts=1,
max_counts=1,
):
aweme_data_list
async for aweme_data_list in DouyinHandler(kwargs).fetch_user_post_videos(
user_sec_id
)
]

assert results, f"Failed to fetch videos for user_sec_id: {user_sec_id}"
assert aweme_data_list, f"Failed to fetch videos for user_sec_id: {aweme_data_list}"
20 changes: 7 additions & 13 deletions f2/apps/douyin/test/test_room_id.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,17 @@
import pytest
from f2.apps.douyin.handler import DouyinHandler
from f2.log.logger import logger
from f2.utils.conf_manager import TestConfigManager

logger.setLevel("DEBUG")

kwargs = {
"headers": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
"Referer": "https://www.douyin.com/",
},
"cookie": "YOUR_COOKIE_HERE",
}
@pytest.fixture
def cookie_fixture():
    """Provide the "douyin" section of the test configuration to a test."""
    douyin_conf = TestConfigManager.get_test_config("douyin")
    return douyin_conf


@pytest.mark.asyncio
async def test_fetch_user_live_videos_by_room_id():
result = await DouyinHandler(kwargs).fetch_user_live_videos_by_room_id(
async def test_fetch_user_live_videos_by_room_id(cookie_fixture):
result = await DouyinHandler(cookie_fixture).fetch_user_live_videos_by_room_id(
"7318296342189919011"
)

assert isinstance(result, dict)
assert "room_id" in result
assert "7318296342189919011" == str(result.room_id)
7 changes: 0 additions & 7 deletions f2/apps/douyin/test/test_sso_login.py

This file was deleted.

86 changes: 84 additions & 2 deletions f2/apps/douyin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,10 +439,92 @@ async def get_all_aweme_id(cls, urls: list) -> list:


class MixIdFetcher:
    """Resolve Douyin collection ("mix") URLs to their mix_id.

    The URL is requested with redirects enabled and the ID is read from the
    final address, the same approach the original comment attributes to
    AwemeIdFetcher.
    """

    # Precompiled pattern: captures the path segment that follows "collection/".
    _DOUYIN_MIX_URL_PATTERN = re.compile(r"collection/([^/?]*)")

    @classmethod
    async def get_mix_id(cls, url: str) -> str:
        """Get the mix_id from a single URL.

        Args:
            url (str): Input URL (or text containing one).

        Returns:
            str: The mix_id found in the final (post-redirect) address.

        Raises:
            TypeError: If ``url`` is not a string.
            APINotFoundError: If no valid URL can be extracted from ``url``.
            APIConnectionError: If the HTTP request itself fails.
            APIResponseError: If the response has an error status, or the
                final address does not contain a non-empty mix_id.
        """
        if not isinstance(url, str):
            raise TypeError(_("参数必须是字符串类型"))

        # Extract the valid URL; None is treated as "nothing usable found".
        url = extract_valid_urls(url)

        if url is None:
            raise APINotFoundError(
                _("输入的URL不合法。类名:{0}").format(cls.__name__)
            )

        # Follow redirects so that short/share links end up at the full link.
        transport = httpx.AsyncHTTPTransport(retries=5)
        async with httpx.AsyncClient(
            transport=transport, proxies=TokenManager.proxies, timeout=10
        ) as client:
            try:
                response = await client.get(url, follow_redirects=True)
                response.raise_for_status()

            except httpx.RequestError as exc:
                # Covers every httpx request-level failure (connect, timeout, ...).
                raise APIConnectionError(
                    _(
                        "请求端点失败,请检查当前网络环境。 链接:{0},代理:{1},异常类名:{2},异常详细信息:{3}"
                    ).format(url, TokenManager.proxies, cls.__name__, exc)
                ) from exc

            except httpx.HTTPStatusError as e:
                raise APIResponseError(
                    _("链接:{0},状态码 {1}:{2} ").format(
                        e.response.url, e.response.status_code, e.response.text
                    )
                ) from e

        match = cls._DOUYIN_MIX_URL_PATTERN.search(str(response.url))
        # The pattern allows an empty capture ("collection/?x=1"); an empty ID
        # is as useless as no match, so both are reported as "not found".
        if not match or not match.group(1):
            raise APIResponseError(
                _("未在响应的地址中找到mix_id,检查链接是否为合集页"), 404
            )
        return match.group(1)

    @classmethod
    async def get_all_mix_id(cls, urls: list) -> list:
        """Get the mix_id for every URL in a list, resolving them concurrently.

        Args:
            urls (list): List of input URLs.

        Returns:
            list: The mix_ids, in the order ``asyncio.gather`` returns them
                (i.e. matching the order of the extracted URLs).

        Raises:
            TypeError: If ``urls`` is not a list.
            APINotFoundError: If no valid URL can be extracted from ``urls``.
        """
        if not isinstance(urls, list):
            raise TypeError(_("参数必须是列表类型"))

        # Extract the valid URLs.
        urls = extract_valid_urls(urls)

        if not urls:
            raise APINotFoundError(
                _("输入的URL List不合法。类名:{0}").format(cls.__name__)
            )

        return await asyncio.gather(*(cls.get_mix_id(url) for url in urls))


class WebCastIdFetcher:
Expand Down
17 changes: 17 additions & 0 deletions f2/conf/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
douyin:
headers:
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36
Referer: https://www.douyin.com/
cookie: __ac_nonce=066126d7400831d6b5c4e; __ac_signature=_02B4Z6wo00f01Y.vKPwAAIDC-cD1sEsc6ZmPzyxAAAXq04; ttwid=1%7Crk-8HHHCVERrpnGqN5PSPeaEwNAo02w-MzX5peKwBnI%7C1712483701%7C16399ab279d221e1f50ef17c96b5215641887b7a739cec048347e0c45221a491; douyin.com; device_web_cpu_core=12; device_web_memory_size=8; architecture=amd64; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=1920; dy_sheight=1080; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1920%2C%5C%22screen_height%5C%22%3A1080%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A12%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A150%7D%22; csrf_session_id=3d3dbde628fcd833106552c68320f88b; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; strategyABtestKey=%221712483704.155%22; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.7%7D; stream_player_status_params=%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A0%2C%5C%22is_mute%5C%22%3A0%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A1%7D%22; passport_csrf_token=b2ef0d72ded3b759412fe41b62111cc9; passport_csrf_token_default=b2ef0d72ded3b759412fe41b62111cc9; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCRWFLenNuYW8vUHhnajB2L1NRYUU4U29QVGZhS1YxVnpnYnZEMnFlektXdVhqcDNXOTNjZXB6b1hoU0MxQ05YNVNRTUg1QWRlakd5UzBNNld1RjRBZVE9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoxfQ%3D%3D; bd_ticket_guard_client_web_domain=2; msToken=mnRmihU-xb0ZsLkb-itjzad_A7ZQyDmKNrcczdt6Jj_7IXz0gdwCT8uN8XYukG1Zj5-JFxDv4yeRKsiC_WxSeXrcbYJYC0Uwyuz0C-Oe; odin_tt=509a427e7fae6b0f4498c2bbed2ead86ae99d77284740ea0f1ab337321cb107858575a289f02b0d4e81f660aeec9405be135e8361c3726ca00857825c875944955ed204c58993e6c6e879dcd024a0733
proxies:
http:
https:

tiktok:
headers:
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36
Referer: https://www.tiktok.com/
cookie: tt_csrf_token=OFxPH3PE-pSbFptiHW4iy-z4yo3u6gMDt3H0; tt_chain_token=mCH+/C/4flkJarNC1WciiQ==; ak_bmsc=91F3094B27728C66727FD8F413E3CFDA~000000000000000000000000000000~YAAQDfN0aMJLgrOOAQAAfOkFuBeR4cRsy06d9RdcxcQPWGV8lJwTK79/VYCCwvjeEtlU2mCQalUnDU1R2p6Put0FonN6gVhoG2cCdwocRl87ItHMeEQpQxAH0640eFwLYG7OBS5ydU9DUbYX3pCINlGp3AbB7wbAxQb75svv1eEALa0ba8YvA85s1ingqnpt4WmKuSeyR9LkCIg9oABnKJHbPaR2FAWA982tsSdQYIeKlsUc5nyotjtrmgDubgD+v624hs2ilwVQ61HKiCQ7P15mH0M9FL8ciGsDpTKotxZ8sG2phoylvmn0ajD8QV2YdDOegI9ss6RC79POVI5xaFnpoV8TQWLyPzdEl/E6S1TVqaqRHMJsF1De/tdQcLC9//aCKt3NZTqDGg==; tiktok_webapp_theme=light; ttwid=1%7C92fxGgEo0Z8ZDy0YLeqOnKTMHMCPieG0L8UVR1j1oI8%7C1712484381%7Ccb6cbac4d746f5a6acaf73301b34642d1358092dce19a9cb97a4adc978e9d383; odin_tt=d739e385ecc07c0682ee2c2824e464e42d6760e26ec3df8c5b4a0ed1b0bcf02fb36a3eed2a0012a70b3f8ac84a79d5278e23b2ead40ed7aaf85c8ca8b611a8f5819324df5b2ecc7e1d45725a1f5f3ed9; msToken=iA6SqoX-3ggC9aEGWcxqgX0GoRG_1z-3JA9jecj8NwngM3ivhpnl6113Xc72cEc1RhP5YXXeblbo5OOrjfH6tyqtVhD68pnoxVHFTwONHFKwxGkryp3AP2ipQ8Y4; msToken=-RWw93z8QG4ppPftuXXV0NXTs3KN74Jc-eZ7G8Yw0GOWvLoCXB4upq-lPpbKVhIOxp_wJ-hDkWHj364_BTIJUmFLhFLIOaTzgjD4tazEVOEQUBdyG3IbsEZnHxcR
proxies:
http:
https:
12 changes: 12 additions & 0 deletions f2/utils/conf_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,15 @@ def update_config_with_args(self, app_name: str, **kwargs):
click.echo(_("配置文件已更新!"))
else:
click.echo(_("已取消更新配置文件!"))


class TestConfigManager:
    """Accessor for the test configuration stored at f2.TEST_CONFIG_FILE_PATH."""

    # The "Test" prefix matches pytest's default class-collection rule and the
    # test modules import this class, so opt out of collection explicitly.
    __test__ = False

    @classmethod
    def get_test_config(cls, app_name: str) -> dict:
        """Return the test configuration section for the given app.

        Args:
            app_name (str): Name of the app section to read (e.g. "douyin").

        Returns:
            dict: Whatever ``ConfigManager.get_config`` returns for that app.
        """
        return ConfigManager(f2.TEST_CONFIG_FILE_PATH).get_config(app_name)


if __name__ == "__main__":
    # Manual check: print the "douyin" section of the test configuration.
    douyin_test_conf = TestConfigManager.get_test_config("douyin")
    print(douyin_test_conf)

0 comments on commit 3c3f38a

Please sign in to comment.