Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
FHU-yezi committed Jul 12, 2022
2 parents 4b54aa4 + c5a27d6 commit 1be29b1
Show file tree
Hide file tree
Showing 14 changed files with 200 additions and 171 deletions.
2 changes: 1 addition & 1 deletion JianshuResearchTools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.10.0"
__version__ = "2.10.1"

from . import (article, beikeisland, collection, island, notebook, objects,
rank, user)
Expand Down
2 changes: 1 addition & 1 deletion JianshuResearchTools/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def GetArticlePublishTime(article_url: str, disable_check: bool = False) -> date
Args:
article_url (str): 文章 URL
disable_check (bool): 禁用参数有效性检查. Defaults to False.
Returns:
datetime: 文章发布时间
Expand Down Expand Up @@ -530,7 +531,6 @@ def GetArticleAllCommentsData(article_id: int, count: int = 10, author_only: boo
author_only (bool, optional): 为 True 时只获取作者发布的评论,包含作者发布的子评论及其父评论. Defaults to False.
sorting_method (str, optional): 排序方式,为 "positive" 时按时间正序排列,为 "reverse" 时按时间倒序排列. Defaults to "positive".
max_count (int, optional): 获取的文章评论信息数量上限,Defaults to None.
disable_check (bool): 禁用参数有效性检查. Defaults to False.
Yields:
Iterator[Dict]: 文章评论信息
Expand Down
10 changes: 5 additions & 5 deletions JianshuResearchTools/assert_funcs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from functools import lru_cache
from typing import Any
from re import compile as re_compile
from typing import Any

from .basic_apis import (GetArticleJsonDataApi, GetCollectionJsonDataApi,
GetIslandJsonDataApi, GetNotebookJsonDataApi,
Expand Down Expand Up @@ -28,18 +28,18 @@
JIANSHU_ISLAND_POST_URL_REGEX = re_compile(r"^https://www\.jianshu\.com/gp/\w{16}/?$")


def AssertType(object: Any, type_obj: Any) -> None:
def AssertType(obj: Any, type_obj: Any) -> None:
"""判断对象是否是指定类型
Args:
object (Any): 需要进行判断的对象
obj (Any): 需要进行判断的对象
type_obj (object): 目标类型
Raises:
TypeError: 对象类型错误时抛出此错误
"""
if not isinstance(object, type_obj):
raise TypeError(f"{object} 不是 {type_obj.__name__} 类型,而是 { type(object).__name__ } 类型")
if not isinstance(obj, type_obj):
raise TypeError(f"{obj} 不是 {type_obj.__name__} 类型,而是 { type(obj).__name__ } 类型")


def AssertJianshuUrl(string: str) -> None:
Expand Down
17 changes: 9 additions & 8 deletions JianshuResearchTools/basic_apis.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Dict, List, Union
from typing import Dict, List, Union, Optional

from httpx import get as httpx_get
from httpx import post as httpx_post
from lxml import etree
from lxml.etree import _Element

from .headers import (BeikeIsland_request_header, PC_header,
api_request_header, mobile_header)
Expand Down Expand Up @@ -37,7 +38,7 @@ def GetArticleJsonDataApi(article_url: str) -> Dict:
return json_obj


def GetArticleHtmlJsonDataApi(article_url: str) -> Dict:
def GetArticleHtmlJsonDataApi(article_url: str) -> _Element:
source = httpx_get(article_url, headers=PC_header).content
html_obj = etree.HTML(source)
json_obj = json_loads(html_obj.xpath("//script[@id='__NEXT_DATA__']/text()")[0])
Expand Down Expand Up @@ -190,7 +191,7 @@ def GetDailyArticleRankListJsonDataApi() -> Dict:
return json_obj


def GetArticlesFPRankListJsonDataApi(date: str, type_: str) -> Dict: # 避免覆盖内置函数
def GetArticlesFPRankListJsonDataApi(date: str, type_: Optional[str]) -> Dict: # 避免覆盖内置函数
params = {
"date": date,
"type": type_
Expand All @@ -207,7 +208,7 @@ def GetUserJsonDataApi(user_url: str) -> Dict:
return json_obj


def GetUserPCHtmlDataApi(user_url: str) -> Dict:
def GetUserPCHtmlDataApi(user_url: str) -> _Element:
source = httpx_get(user_url, headers=PC_header).content
html_obj = etree.HTML(source)
return html_obj
Expand Down Expand Up @@ -236,7 +237,7 @@ def GetUserArticlesListJsonDataApi(user_url: str, page: int,
return json_obj


def GetUserFollowingListHtmlDataApi(user_url: str, page: int):
def GetUserFollowingListHtmlDataApi(user_url: str, page: int) -> _Element:
request_url = user_url.replace("/u/", "/users/") + "/following"
params = {
"page": page
Expand All @@ -246,7 +247,7 @@ def GetUserFollowingListHtmlDataApi(user_url: str, page: int):
return html_obj


def GetUserFollowersListHtmlDataApi(user_url: str, page: int):
def GetUserFollowersListHtmlDataApi(user_url: str, page: int) -> _Element:
request_url = user_url.replace("/u/", "/users/") + "/followers"
params = {
"page": page
Expand All @@ -256,7 +257,7 @@ def GetUserFollowersListHtmlDataApi(user_url: str, page: int):
return html_obj


def GetUserNextAnniversaryDayHtmlDataApi(user_slug: str):
def GetUserNextAnniversaryDayHtmlDataApi(user_slug: str) -> _Element:
request_url = f"https://www.jianshu.com/mobile/u/{user_slug}/anniversary"
source = httpx_get(request_url, headers=mobile_header).content
html_obj = etree.HTML(source)
Expand All @@ -270,7 +271,7 @@ def GetIslandPostJsonDataApi(post_slug: str) -> List[Dict]:
return json_obj


def GetUserTimelineHtmlDataApi(uslug: str, max_id: int) -> Dict:
def GetUserTimelineHtmlDataApi(uslug: str, max_id: int) -> _Element:
request_url = f"https://www.jianshu.com/users/{uslug}/timeline"
params = {
"max_id": max_id
Expand Down
53 changes: 34 additions & 19 deletions JianshuResearchTools/beikeisland.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,30 +126,45 @@ def GetBeikeIslandTradeOrderInfo(trade_type: str, page: int = 1) -> List[Dict]:
"buy": 2,
"sell": 1
}[trade_type]
json_obj = GetBeikeIslandTradeListJsonDataApi(pageIndex=page, retype=retype)
json_obj = GetBeikeIslandTradeListJsonDataApi(pageIndex=page,
retype=retype)
result = []
for item in json_obj["data"]["tradelist"]:
item_data = {
"tradeid": item["id"],
"tradeslug": item["tradeno"], # ? 我也不确定这个 no 什么意思,回来去问问
"user": {
"jianshuname": item["jianshuname"],
"bkname": item["reusername"], # ? 还有个 nickname,不知道哪个对
"avatar_url": item["avatarurl"],
"userlevelcode": item["levelnum"],
"userlevel": item["userlevel"],
"user_trade_count": item["tradecount"]
"trade_id": item["id"],
"trade_slug": item["tradeno"],
"publish_time": datetime.fromisoformat(item["releasetime"]),
"status": {
"code": item["statuscode"],
"text": item["statustext"]
},
"total": item["recount"],
"traded": item["recount"] - item["cantradenum"],
"remaining": item["cantradenum"],
"price": item["reprice"],
"minimum_limit": item["minlimit"],
"percentage": item["compeletper"],
"statuscode": item["statuscode"],
"status": item["statustext"],
"publish_time": datetime.fromisoformat(item["releasetime"])
"trade": {
"total": item["recount"],
"traded": item["recount"] - item["cantradenum"],
"remaining": item["cantradenum"],
"minimum_trade_limit": item["minlimit"],
"traded_percentage": round(
float(item["compeletper"]) / 100, 3
),
"price": item["reprice"],
}
}

if item["anonymity"]:
item_data["user"] = {
"is_anonymity": True
}
else:
item_data["user"] = {
"is_anonymity": False,
"name": item["reusername"],
"avatar_url": item["avatarurl"],
"level": {
"code": item["levelnum"],
"text": item["userlevel"]
}
}

result.append(item_data)
return result

Expand Down
6 changes: 3 additions & 3 deletions JianshuResearchTools/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def UserSlugToUserId(user_slug: str) -> int:
"""用户 Slug 转用户 ID
Args:
user_url (str): 用户 Slug
user_slug (str): 用户 Slug
Returns:
int: 用户 ID
Expand Down Expand Up @@ -318,7 +318,7 @@ def IslandPostUrlToIslandPostSlug(post_url: str) -> str:
"""小岛文章 URL 转小岛帖子 Slug
Args:
island_url (str): 小岛帖子 URL
post_url (str): 小岛帖子 URL
Returns:
str: 小岛帖子 Slug
Expand All @@ -333,7 +333,7 @@ def IslandPostSlugToIslandPostUrl(post_slug: str) -> str:
"""小岛帖子 Slug 转小岛帖子 URL
Args:
island_url (str): 小岛帖子 Slug
post_slug (str): 小岛帖子 Slug
Returns:
str: 小岛帖子 URL
Expand Down
17 changes: 9 additions & 8 deletions JianshuResearchTools/island.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
__all__ = [
"GetIslandName", "GetIslandAvatarUrl", "GetIslandIntroduction",
"GetIslandMembersCount", "GetIslandPostsCount", "GetIslandCategory",
"GetIslandPostFullConetnt", "GetIslandPosts", "GetIslandAllBasicData",
"GetIslandPostFullContent", "GetIslandPosts", "GetIslandAllBasicData",
"GetIslandAllPostsData"
]

Expand Down Expand Up @@ -124,11 +124,11 @@ def GetIslandCategory(island_url: str, disable_check: bool = False) -> str:
return result


def GetIslandPostFullConetnt(post_url: str, disable_check: bool = False) -> str:
def GetIslandPostFullContent(post_url: str, disable_check: bool = False) -> str:
"""获取小岛帖子完整内容
Args:
island_url (str): 小岛 URL
post_url (str): 小岛帖子 URL
disable_check (bool): 禁用参数有效性检查. Defaults to False.
Returns:
Expand All @@ -153,9 +153,9 @@ def GetIslandPosts(island_url: str, start_sort_id: int = None, count: int = 10,
count (int, optional): 每次返回的数据数量. Defaults to 10.
topic_id (int, optional): 话题 ID. Defaults to None.
sorting_method (str, optional): 排序方法,"time" 为按照发布时间排序,
"comment_time" 为按照最近评论时间排序,"hot" 为按照热度排序. Defaults to "time".
"comment_time" 为按照最近评论时间排序,"hot" 为按照热度排序. Defaults to "time".
get_full_content (bool, optional): 为 True 时,当检测到获取的帖子内容不全时,
自动调用 GetIslandPostFullConetnt 函数获取完整内容并替换. Defaults to False.
自动调用 GetIslandPostFullContent 函数获取完整内容并替换. Defaults to False.
disable_check (bool): 禁用参数有效性检查. Defaults to False.
Returns:
Expand All @@ -170,7 +170,8 @@ def GetIslandPosts(island_url: str, start_sort_id: int = None, count: int = 10,
"most_valuable": "best"
}[sorting_method],
json_obj = GetIslandPostsJsonDataApi(group_slug=IslandUrlToIslandSlug(island_url),
max_id=start_sort_id, count=count, topic_id=topic_id, order_by=order_by)
max_id=start_sort_id, count=count, topic_id=topic_id,
order_by=order_by)

result = []
for item in json_obj:
Expand Down Expand Up @@ -228,7 +229,7 @@ def GetIslandPosts(island_url: str, start_sort_id: int = None, count: int = 10,
except KeyError:
pass # 没有话题则跳过
if get_full_content and "..." in item_data["content"]: # 获取到的帖子内容不全
item_data["content"] = GetIslandPostFullConetnt(IslandPostSlugToIslandPostUrl(item_data["pslug"]),
item_data["content"] = GetIslandPostFullContent(IslandPostSlugToIslandPostUrl(item_data["pslug"]),
disable_check=True)
result.append(item_data)
return result
Expand Down Expand Up @@ -272,7 +273,7 @@ def GetIslandAllPostsData(island_url: str, count: int = 10,
sorting_method (str, optional): 排序方法,time 为按照发布时间排序,
comment_time 为按照最近评论时间排序,hot 为按照热度排序. Defaults to "time".
get_full_content (bool, optional): 为 True 时,当检测到获取的帖子内容不全时,
自动调用 GetIslandPostFullConetnt 函数获取完整内容并替换. Defaults to False.
自动调用 GetIslandPostFullContent 函数获取完整内容并替换. Defaults to False.
max_count (int, optional): 获取的小岛帖子信息数量上限,Defaults to None.
disable_check (bool): 禁用参数有效性检查. Defaults to False.
Expand Down
23 changes: 12 additions & 11 deletions JianshuResearchTools/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from .convert import (ArticleSlugToArticleUrl, CollectionSlugToCollectionUrl,
IslandSlugToIslandUrl, IslandUrlToIslandSlug,
NotebookSlugToNotebookUrl, UserSlugToUserUrl,
UserUrlToUserSlug)
UserUrlToUserSlug, ArticleUrlToArticleSlug,
NotebookUrlToNotebookId, NotebookUrlToNotebookSlug, CollectionUrlToCollectionSlug)
from .exceptions import InputError
from .utils import CallWithoutCheck, NameValueMappingToString, OnlyOne

Expand Down Expand Up @@ -85,7 +86,7 @@ def clear_cache():
_cache_dict.clear()


class User():
class User:
"""用户类
"""
def __init__(self, user_url: str = None, *, user_slug: str = None):
Expand Down Expand Up @@ -416,7 +417,7 @@ def __str__(self) -> str:
}, title="用户信息摘要")


class Article():
class Article:
"""文章类
"""
def __init__(self, article_url: str = None, article_slug: str = None):
Expand Down Expand Up @@ -479,7 +480,7 @@ def slug(self) -> str:
Returns:
str: 文章 Slug
"""
return article.GetArticleSlug(self._url)
return ArticleUrlToArticleSlug(self._url)

@property
@cache_result_wrapper
Expand Down Expand Up @@ -709,7 +710,7 @@ def __str__(self) -> str:
}, title="文章信息摘要")


class Notebook():
class Notebook:
"""文集类
"""
def __init__(self, notebook_url: str = None, notebook_slug: str = None):
Expand Down Expand Up @@ -772,7 +773,7 @@ def id(self) -> int:
Returns:
int: 文集 ID
"""
return notebook.GetNotebookId(self._url)
return NotebookUrlToNotebookId(self._url)

@property
@cache_result_wrapper
Expand All @@ -782,7 +783,7 @@ def slug(self) -> str:
Returns:
str: 文集 Slug
"""
return notebook.GetNotebookSlug(self._url)
return NotebookUrlToNotebookSlug(self._url)

@property
@cache_result_wrapper
Expand Down Expand Up @@ -910,7 +911,7 @@ def __str__(self) -> str:
}, title="文集信息摘要")


class Collection():
class Collection:
"""专题类
"""
def __init__(self, collection_url: str = None, collection_slug: str = None,
Expand Down Expand Up @@ -979,7 +980,7 @@ def slug(self) -> str:
Returns:
str: 专题 Slug
"""
return collection.GetCollectionSlug(self._url)
return CollectionUrlToCollectionSlug(self._url)

@property
@cache_result_wrapper
Expand Down Expand Up @@ -1076,7 +1077,7 @@ def editors_info(self, page: int = 1) -> List[Dict]:
"""获取专题编辑信息
Args:
page (int, optional): 页码. Defause to 1.
page (int, optional): 页码. Defaults to 1.
Raises:
InputError: 因缺少 ID 参数而无法获取结果时抛出此异常
Expand Down Expand Up @@ -1181,7 +1182,7 @@ def __str__(self) -> str:
}, title="专题信息摘要")


class Island():
class Island:
"""小岛类
"""
def __init__(self, island_url: str = None, island_slug: str = None):
Expand Down
Loading

0 comments on commit 1be29b1

Please sign in to comment.