Merge pull request #39 from cornatul/feature/social-information
More code cleanup and added a new key called accounts
izdrail authored Nov 23, 2023
2 parents 357008e + b38eed5 commit 62aa435
Showing 4 changed files with 40 additions and 20 deletions.
3 changes: 2 additions & 1 deletion api/endpoints/nlp.py
@@ -1,7 +1,7 @@

import spacy
import socials
import socid_extractor

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from markdownify import markdownify as md
@@ -76,5 +76,6 @@ async def root(article: ArticleAction):
"social": social,
"spacy": displacy.render(doc, style="ent"),
"sentiment": sentiment.polarity_scores(crawler.text),
'accounts': socid_extractor.extract(crawler.text)
},
}
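
For context on the new accounts key: socid_extractor scans raw page text for account identifiers and profile metadata. A minimal sketch of the call, with a hypothetical input string (the extracted keys depend on which site-specific pattern matches; an unmatched input yields an empty dict):

import socid_extractor

# Hypothetical profile-page snippet; the real input is the crawled page text.
page_text = '"uid": "171989", "username": "getpaidguy"'

accounts = socid_extractor.extract(page_text)
print(accounts)  # e.g. {"uid": "171989", "username": "getpaidguy"}, or {} if nothing matched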
54 changes: 35 additions & 19 deletions api/endpoints/videos.py
@@ -1,6 +1,10 @@
from fastapi import APIRouter, Path, Query, Depends
from pydantic import BaseModel
from cachetools import TTLCache
from youtube_search import YoutubeSearch
# Used by get_hashtag_videos below.
from TikTokApi import TikTokApi
from datetime import datetime, timedelta
from dateutil import parser


router = APIRouter()

@@ -9,25 +13,37 @@ class VideosAction(BaseModel):
    keyword: str


class TikTokAction(BaseModel):
    token: str


async def get_hashtag_videos(token):
    async with TikTokApi() as api:
        # create_sessions expects a list of ms_tokens, so wrap the single token.
        await api.create_sessions(ms_tokens=[token], num_sessions=1, sleep_after=3)
        tag = api.hashtag(name="funny")
        # tag.videos() is an async iterator; collect the raw video data so the
        # route can return a JSON-serializable list.
        return [video.as_dict async for video in tag.videos(count=30)]


@router.post("/videos")
def parse_publish_time(publish_time):
if "ago" in publish_time:
words = publish_time.split()
delta = int(words[0])
if "years" in words:
return (datetime.now() - timedelta(days=365 * delta)).strftime("%Y-%m-%d")
elif "year" in words:
return (datetime.now() - timedelta(days=365 * delta)).strftime("%Y-%m-%d")
elif "month" in words:
return (datetime.now() - timedelta(days=30 * delta)).strftime("%Y-%m-%d")
elif "months" in words:
return (datetime.now() - timedelta(days=30 * delta)).strftime("%Y-%m-%d")
elif "week" in words:
return (datetime.now() - timedelta(weeks=delta)).strftime("%Y-%m-%d")
elif "weeks" in words:
return (datetime.now() - timedelta(weeks=delta)).strftime("%Y-%m-%d")
elif "day" in words:
return (datetime.now() - timedelta(days=delta)).strftime("%Y-%m-%d")
elif "days" in words:
return (datetime.now() - timedelta(days=delta)).strftime("%Y-%m-%d")
return parser.parse(publish_time)


@router.post("/videos/youtube")
async def root(post: VideosAction):
from youtube_search import YoutubeSearch
results = YoutubeSearch(post.keyword, max_results=10)
return {"data": results}
results = YoutubeSearch(post.keyword, max_results=30).to_dict()

# Extract the list of videos
videos = results

# Sort the videos by publish date in descending order
sorted_videos = sorted(videos, key=lambda x: parse_publish_time(x['publish_time']), reverse=True)

@router.post("/tiktok")
async def tiktok(post: TikTokAction):
results = await get_hashtag_videos(post.token)
return {"data": results}
return {"data": {"search_terms": post.keyword, "max_results": 30, "videos": sorted_videos}}
2 changes: 2 additions & 0 deletions main.py
@@ -9,6 +9,7 @@
from api.endpoints import scrapper
from api.endpoints import google
from api.endpoints import seo
from api.endpoints import videos
from api.endpoints import nlp as nlp_endpoint

app = FastAPI(
@@ -41,6 +42,7 @@
app.include_router(google.router)
app.include_router(seo.router)
app.include_router(nlp_endpoint.router)
app.include_router(videos.router)


@app.get("/")
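With the router registered, the new endpoint can be exercised once the app is running (hypothetical host and port; the payload shape comes from VideosAction):

# Hypothetical client call against a locally running instance.
import requests

resp = requests.post("http://localhost:8000/videos/youtube", json={"keyword": "fastapi"})
videos = resp.json()["data"]["videos"]
print(videos[0]["publish_time"] if videos else "no results")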
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,4 +1,5 @@
email-validator~=2.0
socid-extractor~=0.0.26
TikTokApi~=6.1.1
fastapi~=0.104.1
html5lib~=1.1
