Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a new field to article response #41

Merged
merged 1 commit into from
Nov 24, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions api/endpoints/nlp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import spacy
import socials
import socid_extractor
Expand All @@ -16,16 +15,17 @@

# Router object; the endpoint handlers in this file attach to it via
# the `@router.post(...)` decorator.
router = APIRouter()


# Load the spaCy "en_core_web_md" pipeline here, outside any handler.
# NOTE(review): presumably so it is loaded once at import rather than per
# request — confirm against the full file.
nlp = spacy.load("en_core_web_md")


class ArticleAction(BaseModel):
    """Request body accepted by the ``/nlp/article`` endpoint.

    Carries the single ``link`` string that the handler reads as
    ``article.link``.
    """

    # Link supplied by the caller.
    link: str


class SummarizeAction(BaseModel):
    """Model with a single ``text`` string field.

    NOTE(review): presumably the request body of a summarize endpoint; that
    endpoint is not visible in this excerpt — confirm against the full file.
    """

    # Text supplied by the caller.
    text: str


@router.post("/nlp/article")
async def root(article: ArticleAction):
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) ' \
Expand All @@ -46,7 +46,8 @@ async def root(article: ArticleAction):

sentiment = SentimentIntensityAnalyzer()

remove_entities = ["TIME", "DATE", "CARDINAL", "LANGUAGE", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL"]
remove_entities = ["TIME", "DATE", "LANGUAGE", "PERCENT", "MONEY", "QUANTITY", "ORDINAL",
"CARDINAL"]

entities = [(e.label_, e.text, e.start_char, e.end_char) for e in doc.ents]

Expand All @@ -59,6 +60,18 @@ async def root(article: ArticleAction):

social = socials.extract(article.link).get_matches_per_platform()

# List of entities to remove from the rendered HTML
entities_to_remove = ["TIME", "DATE", "LANGUAGE", "PERCENT", "MONEY", "QUANTITY", "ORDINAL",
"CARDINAL"]

# ...

# Filter out entities to be removed
filtered_entities_unique = [ent for ent in filtered_entities_unique if ent[1] not in entities_to_remove]

# Render HTML with filtered entities
spacy_html = displacy.render(doc, style="ent", options={"ents": [ent[0] for ent in filtered_entities_unique]})

return {
"data": {
"title": crawler.title,
Expand All @@ -74,7 +87,8 @@ async def root(article: ArticleAction):
"entities": filtered_entities_unique,
"videos": crawler.movies,
"social": social,
"spacy": displacy.render(doc, style="ent"),
"spacy": spacy_html,
"spacy_markdown": md(spacy_html, newline_style="BACKSLASH", strip=['a'],heading_style="ATX"),
"sentiment": sentiment.polarity_scores(crawler.text),
'accounts': socid_extractor.extract(crawler.text)
},
Expand Down