Merge pull request #14 from elsong86/vader-feature
Swapped out TextBlob for spaCy and VADER for added functionality
elsong86 authored Oct 5, 2024
2 parents e841067 + 860eb8d commit bc1a811
Showing 4 changed files with 94 additions and 17 deletions.
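
For context on the swap: TextBlob exposes a single pattern-based polarity score in [-1, 1], while VADER returns a dict of lexicon-based scores whose 'compound' value is also in [-1, 1] but is tuned for short, informal text such as reviews. A minimal comparison sketch, assuming both packages are installed (the sample sentence is illustrative):

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

text = "The service was painfully slow, but the food was AMAZING!"

# TextBlob: pattern-based polarity in [-1, 1]
print(TextBlob(text).sentiment.polarity)

# VADER: lexicon- and rule-based scores; 'compound' is the normalized summary in [-1, 1]
analyzer = SentimentIntensityAnalyzer()
print(analyzer.polarity_scores(text)['compound'])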
34 changes: 22 additions & 12 deletions backend/app/routers/outscraper_reviews.py
@@ -1,15 +1,16 @@
from fastapi import APIRouter, HTTPException, Query, Depends, Request
from outscraper import ApiClient
from textblob import TextBlob
import logging
import json
import os
from datetime import datetime, timezone, timedelta
from ..utils.rate_limiter import rate_limiter
from ..utils.redis_utils import redis_client
from ..services.supabase_service import SupabaseService
from ..utils.sentiment_analysis import analyze_sentiments
from lingua import Language, LanguageDetectorBuilder
from pydantic import BaseModel, Field
from typing import Callable
from typing import Callable

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -21,6 +22,19 @@
# Instantiate the SupabaseService using the modularized Supabase client
supabase_service = SupabaseService()

# Initialize Lingua language detector with English enabled
detector = LanguageDetectorBuilder.from_languages(Language.ENGLISH, Language.SPANISH).build()

# Define the is_english function
def is_english(text):
    try:
        # Detect language using Lingua
        detected_language = detector.detect_language_of(text)
        return detected_language == Language.ENGLISH
    except Exception as e:
        logger.error(f"Error detecting language: {e}")
        return False

router = APIRouter()

def check_cache(place_id: str):
@@ -42,7 +56,11 @@ def fetch_reviews_from_api(place_id: str):
        )
        if results and isinstance(results, list) and len(results) > 0:
            reviews = results[0].get('reviews_data', [])
            non_empty_reviews = [review for review in reviews if review.get('review_text') and review['review_text'].strip()]

            # Filter out non-English reviews using is_english
            english_reviews = [review for review in reviews if is_english(review['review_text'])]

            non_empty_reviews = [review for review in english_reviews if review.get('review_text') and review['review_text'].strip()]

            # Cache the fetched reviews
            cache_key = f"reviews:{place_id}"
@@ -55,6 +73,7 @@ def fetch_reviews_from_api(place_id: str):
logger.error(f"Error fetching reviews from API: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))


def store_restaurant(place_id: str, name: str, address: str):
    try:
        # Check if the restaurant already exists
@@ -81,15 +100,6 @@ def store_restaurant(place_id: str, name: str, address: str):
logger.error(f"Failed to store restaurant: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))


def analyze_sentiments(reviews):
    sentiments = [TextBlob(review['review_text']).sentiment.polarity for review in reviews]
    if sentiments:
        average_sentiment = sum(sentiments) / len(sentiments)
    else:
        average_sentiment = 0.0
    return average_sentiment

def get_stored_reviews(place_id: str):
    # Define your freshness criteria (e.g., 1 week)
    freshness_limit = timedelta(weeks=1)
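As a usage note on the new language filter, here is a small sketch of how the Lingua-based is_english check behaves on mixed-language input (the review strings are made up for illustration):

from lingua import Language, LanguageDetectorBuilder

# Same two-language setup as in the router above
detector = LanguageDetectorBuilder.from_languages(Language.ENGLISH, Language.SPANISH).build()

def is_english(text):
    try:
        return detector.detect_language_of(text) == Language.ENGLISH
    except Exception:
        return False

# Illustrative reviews, not real data
reviews = [
    {'review_text': 'The tacos were fantastic and the service was quick.'},
    {'review_text': 'La comida estaba deliciosa pero el servicio fue lento.'},
]
english_reviews = [r for r in reviews if is_english(r['review_text'])]
print(len(english_reviews))  # expected: 1
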
44 changes: 44 additions & 0 deletions backend/app/utils/sentiment_analysis.py
@@ -0,0 +1,44 @@
import spacy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import logging

logger = logging.getLogger(__name__)

# Initialize spaCy and VADER
nlp = spacy.load("en_core_web_sm")
analyzer = SentimentIntensityAnalyzer()

def clean_text(text):
    doc = nlp(text)
    cleaned_tokens = []

    for token in doc:
        # Only keep alphabetic tokens, remove stopwords and punctuation
        if token.is_alpha and not token.is_stop:
            cleaned_tokens.append(token.text.lower())

    return ' '.join(cleaned_tokens)

def analyze_sentiments(reviews):
    sentiments = []

    for review in reviews:
        review_text = review['review_text']

        # Clean the review text using spaCy
        cleaned_text = clean_text(review_text)

        # Perform sentiment analysis with VADER on the cleaned text
        sentiment_score = analyzer.polarity_scores(cleaned_text)['compound']

        # Rescale VADER score from [-1, 1] to [0, 10]
        scaled_score = (sentiment_score + 1) * 5  # Transforms the score to [0, 10]
        sentiments.append(scaled_score)

    # Calculate the average sentiment
    if sentiments:
        average_sentiment = sum(sentiments) / len(sentiments)
    else:
        average_sentiment = 0.0

    return average_sentiment
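
As a quick check on the new 0-10 scaling, a hedged usage sketch of analyze_sentiments; the review strings are invented, the import path is an assumption about the package layout, and the exact scores depend on VADER's lexicon:

from app.utils.sentiment_analysis import analyze_sentiments  # assumed package layout

# Illustrative reviews, not real data
reviews = [
    {'review_text': 'Absolutely loved the food, great atmosphere!'},
    {'review_text': 'Terrible experience, the soup was cold.'},
]

# VADER's compound score lies in [-1, 1]; the module rescales it with
# scaled = (compound + 1) * 5, so -1 -> 0, 0 -> 5, +1 -> 10.
average = analyze_sentiments(reviews)
print(round(average, 2))  # an average on the 0-10 scale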
23 changes: 23 additions & 0 deletions backend/requirements.txt
@@ -5,16 +5,22 @@ annotated-types==0.7.0
anyio==4.4.0
attrs==24.2.0
Authlib==1.3.2
blis==1.0.1
catalogue==2.0.10
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.19.0
confection==0.1.5
cryptography==43.0.0
cymem==2.0.8
deprecation==2.1.0
dnspython==2.6.1
dparse==0.6.4b0
ecdsa==0.19.0
email_validator==2.2.0
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
fastapi==0.112.0
filelock==3.12.4
frozenlist==1.4.1
@@ -28,17 +34,24 @@ hyperframe==6.0.1
idna==3.7
Jinja2==3.1.4
joblib==1.4.2
langcodes==3.4.1
language_data==1.2.0
lingua-language-detector==2.0.2
marisa-trie==1.2.0
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.22.0
mdurl==0.1.2
multidict==6.0.5
murmurhash==1.0.10
nltk==3.9.1
numpy==2.0.2
outscraper==5.2.1
packaging==24.1
pip-autoremove==0.10.0
pipdeptree==2.23.1
postgrest==0.16.11
preshed==3.0.9
psutil==6.0.0
pyasn1==0.6.0
pycparser==2.22
@@ -61,18 +74,28 @@ safety-schemas==0.0.5
setuptools==73.0.1
shellingham==1.5.4
six==1.16.0
smart-open==7.0.5
sniffio==1.3.1
spacy==3.8.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.4.8
starlette==0.37.2
storage3==0.7.7
StrEnum==0.4.15
supabase==2.7.3
supafunc==0.5.1
textblob==0.18.0.post0
thinc==8.3.2
tqdm==4.66.5
typer==0.12.5
typing_extensions==4.12.2
urllib3==2.2.2
uvicorn==0.30.5
vaderSentiment==3.3.2
wasabi==1.1.3
weasel==0.4.1
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.20.1
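
Note that en_core_web_sm is pinned above as a direct wheel URL, so pip install -r requirements.txt should pull the spaCy model in with everything else. As a fallback for environments where that wheel is absent, a small sketch using spaCy's built-in download helper (an assumption about deployment, not part of this PR):

import spacy

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model not installed; fetch it with spaCy's bundled downloader
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")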
10 changes: 5 additions & 5 deletions frontend/src/app/restaurant/[id]/page.tsx
@@ -56,11 +56,11 @@ const RestaurantPage: React.FC = () => {
        }

        if (data.average_sentiment !== undefined) {
          const scaledSentiment = ((data.average_sentiment + 1) / 2) * 10;
          setAverageSentiment(scaledSentiment);
        } else {
          setAverageSentiment(null);
        }
          setAverageSentiment(data.average_sentiment); // No additional scaling needed
        } else {
          setAverageSentiment(null);
        }

      } catch (error) {
        console.error('Failed to fetch reviews:', error);
      } finally {
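The frontend change follows from the backend rescaling: the API now returns average_sentiment already on the 0-10 scale, so re-applying the old client-side mapping would distort it. A quick illustrative check (values made up), shown in Python for consistency with the snippets above:

backend_score = 7.5  # hypothetical average already on the 0-10 scale

# Old frontend mapping assumed a [-1, 1] polarity and rescaled it to [0, 10]
old_frontend_scaling = ((backend_score + 1) / 2) * 10
print(old_frontend_scaling)  # 42.5 -- far outside the intended range

# New behaviour: display the backend value as-is
print(backend_score)  # 7.5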
