-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from elsong86/vader-feature
Swapped out TextBlob for spaCy and VADER for added functionality
- Loading branch information
Showing
4 changed files
with
94 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import spacy | ||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | ||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
# Initialize spaCy and VADER once, at import time, so that clean_text() and
# analyze_sentiments() share the same pipeline and analyzer instances.
# spacy.load() needs the "en_core_web_sm" model package to be installed.
nlp = spacy.load("en_core_web_sm")
analyzer = SentimentIntensityAnalyzer()
|
||
# Negation words that must survive stop-word removal: dropping them would
# turn "not good" into "good" and invert the polarity of the score that is
# later computed from the cleaned text.
_NEGATION_WORDS = frozenset({
    "no", "not", "nor", "never", "neither", "none",
    "nothing", "nowhere", "cannot", "without",
})

# Contraction suffix tokens ("isn't" is tokenized as "is" + "n't"). They are
# not alphabetic, so they are normalised to "not" instead of being discarded.
_NEGATION_CONTRACTIONS = frozenset({"n't", "n\u2019t", "n\u2018t"})


def clean_text(text):
    """Return *text* lower-cased and reduced to its content words.

    The text is tokenized with the module-level spaCy pipeline. Punctuation,
    numbers and any other non-alphabetic tokens are removed, as are stop
    words. Negations are the exception: negation words are kept and
    contraction suffixes such as "n't" are emitted as "not", so the
    polarity of the sentence is still recoverable from the result.

    Args:
        text: The raw text to clean.

    Returns:
        The kept tokens, lower-cased and joined by single spaces. The result
        is an empty string when no token is kept.
    """
    cleaned_tokens = []

    for token in nlp(text):
        lowered = token.text.lower()

        if lowered in _NEGATION_CONTRACTIONS:
            cleaned_tokens.append("not")
        elif token.is_alpha and (lowered in _NEGATION_WORDS or not token.is_stop):
            # Only keep alphabetic tokens; stop words are dropped unless
            # they carry a negation.
            cleaned_tokens.append(lowered)

    return ' '.join(cleaned_tokens)
|
||
def analyze_sentiments(reviews):
    """Return the average sentiment of *reviews* on a 0-10 scale.

    Each review's ``'review_text'`` is cleaned with ``clean_text`` and scored
    with VADER. The compound score, which lies in [-1, 1], is rescaled
    linearly to [0, 10] (0 = most negative, 5 = neutral, 10 = most positive)
    and the rescaled scores are averaged.

    Reviews that have no ``'review_text'`` key, or whose value is not a
    string (for example ``None``), are skipped with a warning instead of
    aborting the whole computation.

    Args:
        reviews: An iterable of mappings, each expected to hold the review
            body under the ``'review_text'`` key.

    Returns:
        The mean rescaled score as a float, or ``0.0`` when no review could
        be scored.
    """
    sentiments = []

    for review in reviews:
        try:
            review_text = review['review_text']
        except KeyError:
            review_text = None

        if not isinstance(review_text, str):
            # Nothing to analyse; counting it would either crash spaCy or
            # skew the average with a made-up score.
            logger.warning(
                "Skipping review without usable 'review_text' (got %s)",
                type(review_text).__name__,
            )
            continue

        # Clean the review text using spaCy
        cleaned_text = clean_text(review_text)

        # Perform sentiment analysis with VADER on the cleaned text
        sentiment_score = analyzer.polarity_scores(cleaned_text)['compound']

        # Rescale VADER score from [-1, 1] to [0, 10]
        sentiments.append((sentiment_score + 1) * 5)

    # Calculate the average sentiment; an empty result set yields 0.0
    if not sentiments:
        return 0.0

    return sum(sentiments) / len(sentiments)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters