-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentiment_analysis_amalgomator_checkpoint.py
68 lines (49 loc) · 2.5 KB
/
sentiment_analysis_amalgomator_checkpoint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
# Checkpoint used for both the tokenizer and the classifier; kept in one place
# so the two can never point at different models.
MODEL_NAME = "joeddav/distilbert-base-uncased-go-emotions-student"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Emotion names, ordered by the model's output index.
# These are read from the checkpoint's own config instead of a hand-written
# list: a manual list (the previous one contained a placeholder entry) can have
# the right length while pairing the wrong name with a logit position.
emotions = [model.config.id2label[i] for i in range(model.config.num_labels)]

# Path to the folder containing the text files
folder_path = os.path.expanduser('/home/drew/Desktop/Test/prodigal_son/prodigal_son_texts')

# Text files to analyse, one per translation
files = ['nasb95.txt', 'nlt.txt', 'esv.txt', 'kjv.txt', 'nkjv.txt', 'csb.txt', 'lsb.txt', 'niv84.txt']

# One row per file, one column per emotion; filled in by the analysis loop
all_probabilities = np.zeros((len(files), len(emotions)))
# Loop through each file and perform emotion analysis.
# Row `idx` of `all_probabilities` receives the per-emotion scores for
# `files[idx]`.
for idx, file_name in enumerate(files):
    # Construct the full file path
    file_path = os.path.join(folder_path, file_name)

    # Read the content of the file
    with open(file_path, 'r', encoding='utf-8') as text_file:
        text = text_file.read()

    # Tokenize the input text.
    # NOTE: truncation=True with max_length=512 means only the first 512
    # tokens of each file are scored; anything beyond that is ignored.
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")

    # Perform emotion detection. Gradients are not needed for inference, so
    # disable autograd to avoid building a graph on every forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the logits and apply sigmoid to get independent per-label
    # probabilities; [0] selects the single sequence in the batch.
    logits = outputs.logits
    probabilities = torch.sigmoid(logits).detach().numpy()[0]

    # Store the probabilities
    all_probabilities[idx] = probabilities
# Calculate the mean and standard deviation for each emotion across all files
# (axis 0 is the file axis). np.std uses its default ddof=0, i.e. the
# population standard deviation.
mean_of_emotions = np.mean(all_probabilities, axis=0)
std_dev_of_emotions = np.std(all_probabilities, axis=0)

# Print the mean score of each emotion
print("\nMean of each emotion:")
for emotion, mean_value in zip(emotions, mean_of_emotions):
    print(f"{emotion}: {mean_value:.2f}")

# Print the standard deviation of each emotion. The header previously said
# "Mean of each emotion's standard deviation", but the values are plain
# standard deviations across files, not a mean of anything.
print("\nStandard deviation of each emotion:")
for emotion, std_value in zip(emotions, std_dev_of_emotions):
    print(f"{emotion} Std Dev: {std_value:.2f}")