From 395a5c5825d9975c86d3fae6af68e691ae22766a Mon Sep 17 00:00:00 2001
From: Anuj Kumar Pandey <83698322+codermal7@users.noreply.github.com>
Date: Tue, 8 Oct 2024 00:57:51 +0530
Subject: [PATCH] Add files via upload
Submitting Code Side Quest: Simple Sentiment Analysis for Stock Prices Notebook (#6720)
---
.../Sentiment_Analysis_for_Stock_Prices.ipynb | 576 ++++++++++++++++++
1 file changed, 576 insertions(+)
create mode 100644 examples/Sentiment_Analysis_for_Stock_Prices.ipynb
diff --git a/examples/Sentiment_Analysis_for_Stock_Prices.ipynb b/examples/Sentiment_Analysis_for_Stock_Prices.ipynb
new file mode 100644
index 000000000000..e7bd7e4dafe0
--- /dev/null
+++ b/examples/Sentiment_Analysis_for_Stock_Prices.ipynb
@@ -0,0 +1,576 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Sentiment Analysis for Stock Prices**\n",
+ "\n",
+ "This notebook demonstrates how to fetch sentiment data from financial news using OpenBB and correlate it with stock price movements. The analysis will use sentiment scores to understand the impact of news on stock prices.\n",
+ "\n",
+ "Author:\n",
+ "[Anuj Kumar Pandey](https://github.com/codermal7)\n",
+ "\n",
+ "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1BXyVZEYZfrVdvaM7vDRoVn0cKSG_BT7Q?usp=sharing)\n",
+ "\n",
+ "\n",
+ "If you are running this notebook in Colab, you can run the following command to install the OpenBB Platform:\n",
+ "\n",
+ "`!pip install openbb -q`"
+ ],
+ "metadata": {
+ "id": "K_fd_9baXaH9"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%pip install openbb textblob -q"
+ ],
+ "metadata": {
+ "id": "9SiXPtRwW_lo"
+ },
+ "execution_count": 55,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from openbb import obb\n",
+ "from textblob import TextBlob"
+ ],
+ "metadata": {
+ "id": "J7B1R7s10Bsa"
+ },
+ "execution_count": 56,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "symbols = ['AAPL', 'GOOG', 'MSFT', 'NVDA']\n",
+ "dataframes = []\n",
+ "\n",
+ "# Fetch the price history of each symbol; a symbol whose fetch fails is\n",
+ "# reported and skipped so the remaining symbols can still be analysed.\n",
+ "for symbol in symbols:\n",
+ "    try:\n",
+ "        data = obb.equity.price.historical(\n",
+ "            symbol=symbol,\n",
+ "            start_date='2010-01-01',\n",
+ "            provider=\"yfinance\"\n",
+ "        ).to_df()\n",
+ "        data['Symbol'] = symbol\n",
+ "        dataframes.append(data)\n",
+ "    except Exception as e:\n",
+ "        print(f\"Failed to fetch data for {symbol}: {str(e)}\")\n",
+ "\n",
+ "# pd.concat([]) fails with an opaque \"No objects to concatenate\" error, so\n",
+ "# stop here with a message that names the real problem.\n",
+ "if not dataframes:\n",
+ "    raise RuntimeError(\"No price data could be fetched for any of the requested symbols.\")\n",
+ "\n",
+ "# reset_index() moves the date index into a regular 'date' column.\n",
+ "combined_data = pd.concat(dataframes).reset_index()\n",
+ "\n",
+ "combined_data.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "MrRw8lT_zD11",
+ "outputId": "0be96afa-3206-4f38-c904-5fc1d2e3626f"
+ },
+ "execution_count": 57,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " date open high low close volume split_ratio \\\n",
+ "0 2010-01-04 7.622500 7.660714 7.585000 7.643214 493729600 0.0 \n",
+ "1 2010-01-05 7.664286 7.699643 7.616071 7.656429 601904800 0.0 \n",
+ "2 2010-01-06 7.656429 7.686786 7.526786 7.534643 552160000 0.0 \n",
+ "3 2010-01-07 7.562500 7.571429 7.466071 7.520714 477131200 0.0 \n",
+ "4 2010-01-08 7.510714 7.571429 7.466429 7.570714 447610800 0.0 \n",
+ "\n",
+ " dividend Symbol \n",
+ "0 0.0 AAPL \n",
+ "1 0.0 AAPL \n",
+ "2 0.0 AAPL \n",
+ "3 0.0 AAPL \n",
+ "4 0.0 AAPL "
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " open | \n",
+ " high | \n",
+ " low | \n",
+ " close | \n",
+ " volume | \n",
+ " split_ratio | \n",
+ " dividend | \n",
+ " Symbol | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2010-01-04 | \n",
+ " 7.622500 | \n",
+ " 7.660714 | \n",
+ " 7.585000 | \n",
+ " 7.643214 | \n",
+ " 493729600 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " AAPL | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2010-01-05 | \n",
+ " 7.664286 | \n",
+ " 7.699643 | \n",
+ " 7.616071 | \n",
+ " 7.656429 | \n",
+ " 601904800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " AAPL | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2010-01-06 | \n",
+ " 7.656429 | \n",
+ " 7.686786 | \n",
+ " 7.526786 | \n",
+ " 7.534643 | \n",
+ " 552160000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " AAPL | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2010-01-07 | \n",
+ " 7.562500 | \n",
+ " 7.571429 | \n",
+ " 7.466071 | \n",
+ " 7.520714 | \n",
+ " 477131200 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " AAPL | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2010-01-08 | \n",
+ " 7.510714 | \n",
+ " 7.571429 | \n",
+ " 7.466429 | \n",
+ " 7.570714 | \n",
+ " 447610800 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " AAPL | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "combined_data",
+ "summary": "{\n \"name\": \"combined_data\",\n \"rows\": 14860,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2010-01-04\",\n \"max\": \"2024-10-07\",\n \"num_unique_values\": 3715,\n \"samples\": [\n \"2023-08-03\",\n \"2013-05-09\",\n \"2011-04-12\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.71524871638425,\n \"min\": 0.21799999475479126,\n \"max\": 467.0,\n \"num_unique_values\": 12947,\n \"samples\": [\n 27.040000915527344,\n 46.676998138427734,\n 92.47000122070312\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 84.51248909827238,\n \"min\": 0.22624999284744263,\n \"max\": 468.3500061035156,\n \"num_unique_values\": 12888,\n \"samples\": [\n 7.557143211364746,\n 28.66181755065918,\n 47.467750549316406\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.87467459426078,\n \"min\": 0.2162500023841858,\n \"max\": 464.4599914550781,\n \"num_unique_values\": 13002,\n \"samples\": [\n 21.50214385986328,\n 31.549999237060547,\n 12.909112930297852\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.7286349882444,\n \"min\": 0.22200000286102295,\n \"max\": 467.55999755859375,\n \"num_unique_values\": 13067,\n \"samples\": [\n 28.653831481933594,\n 26.477306365966797,\n 253.6999969482422\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 266755248,\n \"min\": 158434,\n \"max\": 3692928000,\n \"num_unique_values\": 14684,\n \"samples\": [\n 49455600,\n 46230579,\n 403984000\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"split_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.19856447343167133,\n \"min\": 0.0,\n \"max\": 20.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.0,\n 7.0,\n 20.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dividend\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.030061456207607976,\n \"min\": 0.0,\n \"max\": 0.75,\n \"num_unique_values\": 35,\n \"samples\": [\n 0.75,\n 0.25,\n 0.62\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Symbol\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"GOOG\",\n \"NVDA\",\n \"AAPL\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 57
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "news_data = {\n",
+ " \"AAPL\": [\"Apple releases new iPhone, stocks rise\", \"Apple faces new challenges in production\"],\n",
+ " \"GOOG\": [\"Google announces AI breakthroughs\", \"Google facing lawsuit over privacy issues\"],\n",
+ " \"MSFT\": [\"Microsoft's cloud service revenue surges\", \"Microsoft layoffs hit global offices\"],\n",
+ " \"NVDA\": [\"NVIDIA dominates the GPU market\", \"NVIDIA shares drop amidst market concerns\"]\n",
+ "} # Dummy headlines used as sample input for the sentiment analysis\n",
+ "\n",
+ "def get_sentiment(text):\n",
+ "    \"\"\"Return the TextBlob sentiment polarity of ``text``.\n",
+ "\n",
+ "    TextBlob documents polarity as a float in [-1.0, 1.0], negative to positive.\n",
+ "    \"\"\"\n",
+ "    return TextBlob(text).sentiment.polarity\n",
+ "\n",
+ "# Average the headline polarities of each symbol into one score per symbol.\n",
+ "sentiment_scores = {}\n",
+ "for symbol, headlines in news_data.items():\n",
+ "    scores = [get_sentiment(headline) for headline in headlines]\n",
+ "    # A symbol without headlines gets a neutral 0.0 instead of a ZeroDivisionError.\n",
+ "    avg_sentiment = sum(scores) / len(scores) if scores else 0.0\n",
+ "    sentiment_scores[symbol] = avg_sentiment\n",
+ "    print(f\"{symbol} Average Sentiment Score: {avg_sentiment}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ioPlJc67zJuY",
+ "outputId": "ca989ffd-9453-4734-ea29-ffb46ff2b588"
+ },
+ "execution_count": 58,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "AAPL Average Sentiment Score: 0.13636363636363635\n",
+ "GOOG Average Sentiment Score: 0.0\n",
+ "MSFT Average Sentiment Score: 0.0\n",
+ "NVDA Average Sentiment Score: 0.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def visualize_stock_prices_with_sentiment(data, symbol, avg_sentiment):\n",
+ "    \"\"\"Plot a symbol's close price, its 30-day rolling mean and a sentiment line.\n",
+ "\n",
+ "    Args:\n",
+ "        data: DataFrame with 'Symbol', 'date' and 'close' columns.\n",
+ "        symbol: Value of the 'Symbol' column selecting the rows to plot.\n",
+ "        avg_sentiment: Number drawn as a horizontal line; green when\n",
+ "            positive, red otherwise.\n",
+ "\n",
+ "    Prices use the left y-axis. The sentiment line uses its own right y-axis,\n",
+ "    because a score near zero cannot be told apart from 0 on a USD price scale.\n",
+ "    \"\"\"\n",
+ "    # Copy the filtered rows so adding a column does not write into a slice of\n",
+ "    # the caller's frame (avoids pandas' SettingWithCopyWarning).\n",
+ "    stock_data = data[data['Symbol'] == symbol].copy()\n",
+ "    stock_data['Rolling Mean'] = stock_data['close'].rolling(window=30).mean()\n",
+ "\n",
+ "    dates = stock_data['date']\n",
+ "    rolling_mean = stock_data['Rolling Mean']\n",
+ "\n",
+ "    # Apply the seaborn style before the axes are created so it takes effect.\n",
+ "    sns.set(style=\"whitegrid\")\n",
+ "    fig, price_ax = plt.subplots(figsize=(12, 7))\n",
+ "\n",
+ "    price_ax.plot(dates, stock_data['close'], label=f'{symbol} Close Price', color='blue', alpha=0.6)\n",
+ "\n",
+ "    price_ax.plot(dates, rolling_mean, label=f'{symbol} 30-day Rolling Mean', color='orange', linestyle='--')\n",
+ "\n",
+ "    # Secondary axis for the sentiment score; widen the range if the score\n",
+ "    # falls outside [-1, 1] so the line is always visible.\n",
+ "    sentiment_ax = price_ax.twinx()\n",
+ "    sentiment_limit = max(1.0, abs(avg_sentiment) * 1.1)\n",
+ "    sentiment_ax.axhline(y=avg_sentiment, color='green' if avg_sentiment > 0 else 'red', linestyle='--', label=f'Sentiment Score ({avg_sentiment:.2f})')\n",
+ "    sentiment_ax.set_ylim(-sentiment_limit, sentiment_limit)\n",
+ "    sentiment_ax.set_ylabel('Sentiment Score', fontsize=12)\n",
+ "    sentiment_ax.grid(False)  # avoid a second, misaligned grid\n",
+ "\n",
+ "    price_ax.set_title(f'{symbol} Stock Price vs Sentiment', fontsize=16, fontweight='bold')\n",
+ "    price_ax.set_xlabel('Date', fontsize=12)\n",
+ "    price_ax.set_ylabel('Stock Price (USD)', fontsize=12)\n",
+ "\n",
+ "    # The annotation is anchored 200 and 300 rows from the end; skip it when the\n",
+ "    # frame is too short or the rolling mean is still NaN at those rows.\n",
+ "    if len(stock_data) >= 300 and rolling_mean.iloc[[-300, -200]].notna().all():\n",
+ "        price_ax.annotate('Sentiment Impact', xy=(dates.iloc[-200], rolling_mean.iloc[-200]),\n",
+ "                          xytext=(dates.iloc[-300], rolling_mean.iloc[-300] + 50),\n",
+ "                          arrowprops=dict(facecolor='black', shrink=0.05), fontsize=10)\n",
+ "\n",
+ "    # Merge the legend entries of both axes into a single legend.\n",
+ "    price_handles, price_labels = price_ax.get_legend_handles_labels()\n",
+ "    sentiment_handles, sentiment_labels = sentiment_ax.get_legend_handles_labels()\n",
+ "    price_ax.legend(price_handles + sentiment_handles, price_labels + sentiment_labels, loc='best', fontsize=10)\n",
+ "    price_ax.tick_params(axis='x', labelrotation=45)\n",
+ "    plt.show()\n",
+ "\n",
+ "\n",
+ "# Plot every symbol against its own average headline sentiment.\n",
+ "for symbol in symbols:\n",
+ "    stock_data = combined_data[combined_data['Symbol'] == symbol].copy()\n",
+ "    if stock_data.empty:\n",
+ "        # No rows for this symbol (e.g. its fetch failed); nothing to plot.\n",
+ "        print(f\"No price data for {symbol}; skipping plot\")\n",
+ "        continue\n",
+ "\n",
+ "    # The rolling mean is still stored on stock_data, as this loop always did.\n",
+ "    stock_data['Rolling Mean'] = stock_data['close'].rolling(window=30).mean()\n",
+ "\n",
+ "    # Use the score computed from the headlines instead of a fixed 0.5 line;\n",
+ "    # symbols without a score fall back to a neutral 0.0.\n",
+ "    visualize_stock_prices_with_sentiment(stock_data, symbol, sentiment_scores.get(symbol, 0.0))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "wqzmFS-iOhMh",
+ "outputId": "b089577c-ae4b-4b1e-e15b-b52e539f5e5e"
+ },
+ "execution_count": 59,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "