From 2713c46aeb0e249eade8b841976ec3a800a7215f Mon Sep 17 00:00:00 2001 From: Nadun Dilhara <48908773+ndilhara@users.noreply.github.com> Date: Tue, 25 Oct 2022 02:29:05 +0530 Subject: [PATCH] Numpy & Pandas practice --- ...for Data Analysis - Numpy and Pandas.ipynb | 3300 +++++++++++++++++ 1 file changed, 3300 insertions(+) create mode 100644 Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb diff --git a/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb b/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb new file mode 100644 index 00000000..612fa98e --- /dev/null +++ b/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb @@ -0,0 +1,3300 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "f7yOpaRM5IbG", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #1: DEFINE SINGLE AND MULTI-DIMENSIONAL NUMPY ARRAYS" + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [], + "source": [ + "import numpy as np" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 525, + "status": "ok", + "timestamp": 1598026431247, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "Bqk10u47qEmJ", + "outputId": "6d0f8ddd-fcf1-4ca2-9531-cdd013622565", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "[1 2 3 4 5 6]\n" + ] + }, + { + "data": { + "text/plain": "numpy.ndarray" + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NumPy is a Linear Algebra Library used for multidimensional arrays\n", + "# NumPy brings the best of two worlds: (1) C/Fortran computational efficiency, (2) Python language easy syntax \n", + "list_1=[1,2,3,4,5,6]\n", + "list_1=np.array(list_1)\n", + "# Let's define a one-dimensional array \n", + "array=np.array([1,2,34,5])\n", + "print(list_1)\n", + "# list_1\n", + "type(list_1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 476, + "status": "ok", + "timestamp": 1598026434403, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "b98d-lkK5NbQ", + "outputId": "29441c2a-7312-40ab-b879-3768dfc9f581", + "pycharm": { + "name": "#%%\n", + "is_executing": true + } + }, + "outputs": [], + "source": [ + "# Let's create a numpy array from the list \"my_list\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 433, + "status": "ok", + "timestamp": 1598026452462, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "tljNDWBw5Nf9", + "outputId": "5171039b-3bfe-4796-dfc7-5db17eb2cc2e", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 45, 
+ "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 482, + "status": "ok", + "timestamp": 1598026468861, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "wpiZ25fZ5NiN", + "outputId": "c7d16231-d02d-4908-9d84-1236b8c202b8", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "array([[ 2, 3, 4, 5],\n [ 5, 6, 78, 5]])" + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Multi-dimensional (Matrix definition)\n", + "my_matrix=np.array([[2,3,4,5],[5,6,78,5]])\n", + "my_matrix\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "tmB_S-JfHZRL", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #1: \n", + "- Write a code that creates the following 2x4 numpy array\n", + "\n", + "```\n", + "[[3 7 9 3] \n", + "[4 3 2 2]]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Nabjzl1oKtGX", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[3 7 9 3]\n", + " [4 3 2 2]]\n" + ] + }, + { + "data": { + "text/plain": "numpy.ndarray" + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c1=np.array([[3,7,9,3],[4,3,2,2]])\n", + "print(c1)\n", + "type(c1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uBuqdDQM6Qd9", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #2: LEVERAGE NUMPY BUILT-IN METHODS AND FUNCTIONS " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 475, + "status": "ok", + "timestamp": 1598026506288, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "yXQV-Nrg5No3", + "outputId": "5237733f-68da-43a1-a25c-3671274bfd1a", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "array([0.98591448, 0.53657714, 0.56200152, 0.09515122, 0.16745552,\n 0.82449263, 0.12779976, 0.97491658, 0.27277207, 0.535552 ,\n 0.31059964, 0.09386708, 0.87096615, 0.89026602, 0.55110742,\n 0.95584194, 0.26116079, 0.01461415, 0.59956196, 0.61365036])" + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# \"rand()\" uniform distribution between 0 and 1\n", + "x=np.random.rand(20)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 101 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 439, + "status": "ok", + "timestamp": 1598026560564, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "PpJUCdB15NkT", + "outputId": "c89e0cb1-e897-4ae6-c96b-1563c658c20f", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "array([[0.65571287, 0.83242738, 0.51282503],\n [0.79868025, 0.36705005, 0.69031891],\n [0.5634533 , 0.5681617 , 0.05276662]])" + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can create a matrix of random number as well\n", + "x=np.random.rand(3,3)\n", + "x" + ] + }, + { + 
"cell_type": "code", + "execution_count": 53, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 413, + "status": "ok", + "timestamp": 1598026584111, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "RBcU5xgb6YL0", + "outputId": "c5bf0eeb-44f7-4d01-dfaf-9d3ef21a0c31", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "33" + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# \"randint\" is used to generate random integers between upper and lower bounds\n", + "x= np.random.randint(1,50)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 360, + "status": "ok", + "timestamp": 1598026643201, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "O3JdVtvo6YP7", + "outputId": "ff2a258d-6e98-4383-a10e-a3a1325ea5b2", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[17 95 80 85 68]\n" + ] + } + ], + "source": [ + "# \"randint\" can be used to generate a certain number of random itegers as follows\n", + "x=np.random.randint(1,100,5)\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 463, + "status": "ok", + "timestamp": 1598026648633, + "user": { + 
"displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "EOOLMB416YUh", + "outputId": "bb0d4be7-1ed0-44e8-80bb-a139d2bceb71", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])" + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# np.arange creates an evenly spaced values within a given interval\n", + "x=np.arange(1, 50)\n", + "# print(x)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 101 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 428, + "status": "ok", + "timestamp": 1598026659940, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "sft1flaM6YXF", + "outputId": "4d82d2af-0943-4e05-92f4-112a4ab078d1", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0.]\n", + " [0. 0. 1. 0. 0. 0. 0.]\n", + " [0. 0. 0. 1. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 0. 0.]\n", + " [0. 0. 0. 0. 0. 1. 0.]\n", + " [0. 0. 0. 0. 0. 0. 
1.]]\n" + ] + } + ], + "source": [ + "# create a diagonal of ones and zeros everywhere else\n", + "x=np.eye(7)\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 429, + "status": "ok", + "timestamp": 1598026673247, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "VQ8fp8Qb6feB", + "outputId": "d030f2cb-6339-4d2a-8ea0-467f02dfb2b6", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1.]]\n" + ] + } + ], + "source": [ + "# Matrix of ones\n", + "x=np.ones((7,7))\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 234, + "status": "ok", + "timestamp": 1598026680567, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "TTROXKYo6YSn", + "outputId": "ba7288e2-2ed5-411d-da07-0f7f3c315bac", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 
0.]]\n" + ] + } + ], + "source": [ + "# Array of zeros\n", + "x=np.zeros((4,3))\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IGiQ87EjKzh-", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #2:\n", + "- Write code that takes in a positive integer \"x\" from the user and creates a 1x10 array with random numbers ranging from 0 to \"x\"" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "xg1mPZKKML5j", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5 1 5 5 3 6 7 7 5 1]\n" + ] + } + ], + "source": [ + "num=int(input(\"enter value_ \"))\n", + "x=np.random.randint(1,10,num)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VRyk_VYQArV3", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #3: PERFORM MATHEMATICAL OPERATIONS IN NUMPY" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 599, + "status": "ok", + "timestamp": 1598027357814, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "vm-gmaQG-Vvd", + "outputId": "27258be3-3de6-4679-9dbf-b972d368c2a6", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 3 4 5 6 7 8 9] [1 2 3 4 5 6 7 8 9]\n" + ] + } + ], + "source": [ + "# np.arange() returns evenly spaced values within a given interval\n", + "x=np.arange(1, 10)\n", + "y=np.arange(1,10)\n", + "print(x,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 77,
+ "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 426, + "status": "ok", + "timestamp": 1598027414005, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "ED0OSzpw2p9R", + "outputId": "ab08c55e-5fb6-4701-cccc-69676029603f", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 2 4 6 8 10 12 14 16 18]\n" + ] + } + ], + "source": [ + "# Add 2 numpy arrays together\n", + "sum=x+y\n", + "print(sum)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 409, + "status": "ok", + "timestamp": 1598027429464, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "TH6xTJcuAzK-", + "outputId": "5953a19a-6d59-4c71-9f9f-d5e9350900a5", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 4 9 16 25 36 49 64 81]\n" + ] + } + ], + "source": [ + "squared=x**2\n", + "print(squared)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 490, + "status": "ok", + "timestamp": 1598027449631, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": 
"A0T25q5bAzIw", + "outputId": "3d710c16-1d10-425b-b7c3-08f4ec63be9f", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1. 2. 3. 4. 5. 6. 7. 8. 9.]\n" + ] + } + ], + "source": [ + "sqrt=np.sqrt(squared)\n", + "print(sqrt)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 335, + "status": "ok", + "timestamp": 1598027458892, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "1HvrjH4fAzGd", + "outputId": "0ce031ac-c4fe-4b7a-d2a1-0ebf5faa1529", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01\n", + " 1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03\n", + " 8.10308393e+03]\n" + ] + } + ], + "source": [ + "z =np.exp(y)\n", + "print(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "-mj5Mwc2MmY2", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #3:\n", + "- Given the X and Y values below, obtain the distance between them\n", + "\n", + "```\n", + "X = [5, 7, 20]\n", + "Y = [9, 15, 4]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 426, + "status": "ok", + "timestamp": 1598027909595, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + 
"id": "LfvrGIb83zCA", + "outputId": "523bb184-661b-418c-be70-39119e22abcc", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[10.29563014 16.55294536 20.39607805]\n" + ] + } + ], + "source": [ + "X =np.array( [5, 7, 20])\n", + "Y =np.array( [9, 15, 4])\n", + "# distance=np.linalg.norm(X-Y)\n", + "distance=np.sqrt(X**2 +Y**2)\n", + "print((distance))\n", + "# X-Y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ZA-Yb20mCYLl", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #4: PERFORM ARRAYS SLICING AND INDEXING " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 448, + "status": "ok", + "timestamp": 1598028063099, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "Bu6rhQ99BFCp", + "outputId": "317a7aa3-95d1-4f4d-fc32-252f9438ee60", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 447, + "status": "ok", + "timestamp": 1598028069917, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "9TuEXJxgCdi_", + "outputId": "c815002a-1009-4516-ff3d-a0a762ef50ba", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 2 4 5 67 80]\n" + ] + } + ], + "source": [ + 
"# Access specific index from the numpy array\n", + "my_nparray=np.array([1,2,4,5,67,80])\n", + "print(my_nparray)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 436, + "status": "ok", + "timestamp": 1598028098676, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "EjisujRgCdh4", + "outputId": "ccd890ba-d245-41df-8f64-370196a50f12", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 4]\n" + ] + } + ], + "source": [ + "# Starting from the first index 0 up until and NOT including the last element\n", + "# my_nparray[0:-1]\n", + "my_nparray[0]\n", + "print(my_nparray[0:3])" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 463, + "status": "ok", + "timestamp": 1598028112470, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "1nWlP602Cdg3", + "outputId": "808392cd-5fe1-4bf6-dd2e-466a42ffd8bc", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 7 7 7 7 67 80]\n" + ] + } + ], + "source": [ + "# Broadcasting, altering several values in a numpy array at once\n", + "my_nparray[0:4]=7\n", + "print(my_nparray)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 101 + }, + "colab_type": 
"code", + "executionInfo": { + "elapsed": 492, + "status": "ok", + "timestamp": 1598028132718, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "fVKbwHguCddn", + "outputId": "966bcd20-2eda-4c60-dd0b-be36b71bfab2", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[4 6 1 4]\n", + " [9 9 5 7]\n", + " [9 4 6 3]\n", + " [3 3 8 7]]\n" + ] + } + ], + "source": [ + "# Let's define a two dimensional numpy array\n", + "matrix=np.random.randint(1,10,(4,4))\n", + "print(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 439, + "status": "ok", + "timestamp": 1598028135101, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "Ea7ukwEzCda-", + "outputId": "65c5dbb6-43ee-4910-cf2d-6d2841023369", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[4 6 1 4]\n", + "[3 3 8 7]\n" + ] + } + ], + "source": [ + "# Get a row from a mtrix\n", + "print(matrix[0])\n", + "print(matrix[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 534, + "status": "ok", + "timestamp": 1598028148280, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + 
}, + "id": "8KJk10J8Ci8k", + "outputId": "d533fa0a-9de0-4a2b-e5b0-1005a782ce5b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "1" + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get one element\n", + "matrix[0][2]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zgyFPV5R8dUS", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #4:\n", + "- In the following matrix, replace the last row with 0\n", + "\n", + "```\n", + "X = [2 30 20 -2 -4]\n", + " [3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 22 12]\n", + " [13 24 22 32 37]\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 2 30 20 -2 -4]\n", + " [ 3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 22 12]]\n", + "[[ 2 30 20 -2 -4]\n", + " [ 3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [ 0 0 0 0 0]]\n" + ] + } + ], + "source": [ + "X=np.array([[2,30,20,-2,-4],[3,4,40,-3,-2],[-3,4,-6,90,10],[25, 45, 34, 22, 12]])\n", + "print(X)\n", + "X[-1]=0\n", + "print(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "juuMOEq7Col_", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #5: PERFORM ELEMENTS SELECTION (CONDITIONAL)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 101 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 522, + "status": "ok", + "timestamp": 1598028251687, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": 
"UujCvuhMCt4K", + "outputId": "8e605106-5cdf-4026-a3bc-1a9716e0a23a", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[8 9 8 7 8]\n", + " [3 4 3 7 9]\n", + " [7 5 1 1 3]\n", + " [5 5 3 9 5]\n", + " [9 1 4 9 5]]\n" + ] + } + ], + "source": [ + "matrix=np.random.randint(1,10,(5,5))\n", + "print(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 475, + "status": "ok", + "timestamp": 1598028252674, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "JBgJ8ugyCvht", + "outputId": "19222d58-5192-41c0-9177-9700365640f8", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[8 9 8 8 9 9 9 9]\n" + ] + } + ], + "source": [ + "new_matrix=matrix[matrix > 7]\n", + "print((new_matrix))" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 501, + "status": "ok", + "timestamp": 1598028255328, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "TIQ-A7IBCvgl", + "outputId": "339c27ed-4291-4e10-a1d6-0cc5857e1a93", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[9 7 3 3 7 9 7 5 1 1 3 5 5 3 9 5 9 1 9 5]\n" + ] + } + ], + "source": [ + "# Obtain odd elements only\n", + "new_matrix =matrix[matrix % 2 ==1]\n", + "# 
print(np.mean(new_matrix))\n", + "print(new_matrix)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "39QiMh-W8ogs", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #5:\n", + "- In the following matrix, replace negative elements by 0 and replace odd elements with -2\n", + "\n", + "\n", + "```\n", + "X = [2 30 20 -2 -4]\n", + " [3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 22 12]\n", + " [13 24 22 32 37]\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 2 30 20 -2 -4]\n", + " [ 3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 22 12]\n", + " [13 24 22 32 37]]\n" + ] + } + ], + "source": [ + "X=np.array([[2,30,20,-2,-4],[3,4,40,-3,-2],[-3,4,-6,90,10],[25,45,34,22,12,],[13,24,22,32,37]])\n", + "print(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 2 30 20 0 0]\n", + " [-2 4 40 0 0]\n", + " [ 0 4 0 90 10]\n", + " [-2 -2 34 22 12]\n", + " [-2 24 22 32 -2]]\n" + ] + } + ], + "source": [ + "X[X<0]=0\n", + "X[ X%2 ==1]=-2\n", + "print(X)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "AbN-vySf9gen", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #6: UNDERSTAND PANDAS FUNDAMENTALS" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Pandas is a data manipulation and analysis tool that is built on Numpy.\n", + "# Pandas uses a data structure known as DataFrame (think of it as Microsoft excel in Python). 
\n", + "# DataFrames empower programmers to store and manipulate data in a tabular fashion (rows and columns).\n", + "# Series Vs. DataFrame? Series is considered a single column of a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 rayan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDBank Client NameNet Worth[$]Years with bank
0111Chanel35003
1222Steve290004
2333Mitch100009
3444rayan20005
\n
" + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's define a two-dimensional Pandas DataFrame\n", + "# Note that you can create a pandas dataframe from a python dictionary\n", + "bank_client_df =pd.DataFrame({'Bank Client ID':[111,222,333,444],'Bank Client Name':['Chanel','Steve','Mitch','rayan'],\n", + "'Net Worth[$]':[3500,29000,10000,2000],\n", + " 'Years with bank':[3,4,9,5]})\n", + "bank_client_df" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Net Worth[$] Years with bank\ncount 4.000000 4.000000 4.000000\nmean 277.500000 11125.000000 5.250000\nstd 143.300384 12412.191587 2.629956\nmin 111.000000 2000.000000 3.000000\n25% 194.250000 3125.000000 3.750000\n50% 277.500000 6750.000000 4.500000\n75% 360.750000 14750.000000 6.000000\nmax 444.000000 29000.000000 9.000000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDNet Worth[$]Years with bank
count4.0000004.0000004.000000
mean277.50000011125.0000005.250000
std143.30038412412.1915872.629956
min111.0000002000.0000003.000000
25%194.2500003125.0000003.750000
50%277.5000006750.0000004.500000
75%360.75000014750.0000006.000000
max444.00000029000.0000009.000000
\n
" + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bank_client_df.describe()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "pandas.core.frame.DataFrame" + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's obtain the data type \n", + "type(bank_client_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDBank Client NameNet Worth[$]Years with bank
0111Chanel35003
1222Steve290004
\n
" + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can only view the first couple of rows using .head()\n", + "bank_client_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n2 333 Mitch 10000 9\n3 444 rayan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDBank Client NameNet Worth[$]Years with bank
2333Mitch100009
3444rayan20005
\n
" + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can only view the last couple of rows using .tail()\n", + "bank_client_df.tail(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #6:\n", + "- A portfolio contains a collection of securities such as stocks, bonds and ETFs. Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)\n", + "- Calculate the total value of the portfolio including all stocks" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " stock ticker symbol price per share [$] Number of stocks\n0 AAPL 3500 3\n1 AMZN 200 4\n2 T 40 9", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
stock ticker symbolprice per share [$]Number of stocks
0AAPL35003
1AMZN2004
2T409
\n
" + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "portfolio_df=pd.DataFrame({'stock ticker symbol':['AAPL','AMZN','T'],\n", + " 'price per share [$]':[3500,200,40],\n", + " 'Number of stocks':[3,4,9]})\n", + "portfolio_df" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "outputs": [ + { + "data": { + "text/plain": "11660" + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stocks_value=portfolio_df['price per share [$]']*portfolio_df['Number of stocks']\n", + "stocks_value.sum()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #7: PANDAS WITH CSV AND HTML DATA" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " City \\\n0 Vancouver, BC \n1 Toronto, Ont \n2 Ottawa, Ont \n3 Calgary, Alb \n4 Montreal, Que \n5 Halifax, NS \n6 Regina, Sask \n7 Fredericton, NB \n8 (adsbygoogle = window.adsbygoogle || []).push(... \n\n Average House Price \\\n0 $1,036,000 \n1 $870,000 \n2 $479,000 \n3 $410,000 \n4 $435,000 \n5 $331,000 \n6 $254,000 \n7 $198,000 \n8 (adsbygoogle = window.adsbygoogle || []).push(... \n\n 12 Month Change \n0 + 2.63 % \n1 +10.2 % \n2 + 15.4 % \n3 – 1.5 % \n4 + 9.3 % \n5 + 3.6 % \n6 – 3.9 % \n7 – 4.3 % \n8 (adsbygoogle = window.adsbygoogle || []).push(... ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CityAverage House Price12 Month Change
0Vancouver, BC$1,036,000+ 2.63 %
1Toronto, Ont$870,000+10.2 %
2Ottawa, Ont$479,000+ 15.4 %
3Calgary, Alb$410,000– 1.5 %
4Montreal, Que$435,000+ 9.3 %
5Halifax, NS$331,000+ 3.6 %
6Regina, Sask$254,000– 3.9 %
7Fredericton, NB$198,000– 4.3 %
8(adsbygoogle = window.adsbygoogle || []).push(...(adsbygoogle = window.adsbygoogle || []).push(...(adsbygoogle = window.adsbygoogle || []).push(...
\n
" + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pandas can read the HTML tables on a web page; read_html returns a list of DataFrames, one per table found\n", + "house_price_df=pd.read_html('https://www.livingin-canada.com/house-prices-canada.html')\n", + "house_price_df[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Province \\\n0 British Columbia \n1 Ontario \n2 Alberta \n3 Quebec \n4 Manitoba \n5 Saskatchewan \n6 Nova Scotia \n7 Prince Edward Island \n8 Newfoundland / Labrador \n9 New Brunswick \n10 Canadian Average \n11 (adsbygoogle = window.adsbygoogle || []).push(... \n\n Average House Price \\\n0 $736,000 \n1 $594,000 \n2 $353,000 \n3 $340,000 \n4 $295,000 \n5 $271,000 \n6 $266,000 \n7 $243,000 \n8 $236,000 \n9 $183,000 \n10 $488,000 \n11 (adsbygoogle = window.adsbygoogle || []).push(... \n\n 12 Month Change \n0 + 7.6 % \n1 – 3.2 % \n2 – 7.5 % \n3 + 7.6 % \n4 – 1.4 % \n5 – 3.8 % \n6 + 3.5 % \n7 + 3.0 % \n8 – 1.6 % \n9 – 2.2 % \n10 – 1.3 % \n11 (adsbygoogle = window.adsbygoogle || []).push(... ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ProvinceAverage House Price12 Month Change
0British Columbia$736,000+ 7.6 %
1Ontario$594,000– 3.2 %
2Alberta$353,000– 7.5 %
3Quebec$340,000+ 7.6 %
4Manitoba$295,000– 1.4 %
5Saskatchewan$271,000– 3.8 %
6Nova Scotia$266,000+ 3.5 %
7Prince Edward Island$243,000+ 3.0 %
8Newfoundland / Labrador$236,000– 1.6 %
9New Brunswick$183,000– 2.2 %
10Canadian Average$488,000– 1.3 %
11(adsbygoogle = window.adsbygoogle || []).push(...(adsbygoogle = window.adsbygoogle || []).push(...(adsbygoogle = window.adsbygoogle || []).push(...
\n
" + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "house_price_df[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Read tabular data using read_html\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #7:\n", + "- Write a code that uses Pandas to read tabular US retirement data\n", + "- You can use data from here: https://www.ssa.gov/oact/progdata/nra.html " + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Year of birth \\\n0 1937 and prior \n1 1938 \n2 1939 \n3 1940 \n4 1941 \n5 1942 \n6 1943-54 \n7 1955 \n8 1956 \n9 1957 \n10 1958 \n11 1959 \n12 1960 and later \n13 Notes: 1. Persons born on January 1 of any yea... \n\n Age \n0 65 \n1 65 and 2 months \n2 65 and 4 months \n3 65 and 6 months \n4 65 and 8 months \n5 65 and 10 months \n6 66 \n7 66 and 2 months \n8 66 and 4 months \n9 66 and 6 months \n10 66 and 8 months \n11 66 and 10 months \n12 67 \n13 Notes: 1. Persons born on January 1 of any yea... ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Year of birthAge
01937 and prior65
1193865 and 2 months
2193965 and 4 months
3194065 and 6 months
4194165 and 8 months
5194265 and 10 months
61943-5466
7195566 and 2 months
8195666 and 4 months
9195766 and 6 months
10195866 and 8 months
11195966 and 10 months
121960 and later67
13Notes: 1. Persons born on January 1 of any yea...Notes: 1. Persons born on January 1 of any yea...
\n
" + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retirement_df=pd.read_html('https://www.ssa.gov/oact/progdata/nra.html')  # read_html returns a list of DataFrames, one per table on the page\n", + "retirement_df[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #8: PANDAS OPERATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 rayan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDBank Client NameNet Worth[$]Years with bank
0111Chanel35003
1222Steve290004
2333Mitch100009
3444rayan20005
\n
" + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's define a dataframe as follows:\n", + "bank_client_df =pd.DataFrame({'Bank Client ID':[111,222,333,444],'Bank Client Name':['Chanel','Steve','Mitch','rayan'],\n", + " 'Net Worth[$]':[3500,29000,10000,2000],\n", + " 'Years with bank':[3,4,9,5]})\n", + "bank_client_df" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n2 333 Mitch 10000 9\n3 444 rayan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDBank Client NameNet Worth[$]Years with bank
2333Mitch100009
3444rayan20005
\n
" + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_loyal=bank_client_df[bank_client_df['Years with bank']>=5]\n", + "df_loyal" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Pick certain rows that satisfy a certain criteria \n" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client Name Net Worth[$] Years with bank\n0 Chanel 3500 3\n1 Steve 29000 4\n2 Mitch 10000 9\n3 rayan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client NameNet Worth[$]Years with bank
0Chanel35003
1Steve290004
2Mitch100009
3rayan20005
\n
" + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Delete a column from a DataFrame\n", + "del bank_client_df['Bank Client ID']\n", + "bank_client_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #8:\n", + "- Using \"bank_client_df\" DataFrame, leverage pandas operations to only select high networth individuals with minimum $5000 \n", + "- What is the combined networth for all customers with 5000+ networth?" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client Name Net Worth[$] Years with bank\n1 Steve 29000 4\n2 Mitch 10000 9", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client NameNet Worth[$]Years with bank
1Steve290004
2Mitch100009
\n
" + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "networth_df=bank_client_df[bank_client_df['Net Worth[$]']>=5000]\n", + "networth_df" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "outputs": [ + { + "data": { + "text/plain": "39000" + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "networth_df['Net Worth[$]'].sum()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #9: PANDAS WITH FUNCTIONS" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 Ryan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank client IDBank Client NameNet worth [$]Years with bank
0111Chanel35003
1222Steve290004
2333Mitch100009
3444Ryan20005
\n
" + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's define a dataframe as follows:\n", + "bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], \n", + " 'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], \n", + " 'Net worth [$]':[3500, 29000, 10000, 2000], \n", + " 'Years with bank':[3, 4, 9, 5]})\n", + "bank_client_df" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Define a function that increases all clients networth (stocks) by a fixed value of 20% (for simplicity sake) \n", + "def networth(balance):\n", + " return balance *1.2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "0 4200.0\n1 34800.0\n2 12000.0\n3 2400.0\nName: Net worth [$], dtype: float64" + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can apply a function to the DataFrame \n", + "bank_client_df['Net worth [$]'].apply(networth)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "0 6\n1 5\n2 5\n3 4\nName: Bank Client Name, dtype: int64" + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bank_client_df['Bank Client Name'].apply(len)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #9:\n", + "- Define a function that triples the stock prices and adds $200\n", + "- Apply the function to the DataFrame\n", + "- Calculate the updated total networth of all clients combined" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": { + "pycharm": { + 
"name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "134300" + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def triple_stock(price):  # triple the value, then add a flat $200\n", + "    return price*3 +200\n", + "bank_client_df['Net worth [$]'].apply(triple_stock).sum()  # updated total net worth of all clients combined" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #10: PERFORM SORTING AND ORDERING IN PANDAS" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 Ryan 2000 5", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank client IDBank Client NameNet worth [$]Years with bank
0111Chanel35003
1222Steve290004
2333Mitch100009
3444Ryan20005
\n
" + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's define a dataframe as follows:\n", + "bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], \n", + " 'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], \n", + " 'Net worth [$]':[3500, 29000, 10000, 2000], \n", + " 'Years with bank':[3, 4, 9, 5]})\n", + "bank_client_df" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n3 444 Ryan 2000 5\n2 333 Mitch 10000 9", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank client IDBank Client NameNet worth [$]Years with bank
0111Chanel35003
1222Steve290004
3444Ryan20005
2333Mitch100009
\n
" + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can sort the values in the dataframe according to number of years with bank\n", + "bank_client_df.sort_values(by='Years with bank')" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Note that nothing changed in memory! you have to make sure that inplace is set to True\n", + "bank_client_df.sort_values(by='Years with bank',inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n3 444 Ryan 2000 5\n2 333 Mitch 10000 9", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank client IDBank Client NameNet worth [$]Years with bank
0111Chanel35003
1222Steve290004
3444Ryan20005
2333Mitch100009
\n
" + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set inplace = True to ensure that change has taken place in memory \n", + "bank_client_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Note that now the change (ordering) took place \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #11: PERFORM CONCATENATING AND MERGING WITH PANDAS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Check this out: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html" + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df1=pd.DataFrame({'A':['A0','A1','A2','A3'],\n", + " 'B':['B0','B1','B2','B3'],\n", + " 'C':['C0','C1','C2','C3'],\n", + " 'D':['D0','D1','D2','D3']},\n", + " index=[0,1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " A B C D\n0 A0 B0 C0 D0\n1 A1 B1 C1 D1\n2 A2 B2 C2 D2\n3 A3 B3 C3 D3", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
\n
" + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " A B C D\n0 A4 B4 C4 D4\n1 A5 B5 C5 D5\n2 A6 B6 C6 D6\n3 A7 B7 C7 D7", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCD
0A4B4C4D4
1A5B5C5D5
2A6B6C6D6
3A7B7C7D7
\n
" + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2=pd.DataFrame({'A':['A4','A5','A6','A7'],\n", + " 'B':['B4','B5','B6','B7'],\n", + " 'C':['C4','C5','C6','C7'],\n", + " 'D':['D4','D5','D6','D7']},\n", + " index=[0,1,2,3])\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " A B C D\n0 A0 B0 C0 D0\n1 A1 B1 C1 D1\n2 A2 B2 C2 D2\n3 A3 B3 C3 D3\n0 A4 B4 C4 D4\n1 A5 B5 C5 D5\n2 A6 B6 C6 D6\n3 A7 B7 C7 D7", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
0A4B4C4D4
1A5B5C5D5
2A6B6C6D6
3A7B7C7D7
\n
" + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TASK #12: PROJECT AND CONCLUDING REMARKS" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "- Define a dataframe named 'bank_df_1' that contains the first and last names for 5 bank clients with IDs = 1, 2, 3, 4, 5 \n", + "- Assume that the bank got 5 new clients, define another dataframe named 'bank_df_2' that contains the new clients with IDs = 6, 7, 8, 9, 10\n", + "- Let's assume we obtained additional information (Annual Salary) about all our bank customers (10 customers) \n", + "- Concatenate both 'bank_df_1' and 'bank_df_2' dataframes\n", + "- Merge client names and their newly added salary information using the 'Bank Client ID'\n", + "- Let's assume that you became a new client of the bank\n", + "- Define a new DataFrame that contains your information such as client ID (choose 11), first name, last name, and annual salary.\n", + "- Add this new dataframe to the original dataframe 'bank_df_all'." + ] + }, + { + "cell_type": "code", + "execution_count": 200, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name\n0 1 Nancy Rob\n1 2 Alex Ali\n2 3 Shep George\n3 4 Max Mitch\n4 5 Allen Steve", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast Name
01NancyRob
12AlexAli
23ShepGeorge
34MaxMitch
45AllenSteve
\n
" + }, + "execution_count": 200, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'],\n", + " 'First Name': ['Nancy', 'Alex', 'Shep', 'Max', 'Allen'],\n", + " 'Last Name': ['Rob', 'Ali', 'George', 'Mitch', 'Steve']}\n", + "bank_df_1=pd.DataFrame(raw_data,columns=['Bank Client ID','First Name','Last Name'])\n", + "# bank_df_1=pd.DataFrame(raw_data)\n", + "bank_df_1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 201, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name\n0 6 Bill Christian\n1 7 Dina Mo\n2 8 Sarah Steve\n3 9 Heather Bob\n4 10 Holly Michelle", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast Name
06BillChristian
17DinaMo
28SarahSteve
39HeatherBob
410HollyMichelle
\n
" + }, + "execution_count": 201, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = {\n", + " 'Bank Client ID': ['6', '7', '8', '9', '10'],\n", + " 'First Name': ['Bill', 'Dina', 'Sarah', 'Heather', 'Holly'],\n", + " 'Last Name': ['Christian', 'Mo', 'Steve', 'Bob', 'Michelle']}\n", + "bank_df_2=pd.DataFrame(raw_data,columns=['Bank Client ID','First Name','Last Name'])\n", + "bank_df_2" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 202, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID Annual Salary [$/year]\n0 1 25000\n1 2 35000\n2 3 45000\n3 4 48000\n4 5 49000\n5 6 32000\n6 7 33000\n7 8 34000\n8 9 23000\n9 10 22000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDAnnual Salary [$/year]
0125000
1235000
2345000
3448000
4549000
5632000
6733000
7834000
8923000
91022000
\n
" + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = {\n", + " 'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],\n", + " 'Annual Salary [$/year]': [25000, 35000, 45000, 48000, 49000, 32000, 33000, 34000, 23000, 22000]}\n", + "bank_df_salary=pd.DataFrame(raw_data,columns=['Bank Client ID','Annual Salary [$/year]'])\n", + "bank_df_salary" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 204, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name\n0 1 Nancy Rob\n1 2 Alex Ali\n2 3 Shep George\n3 4 Max Mitch\n4 5 Allen Steve\n0 6 Bill Christian\n1 7 Dina Mo\n2 8 Sarah Steve\n3 9 Heather Bob\n4 10 Holly Michelle", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast Name
01NancyRob
12AlexAli
23ShepGeorge
34MaxMitch
45AllenSteve
06BillChristian
17DinaMo
28SarahSteve
39HeatherBob
410HollyMichelle
\n
" + }, + "execution_count": 204, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bank_df_all=pd.concat([bank_df_1,bank_df_2])\n", + "bank_df_all" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 205, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 1 Nancy Rob 25000\n1 2 Alex Ali 35000\n2 3 Shep George 45000\n3 4 Max Mitch 48000\n4 5 Allen Steve 49000\n5 6 Bill Christian 32000\n6 7 Dina Mo 33000\n7 8 Sarah Steve 34000\n8 9 Heather Bob 23000\n9 10 Holly Michelle 22000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast NameAnnual Salary [$/year]
01NancyRob25000
12AlexAli35000
23ShepGeorge45000
34MaxMitch48000
45AllenSteve49000
56BillChristian32000
67DinaMo33000
78SarahSteve34000
89HeatherBob23000
910HollyMichelle22000
\n
" + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bank_df_all=pd.merge(bank_df_all,bank_df_salary,on='Bank Client ID')\n", + "bank_df_all" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 206, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 11 Rayan Ahemd 5000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast NameAnnual Salary [$/year]
011RayanAhemd5000
\n
" + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_client={'Bank Client ID':['11'],\n", + " 'First Name':['Rayan'],\n", + " 'Last Name':['Ahemd'],\n", + " 'Annual Salary [$/year]':[5000]}\n", + "new_client_df = pd.DataFrame(new_client, columns = ['Bank Client ID', 'First Name', 'Last Name','Annual Salary [$/year]'])\n", + "new_client_df" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 208, + "outputs": [ + { + "data": { + "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 1 Nancy Rob 25000\n1 2 Alex Ali 35000\n2 3 Shep George 45000\n3 4 Max Mitch 48000\n4 5 Allen Steve 49000\n5 6 Bill Christian 32000\n6 7 Dina Mo 33000\n7 8 Sarah Steve 34000\n8 9 Heather Bob 23000\n9 10 Holly Michelle 22000\n0 11 Rayan Ahemd 5000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Bank Client IDFirst NameLast NameAnnual Salary [$/year]
01NancyRob25000
12AlexAli35000
23ShepGeorge45000
34MaxMitch48000
45AllenSteve49000
56BillChristian32000
67DinaMo33000
78SarahSteve34000
89HeatherBob23000
910HollyMichelle22000
011RayanAhemd5000
\n
" + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df=pd.concat([bank_df_all,new_client_df],axis=0)\n", + "new_df" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dakv7iPFgw7Y", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# EXCELLENT JOB!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pNWQrMz6KnIF", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# MINI CHALLENGES SOLUTIONS" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6AMqSvtAKj6o", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #1 SOLUTION: \n", + "- Write a code that creates the following 2x4 numpy array\n", + "\n", + "```\n", + "[[3 7 9 3] \n", + "[4 3 2 2]]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ivvFzDy-KlQP", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "x = np.array([[3, 7, 9, 3], [4, 3, 2, 2]])  # two bracket levels give shape (2, 4), not (1, 2, 4)\n", + "x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2GskzNkOMFu0", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #2 SOLUTION:\n", + "- Write a code that takes in a positive integer \"x\" from the user and creates a 1x10 array with random numbers ranging from 0 to \"x\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 4089, + "status": "ok", + "timestamp": 1598026853681, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + 
"userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "0vnDfvwFMFu3", + "outputId": "0a7008bb-7820-42a6-d942-6e5f22b38672", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "x = int(input(\"Please enter a positive integer value: \"))\n", + "x = np.random.randint(0, x + 1, 10)  # low is inclusive, high is exclusive, so values span [0, x]\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tIVL0uEr4xsC", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "s1p4WaNB4yFf", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #3 SOLUTION:\n", + "- Given the X and Y values below, obtain the distance between them\n", + "\n", + "\n", + "```\n", + "X = [5, 7, 20]\n", + "Y = [9, 15, 4]\n", + "```\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 481, + "status": "ok", + "timestamp": 1598027988068, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "_GJmf2Pl4yFi", + "outputId": "6aed42b8-e013-4461-e0ac-343555153b51", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X = np.array([5, 7, 20])\n", + "Y = np.array([9, 15, 4])\n", + "Z = np.sqrt(X**2 + Y**2)\n", + "Z" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EZ9i2CLm6Iuf", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #4 SOLUTION:\n", + "- In the following matrix, replace the last row with 0\n", + "\n", + "```\n", + "X = [2 30 20 -2 -4]\n", + " [3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 
22 12]\n", + " [13 24 22 32 37]\n", + "```\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 356, + "status": "ok", + "timestamp": 1598028573756, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "qB7S_wIx6Hsm", + "outputId": "176ca693-b427-4447-cd56-6cef02f8cbca", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X = np.array([[2, 30, 20, -2, -4],\n", + " [3, 4, 40, -3, -2],\n", + " [-3, 4, -6, 90, 10],\n", + " [25, 45, 34, 22, 12],\n", + " [13, 24, 22, 32, 37]])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 307, + "status": "ok", + "timestamp": 1598028590866, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "y98hmGRe7Qtl", + "outputId": "ef92bacd-6440-411d-c860-aa99fbb705fa", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X[4] = 0\n", + "X" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2oikl6JS9YUs", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #5 SOLUTION:\n", + "- In the following matrix, replace negative elements by 0 and replace odd elements with -2\n", + "\n", + "\n", + "```\n", + "X = [2 30 20 -2 -4]\n", + " [3 4 40 -3 -2]\n", + " [-3 4 -6 90 10]\n", + " [25 45 34 22 12]\n", + " [13 24 22 32 37]\n", + "```" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 346, + "status": "ok", + "timestamp": 1598029110658, + "user": { + "displayName": "Stemplicity", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64", + "userId": "10668071569687886186" + }, + "user_tz": 240 + }, + "id": "h105KzGr9YUu", + "outputId": "b5b352dd-e609-45f0-9571-73f25505b2b3", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X = np.array([[2, 30, 20, -2, -4],\n", + " [3, 4, 40, -3, -2],\n", + " [-3, 4, -6, 90, 10],\n", + " [25, 45, 34, 22, 12],\n", + " [13, 24, 22, 32, 37]])\n", + "\n", + "X[X<0] = 0\n", + "X[X%2==1] = -2\n", + "X" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #6 SOLUTION:\n", + "- A porfolio contains a collection of securities such as stocks, bonds and ETFs. 
Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)\n", + "- Calculate the total value of the portfolio including all stocks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "portfolio_df = pd.DataFrame({'stock ticker symbols':['AAPL', 'AMZN', 'T'],\n", + " 'price per share [$]':[3500, 200, 40], \n", + " 'Number of stocks':[3, 4, 9]})\n", + "portfolio_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "stocks_dollar_value = portfolio_df['price per share [$]'] * portfolio_df['Number of stocks']\n", + "print(stocks_dollar_value)\n", + "print('Total portfolio value = {}'.format(stocks_dollar_value.sum()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #7 SOLUTION:\n", + "- Write code that uses Pandas to read tabular US retirement data\n", + "- You can use data from here: https://www.ssa.gov/oact/progdata/nra.html " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Read tabular data using read_html\n", + "retirement_age_df = pd.read_html('https://www.ssa.gov/oact/progdata/nra.html')\n", + "retirement_age_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #8 SOLUTION:\n", + "- Using the \"bank_client_df\" DataFrame, leverage pandas operations to select only high net worth individuals with a net worth of at least $5000 \n", + "- What is the combined net worth of all customers with a net worth of $5000 or more?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df_high_networth = bank_client_df[ (bank_client_df['Net worth [$]'] >= 5000) ]\n", + "df_high_networth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df_high_networth['Net worth [$]'].sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "MINI CHALLENGE #9 SOLUTION:\n", + "- Define a function that triples a client's net worth and adds $200\n", + "- Apply the function to the 'Net worth [$]' column of the DataFrame\n", + "- Calculate the updated total net worth of all clients combined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def networth_update(balance):\n", + " return balance * 3 + 200 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# You can apply a function to the DataFrame \n", + "results = bank_client_df['Net worth [$]'].apply(networth_update)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "results.sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "PROJECT SOLUTION:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Creating a dataframe from a dictionary\n", + "# Let's define a dataframe with a list of bank clients with IDs = 1, 2, 3, 4, 5 \n", + "\n", + "raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'],\n", + " 'First Name': ['Nancy', 'Alex', 
'Shep', 'Max', 'Allen'], \n", + " 'Last Name': ['Rob', 'Ali', 'George', 'Mitch', 'Steve']}\n", + "\n", + "Bank_df_1 = pd.DataFrame(raw_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])\n", + "Bank_df_1\n", + "\n", + "\n", + "# Let's define another dataframe for a separate list of clients (IDs = 6, 7, 8, 9, 10)\n", + "raw_data = {\n", + " 'Bank Client ID': ['6', '7', '8', '9', '10'],\n", + " 'First Name': ['Bill', 'Dina', 'Sarah', 'Heather', 'Holly'], \n", + " 'Last Name': ['Christian', 'Mo', 'Steve', 'Bob', 'Michelle']}\n", + "Bank_df_2 = pd.DataFrame(raw_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])\n", + "Bank_df_2\n", + "\n", + "\n", + "# Let's assume we obtained additional information (Annual Salary) about our bank customers \n", + "# Note that data obtained is for all clients with IDs 1 to 10 \n", + "raw_data = {\n", + " 'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],\n", + " 'Annual Salary [$/year]': [25000, 35000, 45000, 48000, 49000, 32000, 33000, 34000, 23000, 22000]}\n", + "bank_df_salary = pd.DataFrame(raw_data, columns = ['Bank Client ID','Annual Salary [$/year]'])\n", + "bank_df_salary\n", + "\n", + "\n", + "# Let's concatenate both dataframes #1 and #2\n", + "# Note that we now have client IDs from 1 to 10\n", + "bank_df_all = pd.concat([Bank_df_1, Bank_df_2])\n", + "bank_df_all\n", + "\n", + "\n", + "# Let's merge all data on 'Bank Client ID'\n", + "bank_df_all = pd.merge(bank_df_all, bank_df_salary, on = 'Bank Client ID')\n", + "bank_df_all\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_client = {\n", + " 'Bank Client ID': ['11'],\n", + " 'First Name': ['Ry'], \n", + " 'Last Name': ['Aly'],\n", + " 'Annual Salary [$/year]' : [1000]}\n", + "new_client_df = pd.DataFrame(new_client, columns = ['Bank Client ID', 'First Name', 'Last Name', 'Annual Salary [$/year]'])\n", + "new_client_df" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_df = pd.concat([bank_df_all, new_client_df], axis = 0)\n", + "new_df" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "7. Python 101 - Python Libraries for Data Analysis - Numpy Solution.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file