From 2713c46aeb0e249eade8b841976ec3a800a7215f Mon Sep 17 00:00:00 2001
From: Nadun Dilhara <48908773+ndilhara@users.noreply.github.com>
Date: Tue, 25 Oct 2022 02:29:05 +0530
Subject: [PATCH] Numpy & Pandas practice
---
...for Data Analysis - Numpy and Pandas.ipynb | 3300 +++++++++++++++++
1 file changed, 3300 insertions(+)
create mode 100644 Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb
diff --git a/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb b/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb
new file mode 100644
index 00000000..612fa98e
--- /dev/null
+++ b/Numpy & Pandas/Python 101 - Python Libraries for Data Analysis - Numpy and Pandas.ipynb
@@ -0,0 +1,3300 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "f7yOpaRM5IbG",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #1: DEFINE SINGLE AND MULTI-DIMENSIONAL NUMPY ARRAYS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 525,
+ "status": "ok",
+ "timestamp": 1598026431247,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "Bqk10u47qEmJ",
+ "outputId": "6d0f8ddd-fcf1-4ca2-9531-cdd013622565",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 2 3 4 5 6]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": "numpy.ndarray"
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# NumPy is a Linear Algebra Library used for multidimensional arrays\n",
+ "# NumPy brings the best of two worlds: (1) C/Fortran computational efficiency, (2) Python language easy syntax \n",
+ "list_1=[1,2,3,4,5,6]\n",
+ "list_1=np.array(list_1)\n",
+ "# Let's define a one-dimensional array \n",
+ "array=np.array([1,2,34,5])\n",
+ "print(list_1)\n",
+ "# list_1\n",
+ "type(list_1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 476,
+ "status": "ok",
+ "timestamp": 1598026434403,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "b98d-lkK5NbQ",
+ "outputId": "29441c2a-7312-40ab-b879-3768dfc9f581",
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Let's create a numpy array from the list \"my_list\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 433,
+ "status": "ok",
+ "timestamp": 1598026452462,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "tljNDWBw5Nf9",
+ "outputId": "5171039b-3bfe-4796-dfc7-5db17eb2cc2e",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 50
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 482,
+ "status": "ok",
+ "timestamp": 1598026468861,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "wpiZ25fZ5NiN",
+ "outputId": "c7d16231-d02d-4908-9d84-1236b8c202b8",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([[ 2, 3, 4, 5],\n [ 5, 6, 78, 5]])"
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Multi-dimensional (Matrix definition)\n",
+ "my_matrix=np.array([[2,3,4,5],[5,6,78,5]])\n",
+ "my_matrix\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "tmB_S-JfHZRL",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #1: \n",
+ "- Write code that creates the following 2x4 NumPy array\n",
+ "\n",
+ "```\n",
+ "[[3 7 9 3] \n",
+ "[4 3 2 2]]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "Nabjzl1oKtGX",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[3 7 9 3]\n",
+ " [4 3 2 2]]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": "numpy.ndarray"
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c1=np.array([[3,7,9,3],[4,3,2,2]])\n",
+ "print(c1)\n",
+ "type(c1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "uBuqdDQM6Qd9",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #2: LEVERAGE NUMPY BUILT-IN METHODS AND FUNCTIONS "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 475,
+ "status": "ok",
+ "timestamp": 1598026506288,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "yXQV-Nrg5No3",
+ "outputId": "5237733f-68da-43a1-a25c-3671274bfd1a",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([0.98591448, 0.53657714, 0.56200152, 0.09515122, 0.16745552,\n 0.82449263, 0.12779976, 0.97491658, 0.27277207, 0.535552 ,\n 0.31059964, 0.09386708, 0.87096615, 0.89026602, 0.55110742,\n 0.95584194, 0.26116079, 0.01461415, 0.59956196, 0.61365036])"
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# \"rand()\" uniform distribution between 0 and 1\n",
+ "x=np.random.rand(20)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 101
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 439,
+ "status": "ok",
+ "timestamp": 1598026560564,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "PpJUCdB15NkT",
+ "outputId": "c89e0cb1-e897-4ae6-c96b-1563c658c20f",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([[0.65571287, 0.83242738, 0.51282503],\n [0.79868025, 0.36705005, 0.69031891],\n [0.5634533 , 0.5681617 , 0.05276662]])"
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# You can create a matrix of random numbers as well\n",
+ "x=np.random.rand(3,3)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 413,
+ "status": "ok",
+ "timestamp": 1598026584111,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "RBcU5xgb6YL0",
+ "outputId": "c5bf0eeb-44f7-4d01-dfaf-9d3ef21a0c31",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "33"
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# \"randint\" is used to generate random integers between a lower bound (inclusive) and an upper bound (exclusive)\n",
+ "x= np.random.randint(1,50)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 360,
+ "status": "ok",
+ "timestamp": 1598026643201,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "O3JdVtvo6YP7",
+ "outputId": "ff2a258d-6e98-4383-a10e-a3a1325ea5b2",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[17 95 80 85 68]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# \"randint\" can be used to generate a certain number of random integers as follows\n",
+ "x=np.random.randint(1,100,5)\n",
+ "print(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 463,
+ "status": "ok",
+ "timestamp": 1598026648633,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "EOOLMB416YUh",
+ "outputId": "bb0d4be7-1ed0-44e8-80bb-a139d2bceb71",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])"
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# np.arange creates evenly spaced values within a given interval (the stop value is excluded)\n",
+ "x=np.arange(1, 50)\n",
+ "# print(x)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 101
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 428,
+ "status": "ok",
+ "timestamp": 1598026659940,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "sft1flaM6YXF",
+ "outputId": "4d82d2af-0943-4e05-92f4-112a4ab078d1",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[1. 0. 0. 0. 0. 0. 0.]\n",
+ " [0. 1. 0. 0. 0. 0. 0.]\n",
+ " [0. 0. 1. 0. 0. 0. 0.]\n",
+ " [0. 0. 0. 1. 0. 0. 0.]\n",
+ " [0. 0. 0. 0. 1. 0. 0.]\n",
+ " [0. 0. 0. 0. 0. 1. 0.]\n",
+ " [0. 0. 0. 0. 0. 0. 1.]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# create a diagonal of ones and zeros everywhere else\n",
+ "x=np.eye(7)\n",
+ "print(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 269
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 429,
+ "status": "ok",
+ "timestamp": 1598026673247,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "VQ8fp8Qb6feB",
+ "outputId": "d030f2cb-6339-4d2a-8ea0-467f02dfb2b6",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]\n",
+ " [1. 1. 1. 1. 1. 1. 1.]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Matrix of ones\n",
+ "x=np.ones((7,7))\n",
+ "print(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 234,
+ "status": "ok",
+ "timestamp": 1598026680567,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "TTROXKYo6YSn",
+ "outputId": "ba7288e2-2ed5-411d-da07-0f7f3c315bac",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[0. 0. 0.]\n",
+ " [0. 0. 0.]\n",
+ " [0. 0. 0.]\n",
+ " [0. 0. 0.]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Array of zeros\n",
+ "x=np.zeros((4,3))\n",
+ "print(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "IGiQ87EjKzh-",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #2:\n",
+ "- Write code that takes a positive integer \"x\" from the user and creates a 1x10 array with random numbers ranging from 0 to \"x\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "xg1mPZKKML5j",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[5 1 5 5 3 6 7 7 5 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "num = int(input(\"enter value_ \"))  # upper bound \"x\" supplied by the user (positive integer)\n",
+ "x = np.random.randint(0, num + 1, 10)  # 10 draws from 0..num; randint's upper bound is exclusive, hence num + 1\n",
+ "print(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "VRyk_VYQArV3",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #3: PERFORM MATHEMATICAL OPERATIONS IN NUMPY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 599,
+ "status": "ok",
+ "timestamp": 1598027357814,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "vm-gmaQG-Vvd",
+ "outputId": "27258be3-3de6-4679-9dbf-b972d368c2a6",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 2 3 4 5 6 7 8 9] [1 2 3 4 5 6 7 8 9]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# np.arange() returns evenly spaced values within a given interval (the stop value is excluded)\n",
+ "x=np.arange(1, 10)\n",
+ "y=np.arange(1,10)\n",
+ "print(x,y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 426,
+ "status": "ok",
+ "timestamp": 1598027414005,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "ED0OSzpw2p9R",
+ "outputId": "ab08c55e-5fb6-4701-cccc-69676029603f",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 2 4 6 8 10 12 14 16 18]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Add two NumPy arrays element-wise; the result is named `total` so the built-in sum() is not shadowed\n",
+ "total = x + y\n",
+ "print(total)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 409,
+ "status": "ok",
+ "timestamp": 1598027429464,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "TH6xTJcuAzK-",
+ "outputId": "5953a19a-6d59-4c71-9f9f-d5e9350900a5",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 1 4 9 16 25 36 49 64 81]\n"
+ ]
+ }
+ ],
+ "source": [
+ "squared=x**2\n",
+ "print(squared)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 490,
+ "status": "ok",
+ "timestamp": 1598027449631,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "A0T25q5bAzIw",
+ "outputId": "3d710c16-1d10-425b-b7c3-08f4ec63be9f",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1. 2. 3. 4. 5. 6. 7. 8. 9.]\n"
+ ]
+ }
+ ],
+ "source": [
+ "sqrt=np.sqrt(squared)\n",
+ "print(sqrt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 335,
+ "status": "ok",
+ "timestamp": 1598027458892,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "1HvrjH4fAzGd",
+ "outputId": "0ce031ac-c4fe-4b7a-d2a1-0ebf5faa1529",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01\n",
+ " 1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03\n",
+ " 8.10308393e+03]\n"
+ ]
+ }
+ ],
+ "source": [
+ "z =np.exp(y)\n",
+ "print(z)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "-mj5Mwc2MmY2",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #3:\n",
+ "- Given the X and Y values below, obtain the distance between them\n",
+ "\n",
+ "```\n",
+ "X = [5, 7, 20]\n",
+ "Y = [9, 15, 4]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 426,
+ "status": "ok",
+ "timestamp": 1598027909595,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "LfvrGIb83zCA",
+ "outputId": "523bb184-661b-418c-be70-39119e22abcc",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[10.29563014 16.55294536 20.39607805]\n"
+ ]
+ }
+ ],
+ "source": [
+ "X =np.array( [5, 7, 20])\n",
+ "Y =np.array( [9, 15, 4])\n",
+ "# distance=np.linalg.norm(X-Y)\n",
+ "distance=np.sqrt(X**2 +Y**2)\n",
+ "print((distance))\n",
+ "# X-Y"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ZA-Yb20mCYLl",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #4: PERFORM ARRAYS SLICING AND INDEXING "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 448,
+ "status": "ok",
+ "timestamp": 1598028063099,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "Bu6rhQ99BFCp",
+ "outputId": "317a7aa3-95d1-4f4d-fc32-252f9438ee60",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 447,
+ "status": "ok",
+ "timestamp": 1598028069917,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "9TuEXJxgCdi_",
+ "outputId": "c815002a-1009-4516-ff3d-a0a762ef50ba",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 1 2 4 5 67 80]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Access specific index from the numpy array\n",
+ "my_nparray=np.array([1,2,4,5,67,80])\n",
+ "print(my_nparray)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 436,
+ "status": "ok",
+ "timestamp": 1598028098676,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "EjisujRgCdh4",
+ "outputId": "ccd890ba-d245-41df-8f64-370196a50f12",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 2 4]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Slicing uses [start:stop], and the stop index is exclusive\n",
+ "# e.g. my_nparray[0:-1] returns everything up to, but NOT including, the last element\n",
+ "# Here we take the first three elements (indices 0, 1 and 2)\n",
+ "print(my_nparray[0:3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 463,
+ "status": "ok",
+ "timestamp": 1598028112470,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "1nWlP602Cdg3",
+ "outputId": "808392cd-5fe1-4bf6-dd2e-466a42ffd8bc",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7 7 7 7 67 80]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Broadcasting, altering several values in a numpy array at once\n",
+ "my_nparray[0:4]=7\n",
+ "print(my_nparray)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 101
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 492,
+ "status": "ok",
+ "timestamp": 1598028132718,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "fVKbwHguCddn",
+ "outputId": "966bcd20-2eda-4c60-dd0b-be36b71bfab2",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[4 6 1 4]\n",
+ " [9 9 5 7]\n",
+ " [9 4 6 3]\n",
+ " [3 3 8 7]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Let's define a two dimensional numpy array\n",
+ "matrix=np.random.randint(1,10,(4,4))\n",
+ "print(matrix)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 439,
+ "status": "ok",
+ "timestamp": 1598028135101,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "Ea7ukwEzCda-",
+ "outputId": "65c5dbb6-43ee-4910-cf2d-6d2841023369",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[4 6 1 4]\n",
+ "[3 3 8 7]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get a row from a matrix\n",
+ "print(matrix[0])\n",
+ "print(matrix[-1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 534,
+ "status": "ok",
+ "timestamp": 1598028148280,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "8KJk10J8Ci8k",
+ "outputId": "d533fa0a-9de0-4a2b-e5b0-1005a782ce5b",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "1"
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get one element\n",
+ "matrix[0][2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "zgyFPV5R8dUS",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #4:\n",
+ "- In the following matrix, replace the last row with 0\n",
+ "\n",
+ "```\n",
+ "X = [2 30 20 -2 -4]\n",
+ " [3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]\n",
+ " [13 24 22 32 37]\n",
+ "```\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 2 30 20 -2 -4]\n",
+ " [ 3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]]\n",
+ "[[ 2 30 20 -2 -4]\n",
+ " [ 3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [ 0 0 0 0 0]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "X=np.array([[2,30,20,-2,-4],[3,4,40,-3,-2],[-3,4,-6,90,10],[25, 45, 34, 22, 12]])\n",
+ "print(X)\n",
+ "X[-1]=0\n",
+ "print(X)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "juuMOEq7Col_",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #5: PERFORM ELEMENTS SELECTION (CONDITIONAL)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 101
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 522,
+ "status": "ok",
+ "timestamp": 1598028251687,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "UujCvuhMCt4K",
+ "outputId": "8e605106-5cdf-4026-a3bc-1a9716e0a23a",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[8 9 8 7 8]\n",
+ " [3 4 3 7 9]\n",
+ " [7 5 1 1 3]\n",
+ " [5 5 3 9 5]\n",
+ " [9 1 4 9 5]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "matrix=np.random.randint(1,10,(5,5))\n",
+ "print(matrix)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 475,
+ "status": "ok",
+ "timestamp": 1598028252674,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "JBgJ8ugyCvht",
+ "outputId": "19222d58-5192-41c0-9177-9700365640f8",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[8 9 8 8 9 9 9 9]\n"
+ ]
+ }
+ ],
+ "source": [
+ "new_matrix=matrix[matrix > 7]\n",
+ "print((new_matrix))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 501,
+ "status": "ok",
+ "timestamp": 1598028255328,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "TIQ-A7IBCvgl",
+ "outputId": "339c27ed-4291-4e10-a1d6-0cc5857e1a93",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[9 7 3 3 7 9 7 5 1 1 3 5 5 3 9 5 9 1 9 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Obtain odd elements only\n",
+ "new_matrix =matrix[matrix % 2 ==1]\n",
+ "# print(np.mean(new_matrix))\n",
+ "print(new_matrix)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "39QiMh-W8ogs",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #5:\n",
+ "- In the following matrix, replace negative elements by 0 and replace odd elements with -2\n",
+ "\n",
+ "\n",
+ "```\n",
+ "X = [2 30 20 -2 -4]\n",
+ " [3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]\n",
+ " [13 24 22 32 37]\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 2 30 20 -2 -4]\n",
+ " [ 3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]\n",
+ " [13 24 22 32 37]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "X=np.array([[2,30,20,-2,-4],[3,4,40,-3,-2],[-3,4,-6,90,10],[25,45,34,22,12,],[13,24,22,32,37]])\n",
+ "print(X)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 2 30 20 0 0]\n",
+ " [-2 4 40 0 0]\n",
+ " [ 0 4 0 90 10]\n",
+ " [-2 -2 34 22 12]\n",
+ " [-2 24 22 32 -2]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "X[X<0]=0\n",
+ "X[ X%2 ==1]=-2\n",
+ "print(X)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "AbN-vySf9gen",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #6: UNDERSTAND PANDAS FUNDAMENTALS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Pandas is a data manipulation and analysis tool that is built on NumPy.\n",
+ "# Pandas uses a data structure known as DataFrame (think of it as Microsoft Excel in Python). \n",
+ "# DataFrames empower programmers to store and manipulate data in a tabular fashion (rows and columns).\n",
+ "# Series Vs. DataFrame? Series is considered a single column of a DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 rayan 2000 5",
+ "text/html": "
\n\n
\n \n \n | \n Bank Client ID | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n rayan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 131,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's define a two-dimensional Pandas DataFrame\n",
+ "# Note that you can create a pandas dataframe from a python dictionary\n",
+ "bank_client_df =pd.DataFrame({'Bank Client ID':[111,222,333,444],'Bank Client Name':['Chanel','Steve','Mitch','rayan'],\n",
+ "'Net Worth[$]':[3500,29000,10000,2000],\n",
+ " 'Years with bank':[3,4,9,5]})\n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Net Worth[$] Years with bank\ncount 4.000000 4.000000 4.000000\nmean 277.500000 11125.000000 5.250000\nstd 143.300384 12412.191587 2.629956\nmin 111.000000 2000.000000 3.000000\n25% 194.250000 3125.000000 3.750000\n50% 277.500000 6750.000000 4.500000\n75% 360.750000 14750.000000 6.000000\nmax 444.000000 29000.000000 9.000000",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n count | \n 4.000000 | \n 4.000000 | \n 4.000000 | \n
\n \n mean | \n 277.500000 | \n 11125.000000 | \n 5.250000 | \n
\n \n std | \n 143.300384 | \n 12412.191587 | \n 2.629956 | \n
\n \n min | \n 111.000000 | \n 2000.000000 | \n 3.000000 | \n
\n \n 25% | \n 194.250000 | \n 3125.000000 | \n 3.750000 | \n
\n \n 50% | \n 277.500000 | \n 6750.000000 | \n 4.500000 | \n
\n \n 75% | \n 360.750000 | \n 14750.000000 | \n 6.000000 | \n
\n \n max | \n 444.000000 | \n 29000.000000 | \n 9.000000 | \n
\n \n
\n
"
+ },
+ "execution_count": 141,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bank_client_df.describe()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "pandas.core.frame.DataFrame"
+ },
+ "execution_count": 132,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's obtain the data type \n",
+ "type(bank_client_df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n
\n
"
+ },
+ "execution_count": 135,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# You can view just the first couple of rows using .head()\n",
+ "bank_client_df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n2 333 Mitch 10000 9\n3 444 rayan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n rayan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 136,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# You can view just the last couple of rows using .tail()\n",
+ "bank_client_df.tail(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #6:\n",
+ "- A portfolio contains a collection of securities such as stocks, bonds and ETFs. Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)\n",
+ "- Calculate the total value of the portfolio including all stocks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " stock ticker symbol price per share [$] Number of stocks\n0 AAPL 3500 3\n1 AMZN 200 4\n2 T 40 9",
+ "text/html": "\n\n
\n \n \n | \n stock ticker symbol | \n price per share [$] | \n Number of stocks | \n
\n \n \n \n 0 | \n AAPL | \n 3500 | \n 3 | \n
\n \n 1 | \n AMZN | \n 200 | \n 4 | \n
\n \n 2 | \n T | \n 40 | \n 9 | \n
\n \n
\n
"
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "portfolio_df=pd.DataFrame({'stock ticker symbol':['AAPL','AMZN','T'],\n",
+ " 'price per share [$]':[3500,200,40],\n",
+ " 'Number of stocks':[3,4,9]})\n",
+ "portfolio_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "11660"
+ },
+ "execution_count": 140,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stocks_value=portfolio_df['price per share [$]']*portfolio_df['Number of stocks']\n",
+ "stocks_value.sum()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #7: PANDAS WITH CSV AND HTML DATA"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " City \\\n0 Vancouver, BC \n1 Toronto, Ont \n2 Ottawa, Ont \n3 Calgary, Alb \n4 Montreal, Que \n5 Halifax, NS \n6 Regina, Sask \n7 Fredericton, NB \n8 (adsbygoogle = window.adsbygoogle || []).push(... \n\n Average House Price \\\n0 $1,036,000 \n1 $870,000 \n2 $479,000 \n3 $410,000 \n4 $435,000 \n5 $331,000 \n6 $254,000 \n7 $198,000 \n8 (adsbygoogle = window.adsbygoogle || []).push(... \n\n 12 Month Change \n0 + 2.63 % \n1 +10.2 % \n2 + 15.4 % \n3 – 1.5 % \n4 + 9.3 % \n5 + 3.6 % \n6 – 3.9 % \n7 – 4.3 % \n8 (adsbygoogle = window.adsbygoogle || []).push(... ",
+ "text/html": "\n\n
\n \n \n | \n City | \n Average House Price | \n 12 Month Change | \n
\n \n \n \n 0 | \n Vancouver, BC | \n $1,036,000 | \n + 2.63 % | \n
\n \n 1 | \n Toronto, Ont | \n $870,000 | \n +10.2 % | \n
\n \n 2 | \n Ottawa, Ont | \n $479,000 | \n + 15.4 % | \n
\n \n 3 | \n Calgary, Alb | \n $410,000 | \n – 1.5 % | \n
\n \n 4 | \n Montreal, Que | \n $435,000 | \n + 9.3 % | \n
\n \n 5 | \n Halifax, NS | \n $331,000 | \n + 3.6 % | \n
\n \n 6 | \n Regina, Sask | \n $254,000 | \n – 3.9 % | \n
\n \n 7 | \n Fredericton, NB | \n $198,000 | \n – 4.3 % | \n
\n \n 8 | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n
\n \n
\n
"
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# pd.read_html parses the <table> elements of a web page and returns a list of DataFrames (one per table)\n",
+ "house_price_df=pd.read_html('https://www.livingin-canada.com/house-prices-canada.html')\n",
+ "house_price_df[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Province \\\n0 British Columbia \n1 Ontario \n2 Alberta \n3 Quebec \n4 Manitoba \n5 Saskatchewan \n6 Nova Scotia \n7 Prince Edward Island \n8 Newfoundland / Labrador \n9 New Brunswick \n10 Canadian Average \n11 (adsbygoogle = window.adsbygoogle || []).push(... \n\n Average House Price \\\n0 $736,000 \n1 $594,000 \n2 $353,000 \n3 $340,000 \n4 $295,000 \n5 $271,000 \n6 $266,000 \n7 $243,000 \n8 $236,000 \n9 $183,000 \n10 $488,000 \n11 (adsbygoogle = window.adsbygoogle || []).push(... \n\n 12 Month Change \n0 + 7.6 % \n1 – 3.2 % \n2 – 7.5 % \n3 + 7.6 % \n4 – 1.4 % \n5 – 3.8 % \n6 + 3.5 % \n7 + 3.0 % \n8 – 1.6 % \n9 – 2.2 % \n10 – 1.3 % \n11 (adsbygoogle = window.adsbygoogle || []).push(... ",
+ "text/html": "\n\n
\n \n \n | \n Province | \n Average House Price | \n 12 Month Change | \n
\n \n \n \n 0 | \n British Columbia | \n $736,000 | \n + 7.6 % | \n
\n \n 1 | \n Ontario | \n $594,000 | \n – 3.2 % | \n
\n \n 2 | \n Alberta | \n $353,000 | \n – 7.5 % | \n
\n \n 3 | \n Quebec | \n $340,000 | \n + 7.6 % | \n
\n \n 4 | \n Manitoba | \n $295,000 | \n – 1.4 % | \n
\n \n 5 | \n Saskatchewan | \n $271,000 | \n – 3.8 % | \n
\n \n 6 | \n Nova Scotia | \n $266,000 | \n + 3.5 % | \n
\n \n 7 | \n Prince Edward Island | \n $243,000 | \n + 3.0 % | \n
\n \n 8 | \n Newfoundland / Labrador | \n $236,000 | \n – 1.6 % | \n
\n \n 9 | \n New Brunswick | \n $183,000 | \n – 2.2 % | \n
\n \n 10 | \n Canadian Average | \n $488,000 | \n – 1.3 % | \n
\n \n 11 | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n (adsbygoogle = window.adsbygoogle || []).push(... | \n
\n \n
\n
"
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "house_price_df[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Read tabular data using read_html\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #7:\n",
+ "- Write a code that uses Pandas to read tabular US retirement data\n",
+ "- You can use data from here: https://www.ssa.gov/oact/progdata/nra.html "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Year of birth \\\n0 1937 and prior \n1 1938 \n2 1939 \n3 1940 \n4 1941 \n5 1942 \n6 1943-54 \n7 1955 \n8 1956 \n9 1957 \n10 1958 \n11 1959 \n12 1960 and later \n13 Notes: 1. Persons born on January 1 of any yea... \n\n Age \n0 65 \n1 65 and 2 months \n2 65 and 4 months \n3 65 and 6 months \n4 65 and 8 months \n5 65 and 10 months \n6 66 \n7 66 and 2 months \n8 66 and 4 months \n9 66 and 6 months \n10 66 and 8 months \n11 66 and 10 months \n12 67 \n13 Notes: 1. Persons born on January 1 of any yea... ",
+ "text/html": "\n\n
\n \n \n | \n Year of birth | \n Age | \n
\n \n \n \n 0 | \n 1937 and prior | \n 65 | \n
\n \n 1 | \n 1938 | \n 65 and 2 months | \n
\n \n 2 | \n 1939 | \n 65 and 4 months | \n
\n \n 3 | \n 1940 | \n 65 and 6 months | \n
\n \n 4 | \n 1941 | \n 65 and 8 months | \n
\n \n 5 | \n 1942 | \n 65 and 10 months | \n
\n \n 6 | \n 1943-54 | \n 66 | \n
\n \n 7 | \n 1955 | \n 66 and 2 months | \n
\n \n 8 | \n 1956 | \n 66 and 4 months | \n
\n \n 9 | \n 1957 | \n 66 and 6 months | \n
\n \n 10 | \n 1958 | \n 66 and 8 months | \n
\n \n 11 | \n 1959 | \n 66 and 10 months | \n
\n \n 12 | \n 1960 and later | \n 67 | \n
\n \n 13 | \n Notes: 1. Persons born on January 1 of any yea... | \n Notes: 1. Persons born on January 1 of any yea... | \n
\n \n
\n
"
+ },
+ "execution_count": 148,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "riterement_df=pd.read_html('https://www.ssa.gov/oact/progdata/nra.html')\n",
+ "riterement_df[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #8: PANDAS OPERATIONS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 rayan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n rayan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 149,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's define a dataframe as follows:\n",
+ "bank_client_df =pd.DataFrame({'Bank Client ID':[111,222,333,444],'Bank Client Name':['Chanel','Steve','Mitch','rayan'],\n",
+ " 'Net Worth[$]':[3500,29000,10000,2000],\n",
+ " 'Years with bank':[3,4,9,5]})\n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Bank Client Name Net Worth[$] Years with bank\n2 333 Mitch 10000 9\n3 444 rayan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n rayan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_loyal=bank_client_df[bank_client_df['Years with bank']>=5]\n",
+ "df_loyal"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Pick certain rows that satisfy a certain criteria \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 151,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client Name Net Worth[$] Years with bank\n0 Chanel 3500 3\n1 Steve 29000 4\n2 Mitch 10000 9\n3 rayan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 0 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n rayan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 151,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Delete a column from a DataFrame\n",
+ "del bank_client_df['Bank Client ID']\n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #8:\n",
+ "- Using \"bank_client_df\" DataFrame, leverage pandas operations to select only high net worth individuals with a minimum of $5000 \n",
+ "- What is the combined net worth of all customers with a net worth of $5000 or more?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client Name Net Worth[$] Years with bank\n1 Steve 29000 4\n2 Mitch 10000 9",
+ "text/html": "\n\n
\n \n \n | \n Bank Client Name | \n Net Worth[$] | \n Years with bank | \n
\n \n \n \n 1 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n Mitch | \n 10000 | \n 9 | \n
\n \n
\n
"
+ },
+ "execution_count": 156,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "networth_df=bank_client_df[bank_client_df['Net Worth[$]']>=5000]\n",
+ "networth_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 157,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "39000"
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "networth_df['Net Worth[$]'].sum()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #9: PANDAS WITH FUNCTIONS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 158,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 Ryan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank client ID | \n Bank Client Name | \n Net worth [$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n Ryan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 158,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's define a dataframe as follows:\n",
+ "bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], \n",
+ " 'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], \n",
+ " 'Net worth [$]':[3500, 29000, 10000, 2000], \n",
+ " 'Years with bank':[3, 4, 9, 5]})\n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Define a function that increases every client's net worth (stocks) by a fixed 20% (for simplicity's sake) \n",
+ "def networth(balance):\n",
+ " return balance *1.2\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "0 4200.0\n1 34800.0\n2 12000.0\n3 2400.0\nName: Net worth [$], dtype: float64"
+ },
+ "execution_count": 161,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# You can apply a function to the DataFrame \n",
+ "bank_client_df['Net worth [$]'].apply(networth)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 162,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "0 6\n1 5\n2 5\n3 4\nName: Bank Client Name, dtype: int64"
+ },
+ "execution_count": 162,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bank_client_df['Bank Client Name'].apply(len)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #9:\n",
+ "- Define a function that triples the stock prices and adds $200\n",
+ "- Apply the function to the DataFrame\n",
+ "- Calculate the updated total networth of all clients combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 164,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "0 10700\n1 87200\n2 30200\n3 6200\nName: Net worth [$], dtype: int64"
+ },
+ "execution_count": 164,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def triple_stock(price):\n",
+ " return price*3 +200\n",
+ "bank_client_df['Net worth [$]'].apply(triple_stock)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #10: PERFORM SORTING AND ORDERING IN PANDAS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n2 333 Mitch 10000 9\n3 444 Ryan 2000 5",
+ "text/html": "\n\n
\n \n \n | \n Bank client ID | \n Bank Client Name | \n Net worth [$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n 3 | \n 444 | \n Ryan | \n 2000 | \n 5 | \n
\n \n
\n
"
+ },
+ "execution_count": 165,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's define a dataframe as follows:\n",
+ "bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], \n",
+ " 'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], \n",
+ " 'Net worth [$]':[3500, 29000, 10000, 2000], \n",
+ " 'Years with bank':[3, 4, 9, 5]})\n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 166,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n3 444 Ryan 2000 5\n2 333 Mitch 10000 9",
+ "text/html": "\n\n
\n \n \n | \n Bank client ID | \n Bank Client Name | \n Net worth [$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 3 | \n 444 | \n Ryan | \n 2000 | \n 5 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n
\n
"
+ },
+ "execution_count": 166,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# You can sort the values in the dataframe according to number of years with bank\n",
+ "bank_client_df.sort_values(by='Years with bank')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Note that nothing changed in memory! you have to make sure that inplace is set to True\n",
+ "bank_client_df.sort_values(by='Years with bank',inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank client ID Bank Client Name Net worth [$] Years with bank\n0 111 Chanel 3500 3\n1 222 Steve 29000 4\n3 444 Ryan 2000 5\n2 333 Mitch 10000 9",
+ "text/html": "\n\n
\n \n \n | \n Bank client ID | \n Bank Client Name | \n Net worth [$] | \n Years with bank | \n
\n \n \n \n 0 | \n 111 | \n Chanel | \n 3500 | \n 3 | \n
\n \n 1 | \n 222 | \n Steve | \n 29000 | \n 4 | \n
\n \n 3 | \n 444 | \n Ryan | \n 2000 | \n 5 | \n
\n \n 2 | \n 333 | \n Mitch | \n 10000 | \n 9 | \n
\n \n
\n
"
+ },
+ "execution_count": 168,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Set inplace = True to ensure that change has taken place in memory \n",
+ "bank_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Note that now the change (ordering) took place \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #11: PERFORM CONCATENATING AND MERGING WITH PANDAS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Check this out: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 169,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "df1=pd.DataFrame({'A':['A0','A1','A2','A3'],\n",
+ " 'B':['B0','B1','B2','B3'],\n",
+ " 'C':['C0','C1','C2','C3'],\n",
+ " 'D':['D0','D1','D2','D3']},\n",
+ " index=[0,1,2,3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 170,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " A B C D\n0 A0 B0 C0 D0\n1 A1 B1 C1 D1\n2 A2 B2 C2 D2\n3 A3 B3 C3 D3",
+ "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n A1 | \n B1 | \n C1 | \n D1 | \n
\n \n 2 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n 3 | \n A3 | \n B3 | \n C3 | \n D3 | \n
\n \n
\n
"
+ },
+ "execution_count": 170,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " A B C D\n0 A4 B4 C4 D4\n1 A5 B5 C5 D5\n2 A6 B6 C6 D6\n3 A7 B7 C7 D7",
+ "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n A4 | \n B4 | \n C4 | \n D4 | \n
\n \n 1 | \n A5 | \n B5 | \n C5 | \n D5 | \n
\n \n 2 | \n A6 | \n B6 | \n C6 | \n D6 | \n
\n \n 3 | \n A7 | \n B7 | \n C7 | \n D7 | \n
\n \n
\n
"
+ },
+ "execution_count": 173,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2=pd.DataFrame({'A':['A4','A5','A6','A7'],\n",
+ " 'B':['B4','B5','B6','B7'],\n",
+ " 'C':['C4','C5','C6','C7'],\n",
+ " 'D':['D4','D5','D6','D7']},\n",
+ " index=[0,1,2,3])\n",
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " A B C D\n0 A0 B0 C0 D0\n1 A1 B1 C1 D1\n2 A2 B2 C2 D2\n3 A3 B3 C3 D3\n0 A4 B4 C4 D4\n1 A5 B5 C5 D5\n2 A6 B6 C6 D6\n3 A7 B7 C7 D7",
+ "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n A1 | \n B1 | \n C1 | \n D1 | \n
\n \n 2 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n 3 | \n A3 | \n B3 | \n C3 | \n D3 | \n
\n \n 0 | \n A4 | \n B4 | \n C4 | \n D4 | \n
\n \n 1 | \n A5 | \n B5 | \n C5 | \n D5 | \n
\n \n 2 | \n A6 | \n B6 | \n C6 | \n D6 | \n
\n \n 3 | \n A7 | \n B7 | \n C7 | \n D7 | \n
\n \n
\n
"
+ },
+ "execution_count": 174,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1,df2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# TASK #12: PROJECT AND CONCLUDING REMARKS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "- Define a dataframe named 'bank_df_1' that contains the first and last names for 5 bank clients with IDs = 1, 2, 3, 4, 5 \n",
+ "- Assume that the bank got 5 new clients, define another dataframe named 'bank_df_2' that contains the new clients with IDs = 6, 7, 8, 9, 10\n",
+ "- Let's assume we obtained additional information (Annual Salary) about all our bank customers (10 customers) \n",
+ "- Concatenate both 'bank_df_1' and 'bank_df_2' dataframes\n",
+ "- Merge client names and their newly added salary information using the 'Bank Client ID'\n",
+ "- Let's assume that you became a new client to the bank\n",
+ "- Define a new DataFrame that contains your information such as client ID (choose 11), first name, last name, and annual salary.\n",
+ "- Add this new dataframe to the original dataframe 'bank_df_all'."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 200,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name\n0 1 Nancy Rob\n1 2 Alex Ali\n2 3 Shep George\n3 4 Max Mitch\n4 5 Allen Steve",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n
\n \n \n \n 0 | \n 1 | \n Nancy | \n Rob | \n
\n \n 1 | \n 2 | \n Alex | \n Ali | \n
\n \n 2 | \n 3 | \n Shep | \n George | \n
\n \n 3 | \n 4 | \n Max | \n Mitch | \n
\n \n 4 | \n 5 | \n Allen | \n Steve | \n
\n \n
\n
"
+ },
+ "execution_count": 200,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'],\n",
+ " 'First Name': ['Nancy', 'Alex', 'Shep', 'Max', 'Allen'],\n",
+ " 'Last Name': ['Rob', 'Ali', 'George', 'Mitch', 'Steve']}\n",
+ "bank_df_1=pd.DataFrame(raw_data,columns=['Bank Client ID','First Name','Last Name'])\n",
+ "# bank_df_1=pd.DataFrame(raw_data)\n",
+ "bank_df_1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 201,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name\n0 6 Bill Christian\n1 7 Dina Mo\n2 8 Sarah Steve\n3 9 Heather Bob\n4 10 Holly Michelle",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n
\n \n \n \n 0 | \n 6 | \n Bill | \n Christian | \n
\n \n 1 | \n 7 | \n Dina | \n Mo | \n
\n \n 2 | \n 8 | \n Sarah | \n Steve | \n
\n \n 3 | \n 9 | \n Heather | \n Bob | \n
\n \n 4 | \n 10 | \n Holly | \n Michelle | \n
\n \n
\n
"
+ },
+ "execution_count": 201,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = {\n",
+ " 'Bank Client ID': ['6', '7', '8', '9', '10'],\n",
+ " 'First Name': ['Bill', 'Dina', 'Sarah', 'Heather', 'Holly'],\n",
+ " 'Last Name': ['Christian', 'Mo', 'Steve', 'Bob', 'Michelle']}\n",
+ "bank_df_2=pd.DataFrame(raw_data,columns=['Bank Client ID','First Name','Last Name'])\n",
+ "bank_df_2"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 202,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID Annual Salary [$/year]\n0 1 25000\n1 2 35000\n2 3 45000\n3 4 48000\n4 5 49000\n5 6 32000\n6 7 33000\n7 8 34000\n8 9 23000\n9 10 22000",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n Annual Salary [$/year] | \n
\n \n \n \n 0 | \n 1 | \n 25000 | \n
\n \n 1 | \n 2 | \n 35000 | \n
\n \n 2 | \n 3 | \n 45000 | \n
\n \n 3 | \n 4 | \n 48000 | \n
\n \n 4 | \n 5 | \n 49000 | \n
\n \n 5 | \n 6 | \n 32000 | \n
\n \n 6 | \n 7 | \n 33000 | \n
\n \n 7 | \n 8 | \n 34000 | \n
\n \n 8 | \n 9 | \n 23000 | \n
\n \n 9 | \n 10 | \n 22000 | \n
\n \n
\n
"
+ },
+ "execution_count": 202,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = {\n",
+ " 'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],\n",
+ " 'Annual Salary [$/year]': [25000, 35000, 45000, 48000, 49000, 32000, 33000, 34000, 23000, 22000]}\n",
+ "bank_df_salary=pd.DataFrame(raw_data,columns=['Bank Client ID','Annual Salary [$/year]'])\n",
+ "bank_df_salary"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 204,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name\n0 1 Nancy Rob\n1 2 Alex Ali\n2 3 Shep George\n3 4 Max Mitch\n4 5 Allen Steve\n0 6 Bill Christian\n1 7 Dina Mo\n2 8 Sarah Steve\n3 9 Heather Bob\n4 10 Holly Michelle",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n
\n \n \n \n 0 | \n 1 | \n Nancy | \n Rob | \n
\n \n 1 | \n 2 | \n Alex | \n Ali | \n
\n \n 2 | \n 3 | \n Shep | \n George | \n
\n \n 3 | \n 4 | \n Max | \n Mitch | \n
\n \n 4 | \n 5 | \n Allen | \n Steve | \n
\n \n 0 | \n 6 | \n Bill | \n Christian | \n
\n \n 1 | \n 7 | \n Dina | \n Mo | \n
\n \n 2 | \n 8 | \n Sarah | \n Steve | \n
\n \n 3 | \n 9 | \n Heather | \n Bob | \n
\n \n 4 | \n 10 | \n Holly | \n Michelle | \n
\n \n
\n
"
+ },
+ "execution_count": 204,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bank_df_all=pd.concat([bank_df_1,bank_df_2])\n",
+ "bank_df_all"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 205,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 1 Nancy Rob 25000\n1 2 Alex Ali 35000\n2 3 Shep George 45000\n3 4 Max Mitch 48000\n4 5 Allen Steve 49000\n5 6 Bill Christian 32000\n6 7 Dina Mo 33000\n7 8 Sarah Steve 34000\n8 9 Heather Bob 23000\n9 10 Holly Michelle 22000",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n Annual Salary [$/year] | \n
\n \n \n \n 0 | \n 1 | \n Nancy | \n Rob | \n 25000 | \n
\n \n 1 | \n 2 | \n Alex | \n Ali | \n 35000 | \n
\n \n 2 | \n 3 | \n Shep | \n George | \n 45000 | \n
\n \n 3 | \n 4 | \n Max | \n Mitch | \n 48000 | \n
\n \n 4 | \n 5 | \n Allen | \n Steve | \n 49000 | \n
\n \n 5 | \n 6 | \n Bill | \n Christian | \n 32000 | \n
\n \n 6 | \n 7 | \n Dina | \n Mo | \n 33000 | \n
\n \n 7 | \n 8 | \n Sarah | \n Steve | \n 34000 | \n
\n \n 8 | \n 9 | \n Heather | \n Bob | \n 23000 | \n
\n \n 9 | \n 10 | \n Holly | \n Michelle | \n 22000 | \n
\n \n
\n
"
+ },
+ "execution_count": 205,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bank_df_all = bank_df_all.merge(bank_df_salary, on='Bank Client ID')\n",
+ "bank_df_all"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 206,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 11 Rayan Ahemd 5000",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n Annual Salary [$/year] | \n
\n \n \n \n 0 | \n 11 | \n Rayan | \n Ahemd | \n 5000 | \n
\n \n
\n
"
+ },
+ "execution_count": 206,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_client = {'Bank Client ID': ['11'],\n",
+ "              'First Name': ['Rayan'],\n",
+ "              'Last Name': ['Ahemd'],\n",
+ "              'Annual Salary [$/year]': [5000]}\n",
+ "new_client_df = pd.DataFrame(new_client, columns=list(new_client))\n",
+ "new_client_df"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 208,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Bank Client ID First Name Last Name Annual Salary [$/year]\n0 1 Nancy Rob 25000\n1 2 Alex Ali 35000\n2 3 Shep George 45000\n3 4 Max Mitch 48000\n4 5 Allen Steve 49000\n5 6 Bill Christian 32000\n6 7 Dina Mo 33000\n7 8 Sarah Steve 34000\n8 9 Heather Bob 23000\n9 10 Holly Michelle 22000\n0 11 Rayan Ahemd 5000",
+ "text/html": "\n\n
\n \n \n | \n Bank Client ID | \n First Name | \n Last Name | \n Annual Salary [$/year] | \n
\n \n \n \n 0 | \n 1 | \n Nancy | \n Rob | \n 25000 | \n
\n \n 1 | \n 2 | \n Alex | \n Ali | \n 35000 | \n
\n \n 2 | \n 3 | \n Shep | \n George | \n 45000 | \n
\n \n 3 | \n 4 | \n Max | \n Mitch | \n 48000 | \n
\n \n 4 | \n 5 | \n Allen | \n Steve | \n 49000 | \n
\n \n 5 | \n 6 | \n Bill | \n Christian | \n 32000 | \n
\n \n 6 | \n 7 | \n Dina | \n Mo | \n 33000 | \n
\n \n 7 | \n 8 | \n Sarah | \n Steve | \n 34000 | \n
\n \n 8 | \n 9 | \n Heather | \n Bob | \n 23000 | \n
\n \n 9 | \n 10 | \n Holly | \n Michelle | \n 22000 | \n
\n \n 0 | \n 11 | \n Rayan | \n Ahemd | \n 5000 | \n
\n \n
\n
"
+ },
+ "execution_count": 208,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_df = pd.concat(objs=[bank_df_all, new_client_df])\n",
+ "new_df"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "dakv7iPFgw7Y",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# EXCELLENT JOB!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "pNWQrMz6KnIF",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# MINI CHALLENGES SOLUTIONS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "6AMqSvtAKj6o",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #1 SOLUTION: \n",
+ "- Write code that creates the following 2x4 NumPy array\n",
+ "\n",
+ "```\n",
+ "[[3 7 9 3] \n",
+ "[4 3 2 2]]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "ivvFzDy-KlQP",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x = np.array([[3, 7, 9, 3], [4, 3, 2, 2]])  # two rows of four values -> shape (2, 4)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "2GskzNkOMFu0",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #2 SOLUTION:\n",
+ "- Write code that takes in a positive integer \"x\" from the user and creates a 1x10 array of random integers ranging from 0 to \"x\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 4089,
+ "status": "ok",
+ "timestamp": 1598026853681,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "0vnDfvwFMFu3",
+ "outputId": "0a7008bb-7820-42a6-d942-6e5f22b38672",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "upper_bound = int(input(\"Please enter a positive integer value: \"))\n",
+ "x = np.random.randint(0, upper_bound + 1, 10)  # low (0) is inclusive, high is exclusive, so + 1 lets upper_bound itself be drawn\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "tIVL0uEr4xsC",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "s1p4WaNB4yFf",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #3 SOLUTION:\n",
+ "- Given the X and Y coordinate values below, obtain the distance of each (X, Y) point from the origin, i.e. sqrt(X^2 + Y^2)\n",
+ "\n",
+ "\n",
+ "```\n",
+ "X = [5, 7, 20]\n",
+ "Y = [9, 15, 4]\n",
+ "```\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 481,
+ "status": "ok",
+ "timestamp": 1598027988068,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "_GJmf2Pl4yFi",
+ "outputId": "6aed42b8-e013-4461-e0ac-343555153b51",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X = np.array((5, 7, 20))\n",
+ "Y = np.array((9, 15, 4))\n",
+ "Z = np.sqrt(np.square(X) + np.square(Y))  # element-wise sqrt(X^2 + Y^2)\n",
+ "Z"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "EZ9i2CLm6Iuf",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #4 SOLUTION:\n",
+ "- In the following matrix, replace the last row with 0\n",
+ "\n",
+ "```\n",
+ "X = [2 30 20 -2 -4]\n",
+ " [3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]\n",
+ " [13 24 22 32 37]\n",
+ "```\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 356,
+ "status": "ok",
+ "timestamp": 1598028573756,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "qB7S_wIx6Hsm",
+ "outputId": "176ca693-b427-4447-cd56-6cef02f8cbca",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X = np.array(((2, 30, 20, -2, -4),\n",
+ "              (3, 4, 40, -3, -2),\n",
+ "              (-3, 4, -6, 90, 10),\n",
+ "              (25, 45, 34, 22, 12),\n",
+ "              (13, 24, 22, 32, 37)))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 102
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 307,
+ "status": "ok",
+ "timestamp": 1598028590866,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "y98hmGRe7Qtl",
+ "outputId": "ef92bacd-6440-411d-c860-aa99fbb705fa",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X[4, :] = 0  # row index 4 is the fifth (last) row of the 5x5 matrix\n",
+ "X"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "2oikl6JS9YUs",
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #5 SOLUTION:\n",
+ "- In the following matrix, replace negative elements by 0 and replace odd elements with -2\n",
+ "\n",
+ "\n",
+ "```\n",
+ "X = [2 30 20 -2 -4]\n",
+ " [3 4 40 -3 -2]\n",
+ " [-3 4 -6 90 10]\n",
+ " [25 45 34 22 12]\n",
+ " [13 24 22 32 37]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 102
+ },
+ "colab_type": "code",
+ "executionInfo": {
+ "elapsed": 346,
+ "status": "ok",
+ "timestamp": 1598029110658,
+ "user": {
+ "displayName": "Stemplicity",
+ "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gj2vCFt-bD5mPBhUWX0QA6xiBgoXQXtwUm2jgjaCg=s64",
+ "userId": "10668071569687886186"
+ },
+ "user_tz": 240
+ },
+ "id": "h105KzGr9YUu",
+ "outputId": "b5b352dd-e609-45f0-9571-73f25505b2b3",
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X = np.array(((2, 30, 20, -2, -4),\n",
+ "              (3, 4, 40, -3, -2),\n",
+ "              (-3, 4, -6, 90, 10),\n",
+ "              (25, 45, 34, 22, 12),\n",
+ "              (13, 24, 22, 32, 37)))\n",
+ "\n",
+ "X[np.less(X, 0)] = 0  # zero out the negatives first\n",
+ "X[np.mod(X, 2) == 1] = -2  # then overwrite the remaining odd values\n",
+ "X"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #6 SOLUTION:\n",
+ "- A portfolio contains a collection of securities such as stocks, bonds and ETFs. Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)\n",
+ "- Calculate the total value of the portfolio including all stocks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "portfolio_df = pd.DataFrame(\n",
+ "    [('AAPL', 3500, 3), ('AMZN', 200, 4), ('T', 40, 9)],\n",
+ "    columns=['stock ticker symbols', 'price per share [$]', 'Number of stocks'])\n",
+ "portfolio_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "stocks_dollar_value = portfolio_df['price per share [$]'].mul(portfolio_df['Number of stocks'])\n",
+ "print(stocks_dollar_value)\n",
+ "print(f'Total portfolio value = {stocks_dollar_value.sum()}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #7 SOLUTION:\n",
+ "- Write code that uses pandas to read tabular US retirement data\n",
+ "- You can use data from here: https://www.ssa.gov/oact/progdata/nra.html "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# pd.read_html returns a list holding one DataFrame per HTML table found on the page\n",
+ "retirement_age_df = pd.read_html(io='https://www.ssa.gov/oact/progdata/nra.html')\n",
+ "retirement_age_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #8 SOLUTION:\n",
+ "- Using the \"bank_client_df\" DataFrame, leverage pandas operations to select only high net worth individuals, i.e. those with a net worth of at least $5000\n",
+ "- What is the combined net worth of all customers with a net worth of 5000 or more?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "df_high_networth = bank_client_df.loc[bank_client_df['Net worth [$]'].ge(5000)]\n",
+ "df_high_networth"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "df_high_networth.loc[:, 'Net worth [$]'].sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "MINI CHALLENGE #9 SOLUTION:\n",
+ "- Define a function that triples a client's net worth and adds $200\n",
+ "- Apply the function to the DataFrame\n",
+ "- Calculate the updated total networth of all clients combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def networth_update(balance):\n",
+ "    return 200 + 3 * balance  # triple the balance, then add a flat 200"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Series.apply calls networth_update once for every value in the 'Net worth [$]' column\n",
+ "results = bank_client_df.loc[:, 'Net worth [$]'].apply(networth_update)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "results.sum(axis=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "PROJECT SOLUTION:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Build the bank-client tables from plain dictionaries\n",
+ "# First table: clients with IDs 1 through 5\n",
+ "\n",
+ "raw_data = {'Bank Client ID': [str(client_id) for client_id in range(1, 6)],\n",
+ "            'First Name': ['Nancy', 'Alex', 'Shep', 'Max', 'Allen'],\n",
+ "            'Last Name': ['Rob', 'Ali', 'George', 'Mitch', 'Steve']}\n",
+ "\n",
+ "Bank_df_1 = pd.DataFrame(raw_data, columns=list(raw_data))\n",
+ "Bank_df_1\n",
+ "\n",
+ "\n",
+ "# Second table: a separate list of clients with IDs 6 through 10\n",
+ "raw_data = {\n",
+ "    'Bank Client ID': [str(client_id) for client_id in range(6, 11)],\n",
+ "    'First Name': ['Bill', 'Dina', 'Sarah', 'Heather', 'Holly'],\n",
+ "    'Last Name': ['Christian', 'Mo', 'Steve', 'Bob', 'Michelle']}\n",
+ "Bank_df_2 = pd.DataFrame(raw_data, columns=list(raw_data))\n",
+ "Bank_df_2\n",
+ "\n",
+ "\n",
+ "# Third table: annual salary for each client, keyed by the same ID column\n",
+ "# It covers all ten clients (IDs 1 to 10)\n",
+ "raw_data = {\n",
+ "    'Bank Client ID': [str(client_id) for client_id in range(1, 11)],\n",
+ "    'Annual Salary [$/year]': [25000, 35000, 45000, 48000, 49000, 32000, 33000, 34000, 23000, 22000]}\n",
+ "bank_df_salary = pd.DataFrame(raw_data, columns=list(raw_data))\n",
+ "bank_df_salary\n",
+ "\n",
+ "\n",
+ "# Stack tables #1 and #2 on top of each other\n",
+ "# The result lists client IDs 1 to 10\n",
+ "bank_df_all = pd.concat(objs=[Bank_df_1, Bank_df_2])\n",
+ "bank_df_all\n",
+ "\n",
+ "\n",
+ "# Join the salary column onto the names, using 'Bank Client ID' as the key\n",
+ "bank_df_all = bank_df_all.merge(bank_df_salary, on='Bank Client ID')\n",
+ "bank_df_all\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "new_client = {\n",
+ "    'Bank Client ID': ['11'],\n",
+ "    'First Name': ['Ry'],\n",
+ "    'Last Name': ['Aly'],\n",
+ "    'Annual Salary [$/year]': [1000]}\n",
+ "new_client_df = pd.DataFrame(new_client, columns=list(new_client))\n",
+ "new_client_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "new_df = pd.concat([bank_df_all, new_client_df], axis=0, ignore_index=True)  # renumber rows 0..10; without this the appended client repeats index label 0\n",
+ "new_df"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "7. Python 101 - Python Libraries for Data Analysis - Numpy Solution.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
\ No newline at end of file