From bb3fbfd983cadb4bae92bdac03fc5b2d68e117a8 Mon Sep 17 00:00:00 2001
From: xm3van <xm3van@gmail.com>
Date: Fri, 8 Apr 2022 14:56:23 +0100
Subject: [PATCH] visualisation .py file

---
 pi_output_visualisation.py | 636 +++++++++++++++++++++++++++++++++++++
 1 file changed, 636 insertions(+)
 create mode 100755 pi_output_visualisation.py

diff --git a/pi_output_visualisation.py b/pi_output_visualisation.py
new file mode 100755
index 0000000..a12d571
--- /dev/null
+++ b/pi_output_visualisation.py
@@ -0,0 +1,636 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+# Set up
+## Configs
+from os import getcwd
+from os.path import join 
+
+## visualisation
+import pandas as pd 
+import matplotlib.pyplot as plt
+import hvplot.pandas #monkey patch for pandas --> upgrades plots 
+import plotly.express as px
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+
+## data manipulation 
+import numpy as np 
+from itertools import product
+
+## sampling 
+import random 
+
+## wallet which have done more than 100 actions
+def sample_more_naction(df, wallet_or_proposal, no):
+    
+    """
+    Goals: Sample proposal that have done more than n actions 
+    
+    Inputs:
+    - df = data frame to sample form
+    - wallet_or_proposal = if true then wallet else (string of wallet or porposal) 
+    - no = threshold no of actions (integer)
+    
+    Outputs:
+    - wallet addresses with top or bottom most average funds
+    
+    """
+    sub_sample = df.groupby(wallet_or_proposal).count()
+    sub_sample = sub_sample[sub_sample['action']>no].reset_index()
+    sub_sample = sub_sample[wallet_or_proposal]
+    
+    return list(sub_sample) 
+    
+sample_more_naction(df_pi, 'wallet', 50)
+    
+
+
+def sample_wallet_by_action(df, action, portion_of_wallets=0.3, ascending=True):
+    
+    """
+    Goals: Sample for wallet by action 
+    
+    Inputs:
+    - df = data frame to sample form
+    - action = action of evaluation (string)
+    - pop_pct = percentage of population to sample from (float between 0 to 1)
+    - ascending = ascending show bottom x%, vice versa (Bolean)
+    
+    Outputs:
+    - proposal addresses with top or bottom most average funds
+    
+    """
+
+    ### action per wallet 
+    sub_sample = df_pi.groupby(['wallet', 'action']).count().reset_index()
+
+    ### sort frequency by action pay in descending order 
+    sub_sample = sub_sample[sub_sample['action']==action].sort_values('proposal', ascending=ascending)
+
+    ### take top x% of wallets 
+    n_wallet = int(sub_sample.shape[0]*portion_of_wallets) 
+
+    ## sample wallet 
+    sub_sample = sub_sample[:n_wallet]['wallet']
+    
+    return list(sub_sample) 
+
+sample_wallet_by_action(df_pi, 'pay', 0.3, ascending=True)
+
+
+
+
+def sample_proposal_success(df, portion_of_wallets=0.3, unsuccessful=True):
+    
+    """
+    Goals: Sample for most successful proposal
+    
+    Inputs:
+    - df = data frame to sample form
+    - portion_of_wallets = proportion of population to sample from (float between 0 to 1)
+    - successful = If true show top x%, vice versa (Bolean)
+    
+    Outputs:
+    - proposal addresses with top or bottom most average funds
+    
+    """
+    
+    ### action per wallet 
+    sub_sample = df.groupby(['proposal']).mean().reset_index()
+
+    ### sort frequency by action pay in descending order 
+    sub_sample = sub_sample.sort_values('proposal_funds', ascending=unsuccessful)
+
+    ### take top x% of proposals
+    n_proposal = int(sub_sample.shape[0]*portion_of_wallets) 
+
+    ## sample wallet 
+    sub_sample = sub_sample[:n_proposal]['proposal'].unique()
+    
+    return list(sub_sample)
+
+sample_proposal_success(df_pi, 0.3, unsuccessful=True)
+
+
+# In[10]:
+
+
+# Q1: Which actions does each wallet perform on each proposal?
+
+def sampled_graph_action_of_wallet_on_proposal(dataframe, sqrt_sample_size=3, nmin_threshold_actions=50):
+    
+
+    ## set df 
+    df = dataframe[dataframe['action']!='initialize'].groupby(['wallet','proposal', 'action']).count().reset_index()
+
+    ## number of visualisation 
+    N=sqrt_sample_size
+
+    ## no_of_action samples 
+    sub_sample = sample_more_naction(dataframe, 'wallet', nmin_threshold_actions) # return list of wallets which have complete more than n min threshold of actions
+    sample_wallets = random.sample(sub_sample, N**2)
+
+    ## colourcode 
+    colors=dict(zip(df.action.unique(),['red', 'green', 'yellow', 'blue', 'cyan', 'brown']))
+
+    ## determine position of output
+    pos = list(product(range(1,(1+N)), range(1,(1+N))))
+
+    # set up fig & populate 
+    fig = make_subplots(rows=N, 
+                        cols=N, 
+                        horizontal_spacing=0.05,
+                        vertical_spacing=0.05,
+                        shared_xaxes=True,
+                        y_title='Frequency', 
+                        x_title='',
+                        subplot_titles=sample_wallets)
+
+    # create a figure 
+    for pos_df in zip(pos, sample_wallets):
+
+        pos, w_id = pos_df
+
+        r, c = pos 
+
+        for a in df.action.unique(): 
+
+            ## select sub df 
+            data=df[(df['wallet']==w_id) & (df['action']==a)].to_dict('series')
+
+            # add figures 
+            fig.add_trace(
+                        go.Bar(
+                        name=a,
+                        legendgroup=a, #one legend for all 
+                        x=data['proposal'],
+                        y=data['timestep'], # remember this is just a count 
+                        showlegend=False,
+                        marker_color=colors[a]
+                        ),
+
+                        row=r, col=c
+                    )  
+
+
+
+    fig.update_layout(barmode='stack',
+                      height=900, 
+                      width=900, 
+                      coloraxis=dict(colorscale='RdBu'),
+                      font=dict(size=8),
+                      title_text=f"Actions performed by each wallet on each proposal [Sampled: {N**2} Wallets]")
+
+
+
+
+    ## change size and colour of subtitles 
+    for i in fig['layout']['annotations']:
+        i['font'] = dict(size=8,color='#000000')
+
+    fig.show()
+
+
+# ## Dimensions: Proposals, Action, Time 
+# 
+# - Q1: How do the actions performed on each proposal change over time?
+# - Q2: How does the frequency of each action change over time?
+# - Q3: How are proposals performing over time?
+# - Q4: Which actions are being performed on each proposal?
+# 
+
+# In[11]:
+
+def sampled_graph_action_on_proposal(dataframe, sqrt_sample_size=3, portion_of_wallets=0.3, unsuccessful=True):
+
+    ## set df 
+    df = dataframe.groupby(['proposal','timestep', 'action']).count().reset_index()
+
+    ## number of visualisation 
+    N  = sqrt_sample_size
+
+    ## random samples 
+    sub_sample = sample_proposal_success(df, portion_of_wallets=portion_of_wallets, unsuccessful=unsuccessful)
+    sample_proposals = random.sample(sub_sample, N**2)
+
+    ## colourcode 
+    colors =dict(zip(df.action.unique(),['red', 'green', 'yellow', 'blue', 'cyan', 'brown']))
+
+    ## showledgend switch 
+    showlegend=True 
+
+
+    ## determine position of output
+    pos = list(product(range(1,(1+N)), range(1,(1+N))))
+
+    # set up fig & populate 
+    fig = make_subplots(rows=N, 
+                        cols=N, 
+    #                     shared_xaxes='all',
+    #                     shared_yaxes='all',
+                        horizontal_spacing=0.05,
+                        vertical_spacing=0.05,
+                        y_title='Frequency', 
+                        x_title='Timestep',
+                        subplot_titles=sample_proposals)
+
+    # create a figure 
+    for pos_df in zip(pos, sample_proposals):
+
+        pos, p_id = pos_df
+
+        r, c = pos 
+
+        for a in df.action.unique(): 
+
+            ## select sub df 
+            data=df[(df['proposal']==p_id) & (df['action']==a)].to_dict('series')
+
+            # add figures 
+            fig.add_trace(
+                        go.Bar(
+                        name=a,
+                        legendgroup=a, #one legend for all 
+                        x=data['timestep'],
+                        y=data['wallet'],
+                        showlegend=False,
+                        marker_color=colors[a]
+                        ),
+
+                        row=r, col=c
+                    )  
+
+        #showlegend=False 
+
+
+    fig.update_layout(barmode='stack',
+                      height=900, 
+                      width=900, 
+                      coloraxis=dict(colorscale='RdBu'),
+                      showlegend=True,
+    #                   title_pad={'b':0, 'l':0, 'r':0, 't':0}, 
+                      title_text= f"Actions performed on each proposal over time [Sampled: {N**2} Wallets]")
+
+    # fig.update_xaxes(title='timestep',title_font_size=8)
+    # fig.update_yaxes(title='frequency',title_font_size=8) #does not amend size
+
+
+
+    ## change size and colour of subtitles 
+    for i in fig['layout']['annotations']:
+        i['font'] = dict(size=8,color='#000000')
+
+    fig.show()
+
+
+
+
+# In[12]:
+
+
+# Q2: How does the frequency of each action change over time?
+
+def frequency_of_each_action_on_proposal(dataframe):
+    """
+    Goal: Show frequency of each action over time
+    """
+    
+    fig_df = dataframe.groupby(['timestep', 'action' ]).count().reset_index()
+    fig_df
+    fig = px.line(
+        fig_df,
+        x='timestep',
+        y='proposal',
+        color='action',
+        title='Actions performed on proposals over time',
+        labels={"timestep": "Timestep",
+                "proposal": "Frequency",             
+                "action": "Action",             
+               })
+
+    fig.show()
+    
+
+
+# In[13]:
+
+
+# Q3: How are proposals performing over time?
+
+def performance_of_proposal_over_time(dataframe):
+    fig_df = dataframe.groupby(['timestep', 'proposal' ]).sum().reset_index()
+
+    fig = px.line(
+        fig_df,
+        x='timestep',
+        y='proposal_funds',
+        color='proposal',
+        title='Performance of proposals over time',
+        labels={"timestep": "Timestep",
+                "proposal": "Frequency",             
+                "proposal_funds": "Proposal Funds",             
+               })
+
+    fig.show()
+
+# In[14]:
+
+
+def sampled_graph_action_on_proposal(dataframe, sqrt_sample_size=3, nmin_threshold_actions=50):
+    
+
+    ## set df 
+    df = dataframe.groupby(['proposal', 'action']).count().reset_index()
+
+    ## number of visualisation 
+    N=sqrt_sample_size
+
+    ## no_of_action samples 
+    sub_sample = sample_more_naction(dataframe, 'proposal', nmin_threshold_actions) # return list of proposal which have complete more than n min threshold of actions
+    sample_proposals = random.sample(sub_sample, N**2)
+
+    ## colourcode 
+    colors=dict(zip(df.action.unique(),['red', 'green', 'yellow', 'blue', 'cyan', 'brown']))
+
+    ## determine position of output
+    pos = list(product(range(1,(1+N)), range(1,(1+N))))
+
+    # set up fig & populate 
+    fig = make_subplots(rows=N, 
+                        cols=N, 
+                        horizontal_spacing=0.05,
+                        vertical_spacing=0.05,
+                        shared_xaxes=False,
+                        y_title='Frequency', 
+                        x_title='Proposal',
+                        subplot_titles=sample_proposals)
+
+    # create a figure 
+    for pos_df in zip(pos, sample_proposals):
+
+        pos, p_id = pos_df
+
+        r, c = pos 
+
+        for a in df.action.unique(): 
+
+            ## select sub df 
+            data=df[(df['proposal']==p_id) & (df['action']==a)].to_dict('series')
+
+            # add figures 
+            fig.add_trace(
+                        go.Bar(
+                        name=a,
+                        legendgroup=a, #one legend for all 
+                        x=data['proposal'],
+                        y=data['timestep'], # remember this is just a count 
+                        showlegend=False,
+                        marker_color=colors[a]
+                        ),
+
+                        row=r, col=c
+                    )  
+
+
+
+    fig.update_layout(barmode='group',
+                      height=900, 
+                      width=900, 
+                      coloraxis=dict(colorscale='RdBu'),
+                      font=dict(size=8),
+                      title_text=f"Actions performed by each wallet on each proposal [Sampled: {N**2} Wallets]")
+
+
+
+
+    ## change size and colour of subtitles 
+    for i in fig['layout']['annotations']:
+        i['font'] = dict(size=8,color='#000000')
+
+    fig.show()
+
+
+# ## Dimension: Wallet, Action, Time 
+# - Q1: Which actions does each wallet perform over time?
+# - Q2: How does the frequency of each action change over time?
+# - Q3: How does each wallet perform over time?
+# - Q4: Which actions does each wallet perform?
+
+# In[16]:
+
+
+# Q1: Which actions does each wallet perform over time?
+def sampled_graph_action_wallet_over_time(dataframe, sqrt_sample_size=3, nmin_threshold_actions=10):
+    ## set df 
+    df = dataframe.groupby(['wallet','timestep', 'action']).count().reset_index()
+
+    ## number of visualisation 
+    N=sqrt_sample_size
+
+    ## random samples 
+    sub_sample = sample_more_naction(dataframe, 'wallet', nmin_threshold_actions) # return list of wallets which have complete more than n min threshold of actions
+    sample_wallets = random.sample(sub_sample, N**2)
+
+
+    ## colourcode 
+    colors =dict(zip(df.action.unique(),['red', 'green', 'yellow', 'blue', 'cyan', 'brown']))
+
+
+    ## determine position of output
+    pos = list(product(range(1,(1+N)), range(1,(1+N))))
+
+    # set up fig & populate 
+    fig = make_subplots(rows=N, 
+                        cols=N, 
+                        horizontal_spacing=0.05,
+                        vertical_spacing=0.05,
+                        y_title='Frequency', 
+                        x_title='Timestep',
+                        subplot_titles=sample_wallets)
+
+    # create a figure 
+    for pos_df in zip(pos, sample_wallets):
+
+        pos, w_id = pos_df
+
+        r, c = pos 
+
+        for a in df.action.unique(): 
+
+            ## select sub df 
+            data=df[(df['wallet']==w_id) & (df['action']==a)].to_dict('series')
+
+            # add figures 
+            fig.add_trace(
+                        go.Bar(
+                        name=a,
+                        legendgroup=a, #grouped legend 
+                        x=data['timestep'],
+                        y=data['proposal'],
+                        showlegend=False,
+                        marker_color=colors[a]
+                        ),
+
+                        row=r, col=c
+                    )  
+
+
+    fig.update_layout(barmode='stack',
+                      height=900, 
+                      width=900, 
+                      coloraxis=dict(colorscale='RdBu'),
+                      showlegend=True,
+                      title_text="Actions performed on by each wallet over time [Sampled]")
+
+
+    ## change size and colour of subtitles 
+    for i in fig['layout']['annotations']:
+        i['font'] = dict(size=8,color='#000000')
+
+    fig.show()
+
+
+
+# In[19]:
+
+
+def sampled_graph_actions_of_wallet(dataframe, sqrt_sample_size=3, nmin_threshold_actions=50):
+    
+
+    ## set df 
+    df = dataframe.groupby(['wallet', 'action' ]).count().reset_index()
+
+    ## number of visualisation 
+    N=sqrt_sample_size
+
+    ## no_of_action samples 
+    sub_sample = sample_more_naction(dataframe, 'wallet', nmin_threshold_actions) # return list of proposal which have complete more than n min threshold of actions
+    sample_proposals = random.sample(sub_sample, N**2)
+
+    ## colourcode 
+    colors=dict(zip(df.action.unique(),['red', 'green', 'yellow', 'blue', 'cyan', 'brown']))
+
+    ## determine position of output
+    pos = list(product(range(1,(1+N)), range(1,(1+N))))
+
+    # set up fig & populate 
+    fig = make_subplots(rows=N, 
+                        cols=N, 
+                        horizontal_spacing=0.05,
+                        vertical_spacing=0.05,
+                        shared_xaxes=False,
+                        y_title='Frequency', 
+                        x_title='Wallet',
+                        #subplot_titles=sample_proposals
+                       )
+
+    # create a figure 
+    for pos_df in zip(pos, sample_proposals):
+
+        pos, w_id = pos_df
+
+        r, c = pos 
+
+        for a in df.action.unique(): 
+
+            ## select sub df 
+            data=df[(df['wallet']== w_id) & (df['action']==a)].to_dict('series')
+
+            # add figures 
+            fig.add_trace(
+                        go.Bar(
+                        name=a,
+                        legendgroup=a, #one legend for all 
+                        x=data['wallet'],
+                        y=data['timestep'], # remember this is just a count 
+                        showlegend=False,
+                        marker_color=colors[a]
+                        ),
+
+                        row=r, col=c
+                    )  
+
+
+
+    fig.update_layout(barmode='group',
+                      height=900, 
+                      width=900, 
+                      coloraxis=dict(colorscale='RdBu'),
+                      font=dict(size=8),
+                      title_text=f"Actions performed by each wallet on each proposal [Sampled: {N**2} Wallets]")
+
+
+
+
+    ## change size and colour of subtitles 
+    for i in fig['layout']['annotations']:
+        i['font'] = dict(size=8,color='#000000')
+
+    fig.show()
+
+
+# ## Dimension: Wallet, Proposal, Time 
+# - Q1: What are the common characteristics between wallets that join or fund a proposal? Do they cluster? [Pending]
+# - Q2: How does each wallet perform over time?
+# - Q3: How does each proposal perform over time?
+# - Q4: How do the wallet interactions with proposals change over time?
+
+# In[17]:
+
+
+# Q1: What are the common characteristics between wallets that join or fund a proposal? Do they cluster? [Pending]
+
+
+# In[22]:
+
+
+# Q2: How does each wallet perform over time?
+
+def wallet_funds_over_time(dataframe):
+    fig_df = dataframe.groupby(['timestep', 'wallet' ]).sum().reset_index()
+
+    fig = px.line(
+        fig_df,
+        x='timestep',
+        y='wallet_funds',
+        color='wallet',
+        title='Performance of wallets over time',
+        labels={"timestep": "Timestep",
+                "proposal": "Frequency",             
+                "wallet_funds": "Wallet Funds",             
+               })
+
+    fig.show()
+
+# In[25]:
+
+
+# Q4: How do the wallet interactions with proposals change over time?
+
+def df_action_over_time_by_type_of_action(dataframe):
+
+    ## re-structure df 
+    df_action_over_time = dataframe.groupby(['timestep', 'action']).count()['wallet']
+    df_action_over_time = df_action_over_time.reset_index()
+
+
+
+    fig = px.histogram(
+        df_action_over_time,
+        x='timestep',
+        y='wallet',
+        color='action',
+        nbins=int(100/10),
+        marginal="rug",
+        title='Action of all Wallets over time by type of action',
+        labels={"timestep": "Timestep",
+                'wallet': 'No. of actions performed', 
+                #'action': 'Action'
+
+               })
+
+    fig.show()