-
Notifications
You must be signed in to change notification settings - Fork 23
/
check_data.py
55 lines (41 loc) · 1.52 KB
/
check_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import pandas as pd
from collections import Counter
import os
'''
Run this script to check your data. The script prints the total number of
samples along with the number of occurrences of each label.
'''
# Directory (relative to the current working directory) holding the .npy files.
DATA_DIR = 'data'


def summarize(path, heading):
    """Print a short report for the dataset stored at *path*.

    The report consists of ``"<heading>: <number of rows>"``, the first rows
    of the data as a DataFrame, a blank spacer, and a ``Counter`` of the
    values found in column 2 (converted to ``str`` before counting).

    Args:
        path: Location of the ``.npy`` file to inspect.
        heading: Text printed in front of the row count.

    Returns:
        The ``Counter`` of stringified column-2 values, or ``None`` when
        *path* is not an existing file (nothing is printed in that case).
    """
    if not os.path.isfile(path):
        return None
    # NOTE(security): allow_pickle=True unpickles arbitrary objects, so only
    # point this script at data files you produced yourself.
    samples = np.load(path, allow_pickle=True)
    print(f'{heading}: {len(samples)}')
    df = pd.DataFrame(samples)
    print(df.head())
    print('\n')
    label_counts = Counter(df[2].apply(str))
    print(label_counts)
    return label_counts


def main():
    """Ask for a batch number and report on the matching data file(s).

    Batch ``0`` inspects ``final_data.npy``. Any other number inspects
    ``training_data_<n>.npy`` and, if that exists, its
    ``training_data_<n>_balanced.npy`` counterpart as well.
    """
    try:
        batch = int(input('Enter the batch number, 0 for final data: '))
    except ValueError:
        # A non-numeric answer used to end in a raw traceback.
        raise SystemExit('Batch number must be an integer.')

    # os.path.join keeps the paths valid on every OS; the former hard-coded
    # 'data\\...' strings only resolved correctly on Windows.
    if batch == 0:
        final_path = os.path.join(DATA_DIR, 'final_data.npy')
        if summarize(final_path, 'Total amount of frames collected') is None:
            print('Final data does not exist.')
        return

    raw_path = os.path.join(DATA_DIR, 'training_data_{}.npy'.format(batch))
    if summarize(raw_path, 'Unbalanced Raw Data') is None:
        print('Data does not exist.')
        return

    print('\n')
    balanced_path = os.path.join(
        DATA_DIR, 'training_data_{}_balanced.npy'.format(batch))
    if summarize(balanced_path, 'New Balanced Data') is None:
        print('Balanced data file does not exist.')


if __name__ == '__main__':
    main()