-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab05-op-Exercise1_creditcard.py
72 lines (57 loc) · 2.63 KB
/
lab05-op-Exercise1_creditcard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
###
# Exercise - Credit Card Fraud Detection
# Using - tf.TextLineReader, tf.decode_csv
# DataSet: https://www.kaggle.com/dalpozz/creditcardfraud?
###
## Not working well: the cost and weights always become NaN
import tensorflow as tf
import numpy as np
# Input pipeline: stream rows of the credit-card CSV through a TF queue.
# Column layout of the file: time, V1 ~ V28, Amount, class (31 columns).
NUM_COLUMNS = 31
BATCH_SIZE = 1000

filename_queue = tf.train.string_input_producer(
    ['data-creditcard_binaClass.csv'],
    shuffle=False,
    name='filename_queue',
)

# The first line of the file is a header, so the reader skips it.
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename_queue)
print(key, value)

# One float default per column: used for empty fields, and it also fixes
# the type of every decoded column to float.
record_defaults = [[0.] for _ in range(NUM_COLUMNS)]
print(record_defaults)

xy = tf.decode_csv(value, record_defaults=record_defaults)

# Features are columns 1..5 (V1 ~ V5); the label is the last column (class).
feature_columns = xy[1:6]
label_column = xy[-1:]
train_x_batch, train_y_batch = tf.train.batch(
    [feature_columns, label_column], batch_size=BATCH_SIZE)
print(train_x_batch)
print(train_y_batch)
# Logistic-regression model over the five selected features (V1 ~ V5).

# Placeholders that are fed on every run: a batch of feature rows and the
# matching batch of 0/1 labels.
X = tf.placeholder(tf.float32, shape=[None, 5])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([5, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Keep the pre-sigmoid logits as their own tensor so the loss can be
# computed from them directly.
logits = tf.matmul(X, W) + b

# Hypothesis using sigmoid: 1 / (1 + exp(-(XW + b)))
hypothesis = tf.sigmoid(logits)

# cost/loss function: mean binary cross-entropy.
# Writing it by hand as -(Y * log(h) + (1 - Y) * log(1 - h)) breaks down once
# the sigmoid saturates to exactly 0 or 1 in float32: log(0) is -inf and
# 0 * -inf is NaN, which then propagates into the gradients and the weights.
# sigmoid_cross_entropy_with_logits evaluates the same loss in a numerically
# stable form straight from the logits.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
train = tf.train.GradientDescentOptimizer(learning_rate=1e-7).minimize(cost)

# Accuracy computation
# predicted is 1.0 where hypothesis > 0.5, else 0.0
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
# Launch graph: train for 1001 steps on queued batches, then report accuracy
# on the last batch that was fetched.
with tf.Session() as sess:
    # Initialize Tensorflow variables
    sess.run(tf.global_variables_initializer())

    # Start populating the filename queue
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in range(1001):
            # Pull one batch out of the input queue, then feed it to the model.
            x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
            cost_val, _ = sess.run([cost, train],
                                   feed_dict={X: x_batch, Y: y_batch})
            if step % 20 == 0:
                print(step, cost_val)
    finally:
        # Always stop and join the queue-runner threads, even when a training
        # step raises; otherwise they are left running in the background.
        coord.request_stop()
        coord.join(threads)

    # Accuracy report
    # NOTE: the value printed under "Correct(Y)" is the predicted class
    # tensor (`predicted`), not the ground-truth labels.
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_batch, Y: y_batch})
    print("\nHypothesis: ", h, "\nCorrect(Y): ", c, "\nAccuracy: ", a)