-
Notifications
You must be signed in to change notification settings - Fork 0
/
Databases-Python2.py
340 lines (282 loc) · 16.9 KB
/
Databases-Python2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# coding: utf-8
# In[1]:
#################################################################################
# Step 1: Create a table in our database to hold the stock data
#################################################################################
# import some standard modules we can use to download URLs
import urllib2, cookielib
import zipfile, os
import time
import sqlite3
from datetime import datetime
conn = sqlite3.connect('example.db')
c = conn.cursor()
# Create table
c.execute('CREATE TABLE prices (SYMBOL text, SERIES text, OPEN real, HIGH real, LOW real, CLOSE real, LAST real, PREVCLOSE real, TOTTRDQTY real, TOTTRDVAL real, TIMESTAMP date, TOTALTRADES real, ISIN text, PRIMARY KEY (SYMBOL, SERIES, TIMESTAMP))')
conn.commit()
# In[2]:
#################################################################################
# Step 2: Download a bunch of files and unzip them
#################################################################################
def download(localZipFilePath,urlOfFileName):
# We already wrote the code for this bit in the drill on files. Lets just copy that over
# This bit of boiler plate code below is needed because the
# website of the National Stock Exchange tries to block automated
# programs (like this one!) from downloading files.
# This line is not needed with all websites, but there are a reasonable
# number that will block automated downloads, in which case the additional line below
# will circumvent the block.
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}
# here below is the code that actually downloads the page and stores to file
# Make the web request - just as a web browser like Chrome or Mozilla would
# Note that we pass in the boilerplate code we just typed above
webRequest = urllib2.Request(urlOfFileName, headers=hdr)
# Doing stuff with files is quite prone to errors in the disk or in the network
# connection, so use a try:/except: pair as safety net
try:
# Make the web request
page = urllib2.urlopen(webRequest)
# Save the contents of the web request in a variable called 'content'.
# These contents are literally the zip file from the URL (i.e. what you'd get
# if you downloaded the URL manually)
content = page.read()
# Save the contents to the zip file on disk locally
# 1. open the barrel (file). The 'w' signifies that we intend to write, i.e
# put stuff into the barrel (file)
output = open(localZipFilePath, "wb")
# 2. write contents to file, i.e. actually put stuff in the barrel
output.write(bytearray(content))
# 3. close the barrel (i.e. the file)
output.close()
# this bit below, the except: block is what will get executed if any of the lines above throw errors
except urllib2.HTTPError, e:
# print out exactly what error, if any, resulted
print e.fp.read()
# Let the user know that the download did not work, and that file needs to be manually downloaded
print "Looks like the download did not go through. Please download manually \nFROM:" + urlOfFileName + "\nTO:" + localZipFilePath
def unzip(localZipFilePath, localExtractFilePath):
# This bit had also been written earlier. Copying that as well
# check if the zip file above was downloaded fine, else no point trying to extract
# note how the os.path.exists gives us a single-line way to check if a given file exists
if os.path.exists(localZipFilePath):
# if we are in here (the body of the if-statement), that means the file
# does indeed exist
print "Cool! " + localZipFilePath + " exists..proceeding"
# A zip file can contain any number of files - we don't know upfront how many
# so initialize an empty array in which to save the names of the files
listOfFiles = []
# Open the barrel (the zip file). The 'rb' is very significant
# r: read i.e. we plan to take stuff out of the barrel, not put stuff in
# b: binary i.e. the barrel contains stuff that needs processing.
# That further processing is exactly what the code below will do using the
# zipfile library.
fh = open(localZipFilePath, 'rb')
# We've opened the file, now pass it to the library zipfile, which will give
# us a variable, here called 'zipFileHandler' which knows exactly what to do
# with this file.
zipFileHandler = zipfile.ZipFile(fh)
# zipFileHandler knows how to read the list of zipped up files in the zip.
# Iterate over them in a for-loop. 'name' is the iterator variable.
for name in zipFileHandler.namelist():
# Extract this particular file, named 'name' to the directory called 'localExtractFilePath'
zipFileHandler.extract(name, localExtractFilePath)
# Add this file to the list of files that we are maintaining
listOfFiles.append(localExtractFilePath + name)
print "Extracted " + name + " from the zip file, and saved to " + (localExtractFilePath + name)
# note from the indentation that we are now outside the for loop!
# lets print the total number of files we extracted from our zip file
print "Extracted " + str(len(listOfFiles)) + " file in total"
fh.close()
def downloadAndUnzipForPeriod(listOfMonths, listOfyears):
for year in listOfYears:
# indentation changes - we are inside the first for loop ( for the years)
for month in listOfMonths:
# indentation changes yet again - we are inside the second for loop ( for each month in a given year)
for dayOfMonth in range(31) :
# indentation changes yet again - this is being executed for each day of each month of each year
# notice how we use the range(31) function to get a list with elements [0,1,2,....30]
date = dayOfMonth + 1
# lists are indexed from 0 but the dates start from 1, so add 1
############################################################################
# OK the code that follows basically constructs a URL for the file
# as saved in the NSE website. How do we know how to construct this?
# Simply by manually downloading these files and examining the pattern for
# different dates/months/years.
# A typical URL looks like this : http://www.nseindia.com/content/historical/EQUITIES/2015/JUL/cm07JUL2015bhav.csv.zip
#############################################################################
# Convert number to string
dateStr = str(date)
# Note how single digit dates have a leading 0
if date < 10:
# indentation shows we are inside the if condition
dateStr = "0"+dateStr
# tack on a leading zero
# indentation changes - we are out of the if loop
print dateStr, "-", month,"-", year
# Construct the filename
fileName = "cm" +str(dateStr) + str(month) + str(year) + "bhav.csv.zip"
# Construct the entire URL
urlOfFileName = "http://www.nseindia.com/content/historical/EQUITIES/"+ year +"/"+ month + "/" + fileName
# Construct the file on our local hard disk where we wish to save the downloaded file
# The file path would look different on Windows machines - something like "C:/Users/vitthal/PythonCodeExamples/"
localZipFilePath = "/Users/swethakolalapudi/PythonCodeExamples/" + fileName
# Make the call to the download function
download(localZipFilePath, urlOfFileName)
# MAke the call to the unzip function
unzip(localZipFilePath,localExtractFilePath)
# We want to make sure that we don't inadvertently overwhelm the NSE website
# so take a pause of 10 seconds to make sure we are not overloading it
time.sleep(10)
# done with all 3 for loops , the years, months and days
print "OK, all done downloading and extracting"
#initialize a variable with a local directory in which to extract the
# zip file above
localExtractFilePath = "/Users/swethakolalapudi/PythonCodeExamples/"
listOfMonths = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
listOfYears = ['2014']
downloadAndUnzipForPeriod(listOfMonths,listOfYears)
# In[3]:
#####################################################################
# Step 3: parse each file, and insert each row of each file into the database
#####################################################################
def insertRows(fileName,conn):
# conn is a connection to the database,
# given a connection we need a session in which to do stuff
c = conn.cursor()
lineNum = 0
with open(fileName, 'rb') as csvfile:
# the file is opened now let's get a csv handler. The handler needs to be told how strings in quotes
# will appear - we tell it that quotes will be marked by double quotes
lineReader = csv.reader(csvfile, delimiter = ',', quotechar = "\"")
# Ok! the CSV handler called 'lineReader' knows how to read a file 1 line at a time
# Iterate using a for loop and the iterator variable 'row'
for row in lineReader:
# what line are we on? Increase the counter by 1
lineNum = lineNum + 1
# if this was the first line - which contains a header - skip and
# go to the next via the 'continue' statement
if lineNum ==1:
print "Header row, skipping"
continue
# Insert a row of data
date_object = datetime.strptime(row[10], '%d-%b-%Y')
#############################################################################
# We just wrote to the database, so we must commit our writes
#############################################################################
# We will insert data into the database 1 row at a time. This data must correspond
# in the number and type to the column headers of the table. The columns in a table are
# collectively called the schema
oneTuple = [row[0], row[1], float(row[2]),float(row[3]),float(row[4]),float(row[5]),float(row[6]),float(row[7]),float(row[8]),float(row[9]),date_object,float(row[11]),row[12]]
# This statement will actually insert a single row into the table called prices
c.execute("INSERT INTO prices VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",oneTuple)
# As the changed indentation tells us, we are now out of the for - loop, as well as out of the 'with' block.
# The magic bit of the 'with...' statement is that the file (barrel) we had opened is closed without our doing
# anything explicit
##################################################################################
# We just wrote to the database, so we must commit our writes
###################################################################################
conn.commit()
print "Done iterating over file contents - the file has been closed now!"
# Insert into table
import os, csv, datetime
from datetime import datetime
localExtractFilePath = "/Users/swethakolalapudi/PythonCodeExamples"
# USe a python library to get a list of all files in a particular directory
for file in os.listdir(localExtractFilePath):
# as the indentation tells we are in body of the loop
# 'file' is the name of the loop variable
if file.endswith(".csv"):
# Yet another indentation change - we are now inside the if condition
# IF the filename ends with '.csv' its a file we care about - make a function
# call to the function above to insert into the database
insertRows(localExtractFilePath+"/"+file,conn)
# In[4]:
######################################################################
# Step 4: Run a test query against the database to make sure it is set up OK
######################################################################
t1 = 'ICICIBANK'
series = 'EQ'
c = conn.cursor()
cursor = c.execute ('SELECT symbol, max(close), min(close), max(timestamp), min(timestamp), count(timestamp) FROM prices WHERE symbol = ? and series = ? GROUP BY symbol ORDER BY timestamp', (t1,series))
for row in cursor:
print row
# In[5]:
##########################################################################
# Step 5: Run a query to get all data for a given stock, and create an excel summary
##########################################################################
import xlsxwriter
# we need xlsxwriter in order to create excel files
def createExcelWithDailyPriceMoves(ticker,conn):
# get a cursor from the connection
# we will use this cursor to execute SQL commands
c = conn.cursor()
# Now execute a query (i.e. read from the database) to get all information for this particular ticker
cursor = c.execute('SELECT symbol, timestamp, close FROM prices where symbol = ? and series = ? ORDER BY timestamp',(ticker,series))
# pay close attention to the way '?' is used - twice - once each as placeholder for the symbol and for the series
# This is a standard way of passing inputs to customize queries. Also note the syntax used to pass in the 2 inputs
# import the xlsxwriter which does all the magic
# Initialize a variable with the name of the excel file we will create -
# this will have the same name as the ticker that we care about
excelFileName = "/Users/swethakolalapudi/PythonCodeExamples/Outputs/"+ticker+".xlsx"
# create a workbook. This in a different way is opening a file
workbook = xlsxwriter.Workbook(excelFileName)
# Create an empty worksheet in this workbook, named 'summary'
worksheet = workbook.add_worksheet("Summary")
# The way we write stuff into this excel workbook is passing
# the cell address to start write stuff eg "A1"
# a list of values that will be written , 1 per cell starting from that address
worksheet.write_row("A1",["Top Traded Stocks"])
worksheet.write_row("A2",['Stock','Date','Closing'])
lineNum = 3
# Everything in life is a list - notice how the results of the query are a list
# and we can iterate over this list of results using a for loop
for row in cursor:
worksheet.write_row("A"+str(lineNum), list(row))
print "A"+str(lineNum),list(row)
lineNum = lineNum + 1
# Indentation changes - we are done with the for loop that writes the raw data
# Ok - all of the code after this is devoted to creating a line chart in excel
chart1 = workbook.add_chart({'type':'line'})
# Configure the first series
chart1.add_series({
'categories': '=Summary!$B$3:$B$' + str(lineNum),
'values': '=Summary!$C$3:$C$'+str(lineNum)
# check out this boiler plate code that tells xlsxwriter where to find the data for his chart
# We know that we started writing the data in cell B3, and that the last row we filled in
# is still stored in the 'lineNum' variable
})
# Add a chart title and some axis labels
chart1.set_title({'name': ticker})
chart1.set_x_axis({'name':'Date'})
chart1.set_y_axis({'name': 'Closing Price'})
# Insert the chart into the worksheet (with an offset)
worksheet.insert_chart('F2',chart1,{'x_offset': 25, 'y_offset': 10})
workbook.close()
# Create a connection
conn=sqlite3.connect('example.db')
# Call the function above to create an excel file for the ticker
# RELIANCE which is one of Indias most actively traded stocks
createExcelWithDailyPriceMoves('RELIANCE',conn)
# Close the connection. We only queried (read) from the database
# so there is no need to commit
# In[1]:
################################################################
# Step 6: We will also see how to drop a table to clear up
##################################################################
# Drop table
# Step 1: open up a connection
conn=sqlite3.connect('example.db')
# Step 2: get a cursor
c = conn.cursor()
# Step 3 : Drop the table so we leave the database in the same state in which we started with
c.execute('DROP TABLE prices')
# Step 4 : OK - we wrote to the database, so we must commit our changes
conn.commit()
# Step 5 : Close the connection
conn.close()
# In[ ]: