Skip to content

Commit

Permalink
quick refactor for python3 and db connect module
Browse files (browse the repository at this point in the history)
  • Loading branch information
jdhayhurst committed Nov 12, 2021
1 parent e5c2d63 commit 45d5868
Showing 1 changed file with 102 additions and 133 deletions.
235 changes: 102 additions & 133 deletions curation-queue/curation_queue_with_ancestry.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
# Activate Python venv for the script - uncomment to run script on commandline
activate_this_file = "/path/to/bin/activate_this.py"
execfile(activate_this_file, dict(__file__ = activate_this_file))

import cx_Oracle
import contextlib
import argparse
import sys
from tqdm import tqdm
import csv
import os.path

sys.path.insert(0, '/path/to/gwas_data_sources')
import gwas_data_sources

import datetime

from gwas_db_connect import DBConnection
import smtplib
from os.path import basename
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication


def get_curation_queue_data():
def get_curation_queue_data(database_name):
'''
Get Curation Queue data
'''

# List of queries
curation_queue_data_sql = """
SELECT DISTINCT (S.ID) AS STUDY_ID, TO_CHAR(H.STUDY_ADDED_DATE, 'yyyy-mm-dd'), P.PUBMED_ID,
A.FULLNAME, TO_CHAR(P.PUBLICATION_DATE, 'yyyy-mm-dd') AS Publication_Date, P.PUBLICATION,
SELECT DISTINCT (S.ID) AS STUDY_ID, TO_CHAR(H.STUDY_ADDED_DATE, 'yyyy-mm-dd'), P.PUBMED_ID,
A.FULLNAME, TO_CHAR(P.PUBLICATION_DATE, 'yyyy-mm-dd') AS Publication_Date, P.PUBLICATION,
P.TITLE, S.USER_REQUESTED, S.FULL_PVALUE_SET, CS.STATUS, S.OPEN_TARGETS, S.INITIAL_SAMPLE_SIZE, S.REPLICATE_SAMPLE_SIZE
FROM STUDY S, HOUSEKEEPING H, PUBLICATION P, AUTHOR A, CURATION_STATUS CS
WHERE S.HOUSEKEEPING_ID = H.ID AND H.IS_PUBLISHED = 0
Expand All @@ -40,9 +27,9 @@ def get_curation_queue_data():


study_reported_trait_sql = """
SELECT listagg(DT.TRAIT, ', ') WITHIN GROUP (ORDER BY DT.TRAIT)
FROM STUDY S, STUDY_DISEASE_TRAIT SDT, DISEASE_TRAIT DT
WHERE S.ID=SDT.STUDY_ID and SDT.DISEASE_TRAIT_ID=DT.ID
SELECT listagg(DT.TRAIT, ', ') WITHIN GROUP (ORDER BY DT.TRAIT)
FROM STUDY S, STUDY_DISEASE_TRAIT SDT, DISEASE_TRAIT DT
WHERE S.ID=SDT.STUDY_ID and SDT.DISEASE_TRAIT_ID=DT.ID
and S.ID= :study_id
"""

Expand All @@ -58,170 +45,155 @@ def get_curation_queue_data():
study_association_cnt_sql = """
SELECT COUNT(A.ID)
FROM STUDY S, ASSOCIATION A
WHERE S.ID=A.STUDY_ID
WHERE S.ID=A.STUDY_ID
and S.ID= :study_id
"""


study_ancestry_initial_sql = """
SELECT SUM(A.NUMBER_OF_INDIVIDUALS)
SELECT SUM(A.NUMBER_OF_INDIVIDUALS)
FROM STUDY S, ANCESTRY A
WHERE A.STUDY_ID=S.ID
and S.ID= :study_id
WHERE A.STUDY_ID=S.ID
and S.ID= :study_id
and A.TYPE='initial'
"""


study_ancestry_replication_sql = """
SELECT SUM(A.NUMBER_OF_INDIVIDUALS)
SELECT SUM(A.NUMBER_OF_INDIVIDUALS)
FROM STUDY S, ANCESTRY A
WHERE A.STUDY_ID=S.ID
and S.ID= :study_id
WHERE A.STUDY_ID=S.ID
and S.ID= :study_id
and A.TYPE='replication'
"""


all_curation_queue_data = []

curation_queue_attr_list = ['STUDY_ID', 'STUDY_CREATION_DATE', 'PUBMEDID', 'FIRST_AUTHOR', \
'PUBLICATION_DATE', 'JOURNAL', 'TITLE', 'REPORTED_TRAIT', 'EFO_TRAIT', \
'ASSOCIATION_COUNT', 'NUMBER_OF_INDIVIDUALS_INITIAL', 'NUMBER_OF_INDIVIDUALS_REPLICATION', \
'USER_REQ$UESTED?', 'FULL P-VALUE SET?', 'CURATION_STATUS', 'IS_OPEN_TARGETS?', \
'INITIAL_SAMPLE_DESCRIPTION', 'REPLICATION_SAMPLE_DESCRIPTION']
curation_queue_attr_list = ['STUDY_ID', 'STUDY_CREATION_DATE', 'PUBMEDID', 'FIRST_AUTHOR',
'PUBLICATION_DATE', 'JOURNAL', 'TITLE', 'REPORTED_TRAIT', 'EFO_TRAIT',
'ASSOCIATION_COUNT', 'NUMBER_OF_INDIVIDUALS_INITIAL', 'NUMBER_OF_INDIVIDUALS_REPLICATION',
'USER_REQ$UESTED?', 'FULL P-VALUE SET?', 'CURATION_STATUS', 'IS_OPEN_TARGETS?',
'INITIAL_SAMPLE_DESCRIPTION', 'REPLICATION_SAMPLE_DESCRIPTION']



TIMESTAMP = get_timestamp()
outfile = open("data_queue_"+TIMESTAMP+".csv", "w")
csvout = csv.writer(outfile)

csvout.writerow(curation_queue_attr_list)

db_handler = DBConnection.gwasCatalogDbConnector(database_name)
cursor = db_handler.cursor
cursor.execute(curation_queue_data_sql)

try:
ip, port, sid, username, password = gwas_data_sources.get_db_properties(DATABASE_NAME)
dsn_tns = cx_Oracle.makedsn(ip, port, sid)
connection = cx_Oracle.connect(username, password, dsn_tns)

with contextlib.closing(connection.cursor()) as cursor:

cursor.execute(curation_queue_data_sql)

curation_queue_data = cursor.fetchall()

curation_queue_data = cursor.fetchall()

for data in tqdm(curation_queue_data, desc='Get Curation Queue data'):
for data in tqdm(curation_queue_data, desc='Get Curation Queue data'):

curation_data = []
curation_data = []

curation_data.insert(0, data[0])

curation_data.insert(0, data[0])

curation_data.insert(1, data[1])
curation_data.insert(1, data[1])

curation_data.insert(2, data[2])
curation_data.insert(2, data[2])

curation_data.insert(3, data[3])
curation_data.insert(3, data[3])

curation_data.insert(4, data[4])
curation_data.insert(4, data[4])

curation_data.insert(5, data[5])
curation_data.insert(5, data[5])

curation_data.insert(6, data[6])
curation_data.insert(6, data[6])

curation_data.insert(12, data[7])

curation_data.insert(12, data[7])
curation_data.insert(13, data[8])

curation_data.insert(13, data[8])
curation_data.insert(14, data[9])

curation_data.insert(14, data[9])
curation_data.insert(15, data[10])

curation_data.insert(15, data[10])
curation_data.insert(16, data[11])

curation_data.insert(16, data[11])
curation_data.insert(17, data[12])

curation_data.insert(17, data[12])
##########################
# Get Reported Trait
##########################
cursor.prepare(study_reported_trait_sql)
cursor.execute(None, {'study_id': data[0]})
reported_trait = cursor.fetchone()

##########################
# Get Reported Trait
##########################
cursor.prepare(study_reported_trait_sql)
r = cursor.execute(None, {'study_id': data[0]})
reported_trait = cursor.fetchone()

if reported_trait[0] is None:
curation_data.insert(7, 'No values')
else:
curation_data.insert(7, reported_trait[0])

if reported_trait[0] is None:
curation_data.insert(7, 'No values')
else:
curation_data.insert(7, reported_trait[0])

##########################
# Get Mapped/EFO Trait
##########################
cursor.prepare(study_mapped_trait_sql)
cursor.execute(None, {'study_id': data[0]})
mapped_trait = cursor.fetchone()

##########################
# Get Mapped/EFO Trait
##########################
cursor.prepare(study_mapped_trait_sql)
r = cursor.execute(None, {'study_id': data[0]})
mapped_trait = cursor.fetchone()
if mapped_trait[0] is None:
curation_data.insert(8,'No values')
else:
curation_data.insert(8, mapped_trait[0])

if mapped_trait[0] is None:
curation_data.insert(8,'No values')
else:
curation_data.insert(8, mapped_trait[0])

##########################
# Get Association count
##########################
cursor.prepare(study_association_cnt_sql)
cursor.execute(None, {'study_id': data[0]})
association_cnt = cursor.fetchone()

##########################
# Get Association count
##########################
cursor.prepare(study_association_cnt_sql)
r = cursor.execute(None, {'study_id': data[0]})
association_cnt = cursor.fetchone()
curation_data.insert(9, association_cnt[0])

curation_data.insert(9, association_cnt[0])

###############################
# Get Num Individuals Initial
###############################
cursor.prepare(study_ancestry_initial_sql)
cursor.execute(None, {'study_id': data[0]})
ancestry_initial_cnt = cursor.fetchone()

###############################
# Get Num Individuals Initial
###############################
cursor.prepare(study_ancestry_initial_sql)
r = cursor.execute(None, {'study_id': data[0]})
ancestry_initial_cnt = cursor.fetchone()
if ancestry_initial_cnt[0] is None:
curation_data.insert(10, 'No values')
else:
curation_data.insert(10, ancestry_initial_cnt[0])

if ancestry_initial_cnt[0] is None:
curation_data.insert(10, 'No values')
else:
curation_data.insert(10, ancestry_initial_cnt[0])


#########################################
# Get Num Individuals Replication
#########################################
cursor.prepare(study_ancestry_replication_sql)
r = cursor.execute(None, {'study_id': data[0]})
ancestry_replication_cnt = cursor.fetchone()
#########################################
# Get Num Individuals Replication
#########################################
cursor.prepare(study_ancestry_replication_sql)
cursor.execute(None, {'study_id': data[0]})
ancestry_replication_cnt = cursor.fetchone()

if ancestry_replication_cnt[0] is None:
curation_data.insert(11, 'No values')
else:
curation_data.insert(11, ancestry_replication_cnt[0])
if ancestry_replication_cnt[0] is None:
curation_data.insert(11, 'No values')
else:
curation_data.insert(11, ancestry_replication_cnt[0])


###############################
# Write out row data to file
##############################
csvout.writerow(curation_data)
###############################
# Write out row data to file
##############################
csvout.writerow(curation_data)



connection.close()

return all_curation_queue_data

except cx_Oracle.DatabaseError, exception:
print exception
db_handler.close()
return all_curation_queue_data


def get_timestamp():
    """
    Get a timestamp for the current local date.

    Despite the name, only the date part is kept (no time of day).

    Returns:
        str: today's date formatted as ``YYYY-MM-DD``, taken from
        ``datetime.datetime.now()``.
    """
    # Zero-padded year-month-day, so the value sorts lexicographically.
    timestamp = '{:%Y-%m-%d}'.format(datetime.datetime.now())
    return timestamp
Expand All @@ -231,7 +203,7 @@ def send_email(*args):
'''
Email report file.
'''

# Today's date
now = datetime.datetime.now()
datestamp = str(now.day)+"_"+str(now.strftime("%b"))+"_"+str(now.year)
Expand All @@ -243,7 +215,7 @@ def send_email(*args):
fil.read(),
Name=basename(file_name)
)

# create a text/plain message
msg = MIMEMultipart()

Expand All @@ -255,7 +227,6 @@ def send_email(*args):
# create headers
me = '[email protected]'
you = ['[email protected]', '[email protected]']
# you = ['[email protected]']
msg['Subject'] = 'GWAS Curation Queue '+datestamp
msg['From'] = me
msg['To'] = ", ".join(you)
Expand All @@ -274,17 +245,15 @@ def send_email(*args):

# Commandline arguments
parser = argparse.ArgumentParser()
parser.add_argument('--database', default='SPOTPRO', choices=['DEV3', 'SPOTPRO'],
help='Run as (default: SPOTPRO).')
parser.add_argument('--database', default='spotpro', choices=['dev3', 'spotpro'],
help='Run as (default: spotpro).')
args = parser.parse_args()

global DATABASE_NAME
DATABASE_NAME = args.database
database_name = args.database

curation_queue_data = get_curation_queue_data(database_name=database_name)

curation_queue_data = get_curation_queue_data()

# Email data to curators
TIMESTAMP = get_timestamp()
report_filename = "data_queue_"+TIMESTAMP+".csv"
send_email(report_filename)

send_email(report_filename)

0 comments on commit 45d5868

Please sign in to comment.