Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lira vvedit #2

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The following include tested versions in parenthesis when applicable; later vers
1. python (2.7.12)
+ argparse
+ os
+ pysam
2. samtools (1.2)
3. bcftools (1.2)
4. htslib (1.2.1)
Expand Down
38 changes: 38 additions & 0 deletions global-config.human.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#This file ignores blank and commented ('#') lines.
#Values are noted as "<KEY> <VALUE>"

#Path to snpEff installation
SNPEFF /n/data1/hms/dbmi/park/cbohrson/installed/local/bin/snpEff

#Path to DBSNP database
DBSNP /n/data1/hms/dbmi/park/cbohrson/common_all_20160601.vcf.gz

#Path to downloaded 1000 genomes haplotype reference panel
KGEN /n/data1/hms/dbmi/park/simon_chu/projects/data/1000G

#Path to picard JAR file
PICARD /n/data1/hms/dbmi/park/cbohrson/installed/local/bin/picard.jar

#Gap between SNPs required for regions to be chunked separately
GAP_REQUIREMENT 2000

#Target number of reads per LiRA job
READS_TARGET 100000

#Partition to submit to for parallelization
PARTITION short

#Batch size for parallelization (number of scripts per job)
BATCH_SIZE 10

#Method to use for creating sSNV control curve of composite coverage vs. genome-wide sSNV rate. Options are 'germline' (use germline sSNVs) or 'general' (use all powered sites). 'general' is not yet implemented.
CONTROL_METHOD germline

#For 10X, the max distance between two SNPs to be considered within a barcode family
MAX_DISTANCE_10X 50000

#Script in $LIRA_DIR/scripts to use for parallelization
PARALLEL_SCRIPT slurm.R

#Number of sampling replicates to use for CONTROL_METHOD
BOOTSTRAP_REPLICATES 100
7 changes: 5 additions & 2 deletions global-config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
SNPEFF /n/data1/hms/dbmi/park/cbohrson/installed/local/bin/snpEff

#Path to DBSNP database
DBSNP /n/data1/hms/dbmi/park/cbohrson/common_all_20160601.vcf.gz
#DBSNP /n/data1/hms/dbmi/park/cbohrson/common_all_20160601.vcf.gz
DBSNP /n/data1/hms/dbmi/park/splitseq_analysis/vcf_contig_rename/mgp.v5.merged.snps_all.dbSNP142.contig_rename_2.vcf.gz

#Path to downloaded 1000 genomes haplotype reference panel
KGEN /n/data1/hms/dbmi/park/simon_chu/projects/data/1000G
#KGEN /n/data1/hms/dbmi/park/simon_chu/projects/data/1000G

#Path to picard JAR file
PICARD /n/data1/hms/dbmi/park/cbohrson/installed/local/bin/picard.jar
Expand Down Expand Up @@ -36,3 +37,5 @@ PARALLEL_SCRIPT slurm.R

#Number of sampling replicates to use for CONTROL_METHOD
BOOTSTRAP_REPLICATES 100

reference_identifier mm10
167 changes: 145 additions & 22 deletions scripts/functions.R

Large diffs are not rendered by default.

14 changes: 12 additions & 2 deletions scripts/linkage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,24 @@
with open(args.bed) as bed:
reader = csv.reader(bed, delimiter="\t")
sites = list(reader)

for site in sites:
start = int(site[1])
end = int(site[2])
# print(start) # this prints
pileup = bamfile.pileup(site[0],start,end,stepper="all",max_depth=500000)
for pileupColumn in pileup:
for pileupRead in pileupColumn.pileups:
if (pileupColumn.pos >= start) and (pileupColumn.pos < end) and (not pileupRead.is_refskip) and (pileupRead.alignment.mapping_quality == 60) and (pileupRead.alignment.is_proper_pair):

# print(pileupRead.is_refskip)
# print(pileupRead.alignment.is_proper_pair)
#print(pileupRead.alignment.mapping_quality)
# print('----------')

#print(pileupColumn.pos >= start,pileupColumn.pos < end,pileupRead.is_refskip)

if (pileupColumn.pos >= start) and (pileupColumn.pos < end) and (not pileupRead.is_refskip) and (pileupRead.alignment.mapping_quality > 0): #and (pileupRead.alignment.mapping_quality == 60): #and (pileupRead.alignment.is_proper_pair):
#print(pileupRead.alignment.cigartuples)
if(len(pileupRead.alignment.cigartuples) == 1):
if(pileupRead.is_del):
base = "*"
Expand Down
1 change: 1 addition & 0 deletions scripts/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ if(cmd == "plink") {
scripts <- scripts[!ind]
}
tot <- length(scripts)
print(tot)
cmds <- paste(getwd(),"/job_scripts/",scripts,sep="")
batches <- batcher(cmds,batch.size)
if(tot > 0) {
Expand Down
5 changes: 5 additions & 0 deletions scripts/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ get.chromosomes <- function(config) {
if(config$gender == "female") {
chromosomes <- c(chromosomes,"X")
}
} else if(config$reference_identifier == "mm10") {
chromosomes <- paste("chr",1:19,sep="")
if(config$gender == "female") {
chromosomes <- c(chromosomes,"chrX")
}
} else {
stop("Cannot get chromosomes.")
}
Expand Down