Skip to content

Commit

Permalink
indexcov: exclude funky chromosomes by default.
Browse files Browse the repository at this point in the history
This can be adjusted with the `--excludepatt` flag.

Closes #26.
  • Loading branch information
brentp committed May 5, 2017
1 parent 5a472aa commit 0f6ce83
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 15 deletions.
7 changes: 4 additions & 3 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
v0.1.16-dev
===========
v0.1.16
=======
+ `indexcov`: report and plot number of mapped and unmapped reads as reported by the index.
+ `covmed`: rename to `covstats`
+ `covstats`: report samplename(s) from read groups as well as bam path.
+ `covstats`: skip first 100K reads to give better estimates of depth.
+ `covstats`: report percent of bad (QC-Fail|Duplicate) and of umapped reads.

+ `indexcov`: automatically exclude chromosomes that match pattern: `^chrEBV$|^NC|_random$|Un_|^HLA\-|_alt$|hap\d$`.
this can be adjust from the command-line.

v0.1.15
=======
Expand Down
2 changes: 1 addition & 1 deletion goleft.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
package goleft

const Version = "0.1.16-dev"
const Version = "0.1.16"
11 changes: 10 additions & 1 deletion indexcov/functional-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ run check_single_sample ./goleft_test indexcov -d /tmp/tt sample_name_0001.bam
assert_exit_code 0
assert_in_stderr "not plotting"
assert_equal $(num_colcounts /tmp/tt/tt-indexcov.ped) 1
assert_equal $(zgrep -wc ^1 /tmp/tt-indexcov.bed.gz ) 14748



run check_sex ./goleft_test indexcov -d /tmp/tt --sex "X,Y" sample_name_0001.bam
assert_exit_code 0
Expand Down Expand Up @@ -89,7 +92,13 @@ fi
run check_cohort ./goleft_test indexcov -d /tmp/tt samples/*.bam
assert_exit_code 0
assert_equal $(num_colcounts /tmp/tt/tt-indexcov.ped) 1
exit


rm -f /tmp/tt/tt-indexcov.bed.gz
run check_exclude ./goleft_test indexcov --excludepatt '^1$' -d /tmp/tt samples/sample_paper_0001.bam
assert_equal $(zgrep -wc ^1 /tmp/tt/tt-indexcov.bed.gz ) 0
assert_exit_code 0
cat $STDERR_FILE

run check_crai ./goleft_test indexcov --fai human_g1k_v37.fasta.fai -d /tmp/1kg NA21144.mapped.ILLUMINA.bwa.GIH.low_coverage.20130415.bam.cram.crai
assert_exit_code 0
Expand Down
31 changes: 21 additions & 10 deletions indexcov/indexcov.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
Expand All @@ -34,14 +35,16 @@ import (
var Ploidy = 2

var cli = &struct {
Directory string `arg:"-d,required,help:directory for output files"`
IncludeGL bool `arg:"-e,help:plot GL chromosomes like: GL000201.1 which are not plotted by default"`
Sex string `arg:"-X,help:comma delimited names of the sex chromosome(s) used to infer sex. Set to '' if no sex chromosomes are present."`
Chrom string `arg:"-c,help:optional chromosome to extract depth. default is entire genome."`
Fai string `arg:"-f,help:fasta index file. Required when crais are used."`
Bam []string `arg:"positional,required,help:bam(s) or crais for which to estimate coverage"`
sex []string `arg:"-"`
}{Sex: "X,Y"}
Directory string `arg:"-d,required,help:directory for output files"`
IncludeGL bool `arg:"-e,help:plot GL chromosomes like: GL000201.1 which are not plotted by default"`
ExcludePatt string `arg:-p,help:regular expression of chromosome names to exclude"`
Sex string `arg:"-X,help:comma delimited names of the sex chromosome(s) used to infer sex. Set to '' if no sex chromosomes are present."`
Chrom string `arg:"-c,help:optional chromosome to extract depth. default is entire genome."`
Fai string `arg:"-f,help:fasta index file. Required when crais are used."`
Bam []string `arg:"positional,required,help:bam(s) or crais for which to estimate coverage"`
sex []string `arg:"-"`
exclude *regexp.Regexp `arg:"-"`
}{Sex: "X,Y", ExcludePatt: `^chrEBV$|^NC|_random$|Un_|^HLA\-|_alt$|hap\d$`}

// MaxCN is the maximum normalized value.
var MaxCN = float32(6)
Expand Down Expand Up @@ -135,7 +138,7 @@ func tint(f float32) int {
// CountsAtDepth calculates the count of items in depths that are at 100 * d
func CountsAtDepth(depths []float32, counts []int) {
if len(counts) != slots {
panic(fmt.Sprintf("indexcov: expecting counts to be length %d", slots))
panic(fmt.Sprintf("indexcov: expecting counts to be length %d, was: %d", slots, len(counts)))
}
for _, d := range depths {
counts[tint(d*(slots*float32(slotsMid))+0.5)]++
Expand Down Expand Up @@ -326,6 +329,9 @@ func Main() {
if len(cli.Sex) > 0 {
cli.sex = strings.Split(strings.TrimSpace(cli.Sex), ",")
}
if cli.ExcludePatt != "" {
cli.exclude = regexp.MustCompile(cli.ExcludePatt)
}

if exists, err := getDirectory(cli.Directory); err != nil || !exists {
log.Fatalf("indexcov: error creating specified directory: %s, %s", cli.Directory, err)
Expand Down Expand Up @@ -497,8 +503,13 @@ func run(refs []*sam.Reference, idxs []*Index, names []string, base string) (map
chromNames := make([]string, 0, len(refs))

fmt.Fprintf(bgz, "#chrom\tstart\tend\t%s\n", strings.Join(names, "\t"))
for ir, ref := range refs {
ir := -1
for _, ref := range refs {
chrom := ref.Name()
if cli.exclude != nil && cli.exclude.Match([]byte(chrom)) {
continue
}
ir++
// Some samples may not have all the data, so we always take the longest sample for printing.
longest, longesti := 0, 0

Expand Down

0 comments on commit 0f6ce83

Please sign in to comment.