diff --git a/HISTORY.md b/HISTORY.md index 0c0f8a8..950571d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,11 +1,12 @@ -v0.1.16-dev -=========== +v0.1.16 +======= + `indexcov`: report and plot number of mapped and unmapped reads as reported by the index. + `covmed`: rename to `covstats` + `covstats`: report samplename(s) from read groups as well as bam path. + `covstats`: skip first 100K reads to give better estimates of depth. + `covstats`: report percent of bad (QC-Fail|Duplicate) and of umapped reads. - ++ `indexcov`: automatically exclude chromosomes that match pattern: `^chrEBV$|^NC|_random$|Un_|^HLA\-|_alt$|hap\d$`. + this can be adjust from the command-line. v0.1.15 ======= diff --git a/goleft.go b/goleft.go index 0b22519..877b714 100644 --- a/goleft.go +++ b/goleft.go @@ -1,3 +1,3 @@ package goleft -const Version = "0.1.16-dev" +const Version = "0.1.16" diff --git a/indexcov/functional-tests.sh b/indexcov/functional-tests.sh index 3e2eed9..13f6da0 100644 --- a/indexcov/functional-tests.sh +++ b/indexcov/functional-tests.sh @@ -43,6 +43,9 @@ run check_single_sample ./goleft_test indexcov -d /tmp/tt sample_name_0001.bam assert_exit_code 0 assert_in_stderr "not plotting" assert_equal $(num_colcounts /tmp/tt/tt-indexcov.ped) 1 +assert_equal $(zgrep -wc ^1 /tmp/tt-indexcov.bed.gz ) 14748 + + run check_sex ./goleft_test indexcov -d /tmp/tt --sex "X,Y" sample_name_0001.bam assert_exit_code 0 @@ -89,7 +92,13 @@ fi run check_cohort ./goleft_test indexcov -d /tmp/tt samples/*.bam assert_exit_code 0 assert_equal $(num_colcounts /tmp/tt/tt-indexcov.ped) 1 -exit + + +rm -f /tmp/tt/tt-indexcov.bed.gz +run check_exclude ./goleft_test indexcov --excludepatt '^1$' -d /tmp/tt samples/sample_paper_0001.bam +assert_equal $(zgrep -wc ^1 /tmp/tt/tt-indexcov.bed.gz ) 0 +assert_exit_code 0 +cat $STDERR_FILE run check_crai ./goleft_test indexcov --fai human_g1k_v37.fasta.fai -d /tmp/1kg NA21144.mapped.ILLUMINA.bwa.GIH.low_coverage.20130415.bam.cram.crai assert_exit_code 0 diff --git a/indexcov/indexcov.go b/indexcov/indexcov.go index ed71922..1539b61 100644 --- a/indexcov/indexcov.go +++ b/indexcov/indexcov.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "reflect" + "regexp" "sort" "strconv" "strings" @@ -34,14 +35,16 @@ import ( var Ploidy = 2 var cli = &struct { - Directory string `arg:"-d,required,help:directory for output files"` - IncludeGL bool `arg:"-e,help:plot GL chromosomes like: GL000201.1 which are not plotted by default"` - Sex string `arg:"-X,help:comma delimited names of the sex chromosome(s) used to infer sex. Set to '' if no sex chromosomes are present."` - Chrom string `arg:"-c,help:optional chromosome to extract depth. default is entire genome."` - Fai string `arg:"-f,help:fasta index file. Required when crais are used."` - Bam []string `arg:"positional,required,help:bam(s) or crais for which to estimate coverage"` - sex []string `arg:"-"` -}{Sex: "X,Y"} + Directory string `arg:"-d,required,help:directory for output files"` + IncludeGL bool `arg:"-e,help:plot GL chromosomes like: GL000201.1 which are not plotted by default"` + ExcludePatt string `arg:-p,help:regular expression of chromosome names to exclude"` + Sex string `arg:"-X,help:comma delimited names of the sex chromosome(s) used to infer sex. Set to '' if no sex chromosomes are present."` + Chrom string `arg:"-c,help:optional chromosome to extract depth. default is entire genome."` + Fai string `arg:"-f,help:fasta index file. Required when crais are used."` + Bam []string `arg:"positional,required,help:bam(s) or crais for which to estimate coverage"` + sex []string `arg:"-"` + exclude *regexp.Regexp `arg:"-"` +}{Sex: "X,Y", ExcludePatt: `^chrEBV$|^NC|_random$|Un_|^HLA\-|_alt$|hap\d$`} // MaxCN is the maximum normalized value. var MaxCN = float32(6) @@ -135,7 +138,7 @@ func tint(f float32) int { // CountsAtDepth calculates the count of items in depths that are at 100 * d func CountsAtDepth(depths []float32, counts []int) { if len(counts) != slots { - panic(fmt.Sprintf("indexcov: expecting counts to be length %d", slots)) + panic(fmt.Sprintf("indexcov: expecting counts to be length %d, was: %d", slots, len(counts))) } for _, d := range depths { counts[tint(d*(slots*float32(slotsMid))+0.5)]++ @@ -326,6 +329,9 @@ func Main() { if len(cli.Sex) > 0 { cli.sex = strings.Split(strings.TrimSpace(cli.Sex), ",") } + if cli.ExcludePatt != "" { + cli.exclude = regexp.MustCompile(cli.ExcludePatt) + } if exists, err := getDirectory(cli.Directory); err != nil || !exists { log.Fatalf("indexcov: error creating specified directory: %s, %s", cli.Directory, err) @@ -497,8 +503,13 @@ func run(refs []*sam.Reference, idxs []*Index, names []string, base string) (map chromNames := make([]string, 0, len(refs)) fmt.Fprintf(bgz, "#chrom\tstart\tend\t%s\n", strings.Join(names, "\t")) - for ir, ref := range refs { + ir := -1 + for _, ref := range refs { chrom := ref.Name() + if cli.exclude != nil && cli.exclude.Match([]byte(chrom)) { + continue + } + ir++ // Some samples may not have all the data, so we always take the longest sample for printing. longest, longesti := 0, 0