forked from johnomics/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcf.jl
67 lines (55 loc) · 1.32 KB
/
vcf.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
using GZip
import Base.length
type VCFReader
v::IO
VCFReader(filename::String)=new(gzopen(filename))
end
function start(vcf::VCFReader)
readline(vcf.v)
return 0
end
next(vcf::VCFReader,snps)= readline(vcf.v), snps+1
done(vcf::VCFReader,snps) = eof(vcf.v)
type Scaffold
name::String
length::Int
snps::Vector{String}
Scaffold(scf::String,len::Int)=new(scf,len,String[])
Scaffold(scf::String)=Scaffold(scf,0)
Scaffold()=Scaffold("")
end
type Genome
vcf::VCFReader
length::Int
scaffolds::Array{Scaffold}
partsz::Int
function Genome(filename,np::Int)
l=0
s::Array{Scaffold}={}
v=VCFReader(filename)
for header in v
if ismatch(r"^#CHROM",header)
break
end
m=match(r"^##contig=<ID=(.+),length=(\d+)>$",header)
if m != nothing
scfname = m.captures[1]
scflen = int(m.captures[2])
push!(s,Scaffold(scfname,scflen))
l += scflen
end
end
new(v,l,s,int(l/np))
end
end
function outputScaffold(s::Scaffold)
println("$(s.name)\t$(s.length)")
end
function outputScaffolds(g::Genome)
for s in g.scaffolds
outputScaffold(s)
end
end
function length(g::Genome)
g.length
end