-
Notifications
You must be signed in to change notification settings - Fork 0
/
GC_content_hash
48 lines (40 loc) · 1.35 KB
/
GC_content_hash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# usr/bin/perl -w
use strict;
unless (@ARGV==2) { #@ARGV传给脚本的命令行参数列表
die"Usage: perl $0 <input.fa> <out.GCcontent\n"; #当命令行参数不是2的时候输出使用说明
}
my ($infile,$outfile) = @ARGV; #把命令行参数赋值给输入文件和输出文件
open(IN,$infile) || die "error: can't open infile: $infile"; #打开输入文件句柄IN
open(OUT,">$outfile") || die "error: can't make outfile: $outfile"; #打开输出文件句柄OUT
#申明哈希
my %count;
my $seq;
my $line=<IN>;
chomp($line);
print OUT "$line";
while($line=<IN>)
{
if($line !~ /^>/){
$line=~s/[\r\n\s+]//g;
$seq.=$line;
my @array=split("",$line);
for(my $i=0;$i<@array;$i++){
$count{$array[$i]}++;
}
}
elsif($line=~/^>(.*)\n$/){
my $len=$count{A}+$count{T}+$count{G}+$count{C};
my $GC=( $count{G} + $count{C} );
my $gc_count = $len ? $GC / $len : 0; #计算GC含量,如果长度为0,GC含量算0
print OUT "\t$len\t$gc_count\n$seq\n";
%count="";
$seq="";
print OUT ">$1";
}
}
my $len=$count{A}+$count{T}+$count{G}+$count{C};
my $GC=( $count{G} + $count{C} );
my $gc_count = $len ? $GC / $len : 0;
print OUT "\t$len\t$gc_count\n$seq";
close IN;
close OUT;