Abstract:
Here are the two equivalent codes for extract sequences from a FASTA
file. One implement Bio::SeqIO, and the other implement only standard
Perl codes.
The
style of Perl coding using Bio-perl:
#bioperl
style
sub
chr_info{
my
($infile)=@_;
#generate
genome_info_csv file
my
$in_obj = Bio::SeqIO->new(-file => $infile, -format =>
'fasta');
while
(my $seq_obj = $in_obj->next_seq() ) {#3
my
$chr=$seq_obj->display_id;
my
$chr_seq=$seq_obj->seq;
my
$chr_len=$seq_obj->length;
my
$GC_num=map {/G|C/gi} $chr_seq;
my
$GC_cont=int($GC_num*100/$chr_len+0.5);
my
$enzyme_num=map {/'GAATTC'/gi} $chr_seq;
print
join(',', $chr, $chr_len, $GC_cont, $enzyme_num, ), "\n";
}#
}
#only
standard Perl codes
sub
my_chr_info{
my
($infile)=@_;
#generate
genome_info_csv file
open
my($IN), "<", $infile or die ;
my($chr,$seq);
while(
defined ($chr=<$IN>) && defined ($seq=<$IN>) ){#3
chomp($chr,
$seq);
$chr=~s/^>//;
my
$seq_len=length($seq);
my
$GC_num=map {/G|C/gi} $seq;
my
$GC_cont=int($GC_num*100/$seq_len+0.5);
my
$enzyme_num=map {/'GAATTC'/gi} $seq;
print
join(',', $chr, $seq_len, $GC_cont, $enzyme_num, ), "\n";
}#
close($IN);
}
No comments:
Post a Comment