Abstract:
Truncation, reverse-complementary, translation of DNA sequence.
Two methods for
DNA/protein manipulations with Bio-perl module or self-defined
subroutines are present. Here is a Perl script to show how finish
those manipulations of DNA/protein sequences using Bio-perl.
#! /usr/bin/perl -w
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Perl;
#
my $DNA='CTTCATGTGATTGAGGATTTCATTGACCAGGACACCCCCTTGGAGCCTA';
print "Original DNA sequence: $DNA\n";
#objected-DNA
my $seq_obj=Bio::Seq->new(-seq=>$DNA, -display_id=>'test_DNA',
-alphabet=>'DNA');
#DNA manipulations
my $seq_len=$seq_obj->length();
print "Length of DNA sequence: $seq_len\n";
my $seq_trunc=$seq_obj->subseq(5, 10);
print "Truncated DNA sequence at 5-10: $seq_trunc\n";
#protein manipulations
my $pro_obj=$seq_obj->translate();
my $pro_seq=$pro_obj->seq();
my $pro_len=$pro_obj->length();
print "Translation of DNA sequence into protein: $pro_seq\n";
print "Number of amino acid sequence: $pro_len\n";
#reverse-complementary DNA
my $rev_obj=Bio::Perl::revcom($DNA);
my $rev_seq=$rev_obj->seq();
print "Reversed-complementary sequence: $rev_seq\n";
Here is the output:
Original DNA sequence:
CTTCATGTGATTGAGGATTTCATTGACCAGGACACCCCCTTGGAGCCTA
Length of DNA sequence: 49
Truncated DNA sequence at 5-10: ATGTGA
Translation of DNA sequence into protein: LHVIEDFIDQDTPLEP
Number of amino acid sequence: 16
Reversed-complementary sequence:
TAGGCTCCAAGGGGGTGTCCTGGTCAATGAAATCCTCAATCACATGAAG
OK
I give another
method known as DNA_translation() to translate DNA sequence into
protein sequence without Bio-Perl modules. This function ca meet your
special requirements for protein translation
sub DNA_translation{
my ($seq)=@_;
#separate DNA sequence by three letters
my @three_code=$seq=~/.../sg;
#Genetic code
#(Phe/F) Phenylalanine #(Trp/W) Tryptophan #(Gln/Q) Glutamine
#(Asn/N) Asparagine
#(Asp/D) Aspartic acid #(Glu/E) Glutamic acid #(Arg/R) Arginine
#(Lys/K) Lysine
#(Ser/S) Serine #(Tyr/Y) Tyrosine #(Cys/C) Cysteine #(Leu/L)
Leucine
#(Pro/P) Proline #(His/H) Histidine #(Ile/I) Isoleucine #(Thr/T)
Threonine
#(Ser/S) Serine #(Met/M) Methionine #(Val/V) Valine #(Ala/A)
Alanine #(Gly/G) Glycine
my %genetic_code=( TAA=>'.', TGA=>'.', TAG=>'.', ATG=>'M',
TGG=>'W',
CGT=>'R', CGC=>'R', CGA=>'R',
CGG=>'R',AGA=>'R',AGG=>'R',
TTA=>'L', TTG=>'L', CTT=>'L', CTC=>'L', CTA=>'L',
CTG=>'L',
TCT=>'S', TCC=>'S', TCA=>'S', TCG=>'S', AGT=>'S',
AGC=>'S',
GCC=>'A', GCT=>'A', GCA=>'A', GCG=>'A', GGT=>'G',
GGC=>'G', GGA=>'G', GGG=>'G',
CCT=>'P', CCC=>'P', CCA=>'P', CCG=>'P', GTT=>'V',
GTC=>'V', GTA=>'V', GTG=>'V',
ACT=>'T', ACC=>'T', ACA=>'T', ACG=>'T', ATT=>'I',
ATC=>'I', ATA=>'I', AAA=>'K', AAG=>'K',
GAA=>'E', GAG=>'E', GAT=>'D', GAC=>'D', TAT=>'Y',
TAC=>'Y', TGT=>'C', TGC=>'C',
TTT=>'F', TTC=>'F', CAT=>'H', CAC=>'H', CAA=>'Q',
CAG=>'Q', AAT=>'N', AAC=>'N',
);
#tranlation
my @aa;
foreach my $tc(@three_code){
my $amino_acid= (exists $genetic_code{$tc}) ? $genetic_code{$tc} :
'*';
push(@aa, $amino_acid);
}
my $aa_seq=join("", @aa);
print "$aa_seq\n";
return($aa_seq);
}
Here is another subroutine for reversed-complementary DNA
sub reverse_complement {
my $dna_seq = shift;
# reverse the DNA sequence
my $revcom = reverse($dna_seq);
# complement the reversed DNA sequence
$revcom =~
tr/ABCDGHMNRSTUVWXYabcdghmnrstuvwxy/TVGHCDKNYSAABWXRtvghcdknysaabwxr/;
return ($revcom);
}
Writing data: 2013.08.10
No comments:
Post a Comment