Tuesday, July 28, 2015

python-Bioinformatics: translate DNA sequences


Abstract: translating a DNA sequence into a protein sequence with my own code, and checking the result against Biopython.

The result:
Input DNA sequence: TGTGTAactgtGGGTCcccAATGTCTC
My code: CVTVGPQCL
Biopython: CVTVGPQCL


The script:
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 28 10:27:58 2015

@author: yuan
"""

import string
import re
from Bio.Seq import Seq

class DNA:
    """A DNA sequence that can be cleaned up and translated into protein.

    Attributes:
        DNA: the sequence string; format_DNA() normalizes it in place.
        DNA_codons: dict mapping each of the 64 DNA codons to a one-letter
            amino-acid code, with '.' standing for a stop codon.
    """

    # Matches every character that is not one of the four bases. Inside a
    # character class '|' is a literal, so it must not be used as a separator
    # here (otherwise pipe characters would be kept in the sequence).
    _NON_BASE = re.compile(r"[^ATCG]")

    def __init__(self, DNA):
        """Store the raw sequence and build the codon table.

        Args:
            DNA: the input sequence string; it may be mixed-case and contain
                characters other than A/T/G/C (they are removed later by
                format_DNA()).
        """
        self.DNA = DNA
        # One-letter amino-acid codes:
        #   Isoleucine: I, Leucine: L, Valine: V, Phenylalanine: F,
        #   Methionine: M, Cysteine: C, Alanine: A, Glycine: G,
        #   Proline: P, Threonine: T, Serine: S, Tyrosine: Y,
        #   Tryptophan: W, Glutamine: Q, Asparagine: N, Histidine: H,
        #   Glutamic acid: E, Aspartic acid: D, Lysine: K, Arginine: R
        # Stop codons (TAA, TAG, TGA) are written as '.'.
        self.DNA_codons = {
            'ATT': 'I', 'ATC': 'I', 'ATA': 'I',
            'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'TTA': 'L', 'TTG': 'L',
            'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
            'TTT': 'F', 'TTC': 'F', 'ATG': 'M', 'TGT': 'C', 'TGC': 'C',
            'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
            'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
            'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
            'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
            'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'AGT': 'S', 'AGC': 'S',
            'TAT': 'Y', 'TAC': 'Y', 'TGG': 'W', 'CAA': 'Q', 'CAG': 'Q',
            'AAT': 'N', 'AAC': 'N', 'CAT': 'H', 'CAC': 'H',
            'GAA': 'E', 'GAG': 'E', 'GAT': 'D', 'GAC': 'D', 'AAA': 'K', 'AAG': 'K',
            'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
            'TAA': '.', 'TAG': '.', 'TGA': '.',
        }

    def format_DNA(self):
        """Upper-case the sequence and strip everything except A/T/G/C.

        The cleaned sequence replaces self.DNA.

        Returns:
            The cleaned sequence string.
        """
        self.DNA = self._NON_BASE.sub("", self.DNA.upper())
        return self.DNA

    def translate_DNA(self, de=0):
        """Translate the sequence into one-letter amino-acid codes.

        Prints the sequence as currently stored (before cleaning), cleans it
        with format_DNA(), then reads it three bases at a time.

        Args:
            de: index of the first base to translate, i.e. the reading-frame
                offset (0, 1 or 2).

        Returns:
            The protein string. Stop codons appear as '.'; a chunk that is
            not in the codon table (in practice an incomplete codon at the
            end of the sequence) appears as 'X'.
        """
        # Single-argument form so the line prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print('Input DNA sequence: %s' % self.DNA)
        seq = self.format_DNA()

        codon_table = self.DNA_codons
        return ''.join(
            codon_table.get(seq[start:start + 3], 'X')
            for start in range(de, len(seq), 3)
        )
       

if __name__ == "__main__":
    # Demo: translate one mixed-case sequence with the DNA class above and
    # with Biopython, so the two results can be compared by eye.
    seq = 'TGTGTAactgtGGGTCcccAATGTCTC'
    aa_seq = DNA(seq).translate_DNA()
    # Single-argument print calls produce the same text on Python 2 and 3.
    print('My code: %s' % aa_seq)

    # Same translation using Biopython.
    s = Seq(seq)
    print('Biopython: %s' % s.translate())

No comments:

Post a Comment