Tuesday, July 28, 2015

python-bioinformatics: reverse complement of DNA


Abstract: This post shows two ways to compute the reverse complement of a DNA sequence: my own code and Biopython.

My code tolerates invalid characters in DNA sequences: non-letter characters are removed, and letters other than A/T/C/G are replaced with N.

The result:
Input DNA sequence: TIGTGTAactgtG3XGGT-CNcccAATGTCT
format DNA sequence: TNGTGTAACTGTGNGGTCNCCCAATGTCT
Reversed-complementary DNA: AGACATTGGGNGACCNCACAGTTACACNA
Input DNA sequence: 123NNNNNNNN
format DNA sequence: NNNNNNNN
Reversed-complementary DNA: NNNNNNNN
Biopython: AGACATTgggNGACCCacagtTACACA



The script:
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 28 09:46:16 2015

@author: yuan
"""
import string
import re
from Bio.Seq import Seq

class DNA:
    """A DNA sequence that can be normalized and reverse-complemented.

    The sequence is kept in ``self.DNA``. ``format_DNA`` (and therefore
    ``revcom_DNA``, which calls it) overwrites ``self.DNA`` with the
    normalized sequence.
    """

    # Complement of each unambiguous base; every other letter maps to 'N'.
    _COMPLEMENT = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

    def __init__(self, DNA):
        """Store the raw sequence string *DNA* without modifying it."""
        self.DNA = DNA

    def format_DNA(self):
        """Normalize ``self.DNA`` in place and return it.

        The sequence is upper-cased, every character that is not an ASCII
        letter A-Z is removed, and every remaining letter other than
        A/T/C/G is replaced with ``N``.
        """
        normalized = self.DNA.upper()
        # Drop anything that is not a letter (digits, dashes, spaces, ...).
        normalized = re.sub(r"[^A-Z]", "", normalized)
        # Collapse every non-ACGT letter to N.  No '|' inside the class:
        # within [...] it would be a literal pipe, not an alternation.
        normalized = re.sub(r"[^ATCG]", "N", normalized)

        self.DNA = normalized
        return normalized

    def revcom_DNA(self):
        """Print and return the reverse complement of the normalized sequence.

        Prints the input sequence, the normalized sequence and the result.
        If the normalized sequence is empty, prints an error message and
        returns ``''``.
        """
        # Single-string print() calls produce the same output on
        # Python 2 and Python 3.
        print('Input DNA sequence: %s' % (self.DNA,))
        seq = self.format_DNA()
        print('format DNA sequence: %s' % (seq,))

        if not seq:
            print('Error: No DNA sequence input!')
            return ''

        complement = self._COMPLEMENT
        revcom = ''.join(complement.get(base, 'N') for base in reversed(seq))
        print('Reversed-complementary DNA: %s' % (revcom,))
        return revcom
      

if __name__ == "__main__":
    # Demo 1: mixed-case input containing a digit, a dash and non-ACGT letters.
    seq = 'TIGTGTAactgtG3XGGT-CNcccAATGTCT'
    revcom_seq = DNA(seq).revcom_DNA()

    # Demo 2: leading digits followed by a run of N.
    seq = '123NNNNNNNN'
    revcom_seq = DNA(seq).revcom_DNA()

    # The same operation using Biopython's Seq.reverse_complement().
    seq = 'TGTGTAactgtGGGTCNcccAATGTCT'
    s = Seq(seq)
    # Single-string print() gives identical output on Python 2 and Python 3.
    print('Biopython: %s' % (s.reverse_complement(),))

No comments:

Post a Comment