Tuesday, August 4, 2015

Bioinformatics in Python: enzyme digestion

Abstract: module the process of enzyme digestion

The result:
Positions of enzyme site: [1, 14, 34, 68]
fragments: ['GAATTCACTGATC', 'GAATTCGATTACGTATAGTA', 'GAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTC']
All possible fragments: (['GAATTCACTGATC', 'GAATTCGATTACGTATAGTA', 'GAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTC', 'GAATTCACTGATCGAATTCGATTACGTATAGTA', 'GAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTCTATCATACATATATATCGATGCGTTCATGAATTC', 'GAATTCACTGATCGAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCATGAATTC'], [13, 20, 34
, 6, 33, 54, 40, 67, 60])
ok

The script:
# -*- coding: utf-8 -*-
"""
Created on Sat Aug  1 12:02:06 2015

@author: yuan
"""
#module enzyme digestion

import re

class enzyme_digestion:
    def __init__(self, seq, enzyme, start=None, end=None):
        self.enzyme=enzyme
        self.seq_len=len(seq)
        if start is None or start<1 or start>=self.seq_len :
            self.start=0
        else:
            self.start=start-1
        if end is None or  end<=start:
            self.end=self.seq_len-1
        else:
            self.end=end-1
        self.seq=seq
        #print self.seq
        self.sites=[]
        self.fragment=[]
           
    def site_pos(self):
        #read enzyme sites into list
        for m in re.finditer(self.enzyme, self.seq):
            pos=m.start()+1
            if self.start <= pos <= self.end:
                self.sites.append(pos)
        return self.sites
   
    def fragmentation(self):
        if self.sites==[]:
            self.site_pos()
        #
        sites_num=len(self.sites)-1
        if self.sites[0]>self.start+1:
            first_site=self.sites[0]-1
            self.fragment.append(self.seq[self.start:first_site])
        for i in range(sites_num):
            site_start=self.sites[i]-1
            site_end=self.sites[i+1]-1
            self.fragment.append(self.seq[site_start:site_end])
        if self.sites[-1]<self.end:
            last_site=self.sites[-1]-1
            self.fragment.append(self.seq[last_site:self.end])
        return self.fragment
  
    def all_fragments(self):
        if self.fragment==[]:
            self.fragmentation()
           
        #all possible fragments including partial digestion
        all_fragments=[]
        all_fragments_len=[]
        for in_sites in range(1,len(self.sites)):
            start_index=0
            end_index=in_sites
            n=0
            while end_index<=len(self.sites):
                frag="".join(self.fragment[start_index:end_index])
                all_fragments.append(frag)
                all_fragments_len.append(len(frag))
                #print '===', in_sites, start_index, end_index, frag               
                start_index +=1
                end_index += 1
        return all_fragments, all_fragments_len

               
           
   

if __name__=="__main__":
    seq='GAATTCACTGATCGAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCATGAATTCT'
    e=enzyme_digestion(seq, 'GAATTC')
    print 'Positions of enzyme site:', e.site_pos()
    print 'fragments:', e.fragmentation()
    print 'All possible fragments:', e.all_fragments()
    print 'ok'
   
   

No comments:

Post a Comment