Abstract: module the process of enzyme digestion
The result:
Positions of enzyme site: [1, 14, 34, 68]
fragments: ['GAATTCACTGATC', 'GAATTCGATTACGTATAGTA', 'GAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTC']
All possible fragments: (['GAATTCACTGATC', 'GAATTCGATTACGTATAGTA', 'GAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTC', 'GAATTCACTGATCGAATTCGATTACGTATAGTA', 'GAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTCTATCATACATATATATCGATGCGTTCATGAATTC', 'GAATTCACTGATCGAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCAT', 'GAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCATGAATTC'], [13, 20, 34, 6, 33, 54, 40, 67, 60])
ok
The script:
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 1 12:02:06 2015
@author: yuan
"""
#module enzyme digestion
import re
class enzyme_digestion:
def __init__(self, seq, enzyme, start=None, end=None):
self.enzyme=enzyme
self.seq_len=len(seq)
if start is None or start<1 or start>=self.seq_len :
self.start=0
else:
self.start=start-1
if end is None or end<=start:
self.end=self.seq_len-1
else:
self.end=end-1
self.seq=seq
#print self.seq
self.sites=[]
self.fragment=[]
def site_pos(self):
#read enzyme sites into list
for m in re.finditer(self.enzyme, self.seq):
pos=m.start()+1
if self.start <= pos <= self.end:
self.sites.append(pos)
return self.sites
def fragmentation(self):
if self.sites==[]:
self.site_pos()
#
sites_num=len(self.sites)-1
if self.sites[0]>self.start+1:
first_site=self.sites[0]-1
self.fragment.append(self.seq[self.start:first_site])
for i in range(sites_num):
site_start=self.sites[i]-1
site_end=self.sites[i+1]-1
self.fragment.append(self.seq[site_start:site_end])
if self.sites[-1]<self.end:
last_site=self.sites[-1]-1
self.fragment.append(self.seq[last_site:self.end])
return self.fragment
def all_fragments(self):
if self.fragment==[]:
self.fragmentation()
#all possible fragments including partial digestion
all_fragments=[]
all_fragments_len=[]
for in_sites in range(1,len(self.sites)):
start_index=0
end_index=in_sites
n=0
while end_index<=len(self.sites):
frag="".join(self.fragment[start_index:end_index])
all_fragments.append(frag)
all_fragments_len.append(len(frag))
#print '===', in_sites, start_index, end_index, frag
start_index +=1
end_index += 1
return all_fragments, all_fragments_len
if __name__=="__main__":
seq='GAATTCACTGATCGAATTCGATTACGTATAGTAGAATTCTATCATACATATATATCGATGCGTTCATGAATTCT'
e=enzyme_digestion(seq, 'GAATTC')
print 'Positions of enzyme site:', e.site_pos()
print 'fragments:', e.fragmentation()
print 'All possible fragments:', e.all_fragments()
print 'ok'
No comments:
Post a Comment