Abstract:Trim adapter’s sequence at the 3‘-end of reads
The points:
1.pass functions as a parameter into another function for reuse codes.
2. implement regular expression
3.consider more possible patterns when trim adapters.
The script:
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 5 19:27:31 2015
@author: yuan
"""
import itertools
import re
class trim_adapter:
def __init__(self,in_file, out_file):
self.in_file=in_file
self.out_file=out_file
self.adapter3='TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCCGTCCATCTCGTATGCCGTCTTCT'
self.len_adapter3=len(self.adapter3)
def read_fastq(self, FUN=None, *args):
#store accession: sequences
reads={}
out_obj=open(self.out_file,'w')
with open(self.in_file, 'r') as in_obj:
for line1, line2, line3, line4 in itertools.izip_longest(*[in_obj]*4):
#first line
line1=line1.rstrip('\n')
items=line1.split()[0]
items=items.split(':')
coordinate=items[-2]+'-'+items[-1]
#print items, coordinate
#second line: sequence
seq=line2.rstrip('\n')
#print seq
#ref information
if FUN is None:
reads[coordinate]=seq
else:
reads[coordinate]=FUN(seq)
if len(reads[coordinate])>6:
print reads[coordinate]
out_obj.write(coordinate+'\t'+reads[coordinate]+'\n')
in_obj.close()
out_obj.close()
return reads
def trim_3end(self, seq):
#print seq
flag=0
#all adapter sequence
index=re.search(self.adapter3, seq)
if index is not None:
trimmed_seq=seq[0:index.start()]
flag=1
else:
#partial adapter at the 3 end of seq
for i in range(6,self.len_adapter3+1)[::-1]:
part_adapter_seq=self.adapter3[0:i]
#print part_adapter_seq
index=re.search(r''+self.adapter3+r'\b', seq)
if index is not None:
trimmed_seq=seq[0:index.start()]
#print '=', trimmed_seq
flag=1
break
#
if flag==1:
seq=trimmed_seq
#print seq
return seq
if __name__=="__main__":
fq_file='/home/yuan/mysql_pre/cookbooks/test.fq'
out_file='/home/yuan/mysql_pre/cookbooks/test_trim.txt'
#my code
t=trim_adapter(fq_file,out_file)
t.read_fastq( t.trim_3end ) #Note should not be t.trim_3end()
print 'ok'
No comments:
Post a Comment