Abstract: read fasta file using mycode and biopython
The script:
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 28 11:42:06 2015
@author: yuan
"""
# GenBank gb|accession|locus
# EMBL Data Library emb|accession|locus
# DDBJ, DNA Database of Japan dbj|accession|locus
# NBRF PIR pir||entry
# Protein Research Foundation prf||name
# SWISS-PROT sp|accession|entry name
# Brookhaven Protein Data Bank pdb|entry|chain
# Patents pat|country|number
# GenInfo Backbone Id bbs|number
# General database identifier gnl|database|identifier
# NCBI Reference Sequence ref|accession|locus
# Local Sequence identifier lcl|identifier
import re
import itertools
#mycode
def read_fasta(file):
#store accession: sequences
ref={}
with open(file, 'r') as in_obj:
for line1,line2 in itertools.izip_longest(*[in_obj]*2):
#first line
line1=line1.rstrip('\n')
acc=0
if line1.find('|')>0:
items=line1.split('|')
acc=items[1]
else:
acc=re.sub("^>", "", line1)
#
line2=line2.rstrip('\n')
#ref information
ref[acc]=line2
print acc
in_obj.close()
return ref
#
def biopython_fasta(file):
from Bio import SeqIO
ref={}
in_obj = open(file, "r")
for record in SeqIO.parse(in_obj, "fasta") :
acc, seq=record.id, record.seq
ref[acc]=seq
print acc
in_obj.close()
return ref
if __name__=="__main__":
fa_file='/home/yuan/eRNA/ref_seq/human_Ensembl_GRCh38R80.fa'
#fa_file='/home/yuan/mysql_pre/cookbooks/test.fa'
#my code
ref=read_fasta(fa_file)
#biopython
ref=biopython_fasta(fa_file)
print 'ok'
No comments:
Post a Comment