Friday, May 15, 2015

Python: file format

Abstract: Conversion among *.txt, *.csv and *.xls file formats

This script contains two classes, os_file and file_format. Together they can:
1. Read a *.txt file into a nested dictionary
2. Convert a *.txt/*.csv file into an *.xls file

Here is the script:


import os
import xlwt
import xlrd # work xls file
import csv
import shutil
import sh

######################
#class
class os_file:
    """Path helper bound to one file path given as a '/'-separated string.

    Provides the file name and its head/extension parts, the size on disk,
    and helpers that copy the file into a (possibly new) directory.
    """

    def __init__(self, file):
        """Remember *file*, the path every other method operates on."""
        self.file = file

    def _split_name(self):
        """Return (head, tail) of the file name, split at its LAST dot.

        A name without any dot yields (name, ''), so callers never have to
        deal with an unpacking error for names such as 'README' or
        'archive.tar.gz'.
        """
        head, dot, tail = self.file_name().rpartition('.')
        if not dot:
            # rpartition puts the whole string in the last slot when the
            # separator is absent; treat that as "no extension".
            return tail, ''
        return head, tail

    def file_name(self):
        """Return the last '/'-separated component of the path."""
        return self.file.split('/')[-1]

    def name_head(self):
        """Return the file name without its final extension."""
        return self._split_name()[0]

    def name_tail(self):
        """Return the final extension (without the dot), or '' if none."""
        return self._split_name()[1]

    def change_tail(self, tail):
        """Return the file name with its extension replaced by *tail*."""
        return self.name_head() + '.' + tail

    def file_size(self):
        """Return the size of the file in bytes, as reported by os.stat."""
        return os.stat(self.file).st_size

    def format_dir(self, dir):
        """Return *dir* with a trailing '/', creating the directory if needed.

        An empty string is treated as the current directory ('./').
        Missing directories (including parents) are created with mode 0o755.
        """
        if not dir:
            dir = '.'
        if not dir.endswith('/'):
            dir = dir + '/'
        if not os.path.isdir(dir):
            # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
            os.makedirs(dir, 0o755)
        return dir

    def cp_file(self, dir):
        """Copy the file into *dir* and return the path of the copy.

        The directory is created first; copying before it exists would make
        shutil.copy write a plain file named *dir* instead.
        """
        target_dir = self.format_dir(dir)
        shutil.copy(self.file, target_dir)
        return target_dir + self.file_name()

    def soft_cp(self, dir):
        """Copy the file into *dir* unless a same-named file is already there.

        Returns the path of the copy, or the string 'no' when the target
        already exists and nothing was copied.
        """
        target_dir = self.format_dir(dir)
        out_file = target_dir + self.file_name()
        if os.path.isfile(out_file):
            return 'no'
        shutil.copy(self.file, target_dir)
        return out_file
######################################
#class
class file_format:
    """Reader/converter for a delimited text file (*.txt or *.csv).

    The field separator is chosen from the file extension: a tab for 'txt',
    a comma for 'csv'. For any other extension ``sep`` is None, which makes
    ``str.split`` fall back to splitting on runs of whitespace.
    """

    def __init__(self, infile):
        """Bind to *infile* and derive its extension and field separator."""
        self.infile = infile
        self.OF = os_file(infile)
        self.infile_tail = self.OF.name_tail()
        # Always define sep so later code never hits a missing attribute.
        self.sep = {'txt': "\t", 'csv': ","}.get(self.infile_tail)

    def out_file(self, outdir="", file_tail=""):
        """Build an output path from the input file's base name.

        outdir:    target directory; when it is not an existing directory
                   the directory of the input file is used instead.
        file_tail: extension to append (without the dot); '' appends none.
        """
        if not os.path.isdir(outdir):
            # dirname is '' for a bare file name; use the current directory.
            outdir = os.path.dirname(self.infile) or '.'
        outfile = self.OF.format_dir(outdir) + self.OF.name_head()
        if file_tail:
            outfile = outfile + '.' + file_tail
        return outfile

    def read_file(self):
        """Return all lines of the file (newlines kept), printing each one."""
        arr = []
        with open(self.infile, 'r') as in_obj:
            for line in in_obj:
                arr.append(line)
                print(line)
        return arr

    def read_lines(self, num=1):
        """Return up to *num* leading lines with the trailing newline removed."""
        arr = []
        with open(self.infile, 'r') as in_obj:
            for _ in range(num):
                line = in_obj.readline()
                if line == "":
                    # readline returns '' only at end of file.
                    break
                arr.append(line.rstrip("\n"))
        return arr

    def txt_to_nested_dict(self):
        """Parse a tab-delimited table into {rowname: {colname: value}}.

        The first line supplies the column names; the first field of every
        line is the row name and is not stored as a column. Blank lines are
        skipped, and fields beyond the header width are ignored.
        """
        nested = {}
        with open(self.infile, 'r') as in_obj:
            header = in_obj.readline().rstrip("\r\n").split("\t")[1:]
            for line in in_obj:
                line = line.rstrip("\r\n")
                if not line:
                    continue
                items = line.split("\t")
                rowname = items[0]
                # zip stops at the shorter sequence, so a row longer than
                # the header cannot index past the column names.
                nested[rowname] = {
                    colname: value
                    for colname, value in zip(header, items[1:])
                }
        return nested

    def gothrough_dict(self, dict, colname='PC_DU145b'):
        """Print every row name, its column dict, and its *colname* value.

        dict:    nested dict as returned by txt_to_nested_dict().
        colname: column to print for each row; raises KeyError when a row
                 lacks it.
        """
        for key in dict:
            row = dict[key]
            print("%s %s %s" % (key, row, row[colname]))

    def txt_to_xls(self, outdir=""):
        """Write the input table to an .xls workbook and return its path.

        Each input line becomes one row of 'Sheet1'; fields are split on
        ``self.sep`` (no CSV quote handling is performed). The output path
        comes from out_file(outdir, 'xls').
        """
        outfile = self.out_file(outdir, 'xls')
        sep = getattr(self, 'sep', None)
        workbook = xlwt.Workbook()
        worksheet = workbook.add_sheet('Sheet1')
        with open(self.infile, 'r') as in_obj:
            # Cells are written row by row so that ragged or blank lines do
            # not truncate the other rows.
            for row_index, line in enumerate(in_obj):
                cells = line.rstrip("\r\n").split(sep)
                for col_index, cell in enumerate(cells):
                    worksheet.write(row_index, col_index, cell)
        # Save once, after every cell has been written.
        workbook.save(outfile)
        return outfile
#############
#main program

ff = file_format('/home/yuan/data_2/test/test.txt')

# Show the first line of the input file.
print(ff.read_lines(1))

# Load the tab-delimited table into a nested dict; the name avoids
# shadowing the builtin `dict` at module level.
nested = ff.txt_to_nested_dict()

# Print every row of the nested dict.
ff.gothrough_dict(nested)

# Convert the txt file to an xls file next to the input file.
ff.txt_to_xls()

Writing date: 20150515

No comments:

Post a Comment