Friday, August 14, 2015

Python: File (4)

Abstract: A script for compressing and decompressing *.gz files and for downloading files from a web link (URL).

The script:
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 14 12:47:11 2015

@author: yuan
"""
import gzip
import os
import urllib
import urllib2

class file_directory:
    """Helper bound to one local path or URL (``in_file``).

    It can split the path into directory/name/extension, gzip-compress or
    gzip-decompress the file, and download it when ``in_file`` is a URL.

    Paths are handled as plain '/'-separated strings (not via ``os.path``)
    so that the same splitting logic also works for URLs such as
    ``ftp://host/dir/name.gz``.
    """

    def __init__(self, in_file):
        # Path or URL that every method of this instance operates on.
        self.in_file = in_file

    def format_dir(self, directory):
        """Return ``directory`` with a trailing '/', creating it if absent.

        An empty string is treated as the current directory ('./').
        Missing parent directories are created as well (mode 0755).
        """
        if not directory:
            directory = './'
        elif not directory.endswith('/'):
            directory = directory + '/'
        if not os.path.isdir(directory):
            # makedirs (rather than mkdir) so nested output paths work too
            os.makedirs(directory, 0o755)
        return directory

    def file_name(self):
        """Split ``in_file`` into its components.

        Returns a 4-tuple ``(file_dir, file_name, name_head, name_tail)``:
        ``file_dir`` always ends with '/', ``file_name`` is the last path
        component, ``name_tail`` is the last extension including the dot
        (or '' when there is none) and ``name_head`` is the rest.

        A path without any '/' yields ``file_dir == './'``. A name whose
        only dot is its first character (e.g. '.profile') is treated as
        having no extension.
        """
        parent, slash, file_name = self.in_file.rpartition('/')
        # No slash at all means a bare file name relative to the cwd.
        file_dir = parent + '/' if slash else './'
        dot = file_name.rfind('.')
        if dot > 0:
            name_head, name_tail = file_name[:dot], file_name[dot:]
        else:
            name_head, name_tail = file_name, ''
        return file_dir, file_name, name_head, name_tail

    def decompress_gz(self, out_dir=None):
        """Gunzip ``in_file`` and return the path of the decompressed file.

        The output name is the input name minus its last extension. It is
        written next to the input, or into ``out_dir`` (created on demand)
        when that is given.
        """
        file_dir, _, name_head, _ = self.file_name()
        if out_dir is not None:
            file_dir = self.format_dir(out_dir)
        out_file = file_dir + name_head
        # The payload is read completely before the output is opened, so
        # the input is never truncated even if both paths coincide (an
        # input without extension and no out_dir).
        with gzip.GzipFile(self.in_file, 'rb') as source:
            payload = source.read()
        with open(out_file, 'wb') as target:
            target.write(payload)
        print('Decompressed file: %s' % out_file)
        return out_file

    def compress_gz(self, out_dir=None):
        """Gzip ``in_file`` and return the path of the new '.gz' file.

        The archive is ``in_file + '.gz'``, or ``<out_dir>/<name>.gz`` when
        ``out_dir`` is given (the directory is created on demand). The
        original file is left in place.
        """
        if out_dir is None:
            out_file = self.in_file + '.gz'
        else:
            out_file = self.format_dir(out_dir) + self.file_name()[1] + '.gz'
        with open(self.in_file, 'rb') as source:
            payload = source.read()
        with gzip.GzipFile(out_file, 'wb') as target:
            target.write(payload)
        print('Compressed file: %s' % out_file)
        return out_file

    def download_file(self, out_dir):
        """Download the URL in ``in_file`` into ``out_dir``.

        The local file keeps the last path component of the URL as its
        name. Returns the path of the downloaded file.
        """
        out_file = self.format_dir(out_dir) + self.file_name()[1]
        try:
            # Python 3 location of the retrieval helper
            from urllib.request import urlretrieve
        except ImportError:
            # Python 2: same opener the script has always used
            urlretrieve = urllib.URLopener().retrieve
        urlretrieve(self.in_file, out_file)
        print("Download file %s and save it as %s" % (self.in_file, out_file))
        return out_file

       
if __name__ == "__main__":
    # Usage notes (the paths are the author's local examples):
    #
    #   decompress next to the archive, or into another directory
    #       file_directory('/home/yuan/mysql_pre/cookbooks/test.gz').decompress_gz()
    #       file_directory('/home/yuan/mysql_pre/cookbooks/test.gz').decompress_gz('/home/yuan/mysql_pre/')
    #
    #   compress next to the source file, or into another directory
    #       file_directory('/home/yuan/mysql_pre/test').compress_gz()
    #       file_directory('/home/yuan/mysql_pre/test').compress_gz('/home/yuan/')
    #
    #   download a single file
    #       file_directory('ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/dna/CHECKSUMS').download_file('/home/yuan/mysql_pre/cookbooks/')

    # Live demo: fetch a gzipped FASTA file over FTP, then unpack it
    # beside the downloaded archive.
    source_url = 'ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.MT.fa.gz'
    downloader = file_directory(source_url)
    archive_path = downloader.download_file('/home/yuan/mysql_pre/cookbooks/')
    file_directory(archive_path).decompress_gz()

    print('ok')
   

No comments:

Post a Comment