Thursday, March 31, 2016

Python: data structure (6)



Abstract: Storing 2-dimensional data in a nested dictionary, a numpy matrix, and a pandas data frame.


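The memory used, as reported by sys.getsizeof (which measures only the container object itself, not the elements it holds):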
Size of dictionary (1000x1000) is 48 kB
Size of numpy matrix ((1000, 1000)) is 80 byte
Size of pandas data frame ((1000, 1000)) is 64 byte


Here is the code:
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 31 10:20:30 2016



@author: yuan
"""



import numpy as np
import pandas as pd
import random
import sys
#get a nested dictionary
def nested_dict(Dimension=100):
    """Build a square table of random integers as a dictionary of dictionaries.

    Row keys are 'A0' ... 'A<Dimension-1>' and column keys are
    'B0' ... 'B<Dimension-1>'. Every cell is drawn with
    random.randint(0, 1000), so both bounds are inclusive.

    Parameters:
        Dimension: number of rows and of columns. Zero or a negative
            value yields an empty dictionary.

    Returns:
        dict mapping each row name to a dict mapping each column name
        to an int.
    """
    # range() instead of xrange(): same iteration, but also defined on
    # Python 3, where xrange() no longer exists.
    # The column names are identical for every row, so build them once.
    colnames = ['B' + str(y) for y in range(Dimension)]
    data = {}
    for x in range(Dimension):
        # Cells are drawn row by row, column by column, so a seeded
        # `random` produces the same table as the original nested loops.
        data['A' + str(x)] = {
            colname: random.randint(0, 1000) for colname in colnames
        }
    return data
#get a numpy matrix
def numpy_matrix(Dimension=100):
    """Build a square numpy array filled with random integers.

    Every cell is drawn with random.randint(0, 100), so both bounds are
    inclusive. Values come from the standard `random` module, not from
    numpy's own generator.

    Parameters:
        Dimension: number of rows and of columns. Zero or a negative
            value yields an empty array of shape (0, 0).

    Returns:
        A 2-D integer numpy.ndarray of shape (Dimension, Dimension).
    """
    # Clamp so that a non-positive size still gives a well-formed 2-D result.
    size = max(Dimension, 0)
    # range() instead of xrange() so the function also runs on Python 3.
    rows = [[random.randint(0, 100) for _ in range(size)]
            for _ in range(size)]
    # An empty list would otherwise become a 1-D float array of shape (0,);
    # pinning dtype and shape keeps the result a 2-D integer matrix.
    return np.array(rows, dtype=int).reshape(size, size)



#get panda data frame
def pandas_dataframe(Dimension=100):
    """Return the random nested-dictionary table as a pandas DataFrame.

    Parameters:
        Dimension: number of rows and of columns, passed straight to
            nested_dict().

    Returns:
        pandas.DataFrame whose index holds the 'A*' row names and whose
        columns hold the 'B*' column names.
    """
    table = nested_dict(Dimension)
    # DataFrame(dict-of-dicts) places the outer keys on the columns, so
    # flip the frame to put the 'A*' row names back on the index.
    return pd.DataFrame(table).T
#
# Side length of the square test table.
d=1000

# NOTE: sys.getsizeof() reports only the memory directly attributed to the
# object it is given. For a dict that is the outer hash table alone -- the
# inner row dictionaries and their integers are not counted -- so the
# figures printed below are not total memory footprints.
# NOTE(review): what numpy and pandas objects report here depends on their
# own __sizeof__ implementation and may differ between library versions.

dict_matrix=nested_dict(d)
# '//' keeps the whole-kilobyte result that Python 2's integer '/' gave
# and behaves the same on Python 3.
size=sys.getsizeof(dict_matrix)//1024
# print(...) with a single pre-formatted string prints identically on
# Python 2 and Python 3.
print("Size of dictionary (%sx%s) is %d kB" % (d, d,size))

np_matrix=numpy_matrix(d)
size=sys.getsizeof(np_matrix)
# The shape tuple is one element of the argument tuple, so %s renders it
# as "(1000, 1000)" inside the literal parentheses.
print("Size of numpy matrix (%s) is %d byte" % (np_matrix.shape,size))

df_matrix=pandas_dataframe(d)
size=sys.getsizeof(df_matrix)
print("Size of pandas data frame (%s) is %d byte" % (df_matrix.shape,size))

print('ok')





No comments:

Post a Comment