Friday, July 17, 2015

Python: data structure (3)


Abstract: Implement bitmap to store huge lists for saving memory

The advantage of bitmap required much less memory than a common list. The class includes the methods of creating a bitmap from a list, converting a bitmap to sorted list, adding an element into a bitmap, remove an element from a bitmap, and judge if an element is in a bitmap

The result:
Memory usage (byte) of 1000 numbers:
    The list: 1009
    The bitmap: 41
Memory usage (byte) of 8 numbers:
    The list: 17
    The bitmap: 23
[139, 2048, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 524288]
10001
4 5
Does 100 exists?: 1
Convert bitmap to list: [0, 1, 3, 7, 43, 96, 100, 435]
Add 334 into a bitmap: [0, 1, 3, 7, 43, 96, 100, 334, 435]
Remove 435 into a bitmap: [0, 1, 3, 7, 43, 96, 100, 334]



The script:
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 16 15:27:58 2015

@author: yuan
"""

#implement bitmap  to store list.
import sys
import math

#
class bitmap:
    def __init__(self, arr):
        self.num=len(arr)
        self.arr=arr
   
    def to_bitmap(self):
        bitmap_len=int(math.ceil(max(self.arr)/32.))
        bitmap=[0]*bitmap_len
        #print self.bitmap_len
        for v in self.arr:
            divide, remainder = divmod(v, 32)
            #print divide, remainder
            #print 'old', format(self.bitmap[divide], 'b')
            bitmap[divide]=(2**remainder)+bitmap[divide]
            #print 'add', remainder, format(2**remainder, 'b')
            #print 'final', format(self.bitmap[divide],'b')
            #print "\n"
        return bitmap

    def to_list(self, bitmap):
        mylist=[]
        bitmap_len=len(bitmap)
        for i in range(bitmap_len):
            bit_str=format(bitmap[i], 'b')[::-1]
            #print bit_str
            for index, element in enumerate(bit_str):
                a=0
                if int(element)==1:
                    a=i*32+index
                    mylist.append(a)
                #print index,element, a
        return mylist
       
    def test_element(self, bitmap, number):
        pool_remainder=0
        print bitmap
        bimap_len=len(bitmap)
        divide, remainder = divmod(number, 32)
        if(divide<=(bimap_len-1) and number>=0 ):
            #bitstring in bitmap       
            bit_str=format(bitmap[divide],'b')[::-1]
            print bit_str
            #the pos in bitamp of query number
            bit_pos=len(format(2**remainder,'b'))
            print remainder, bit_pos
            pool_remainder=bit_str[(bit_pos-1):bit_pos:1]
        return pool_remainder

    def add_element(self, bitmap, number):
        divide, remainder=divmod(number, 32)
        bitmap[divide] +=(2**remainder)
        return bitmap
   
    def remove_element(self, bitmap, number):
        divide, remainder=divmod(number, 32)
        bitmap[divide]=bitmap[divide]-(2**remainder)
        return bitmap           
#
sea=range(1000)
b=bitmap(sea)

#convert a list to a bitmap
mybit=b.to_bitmap()
print 'Memory usage (byte) of',len(sea), 'numbers:'
print '\tThe list:', sys.getsizeof(sea)/8
print '\tThe bitmap:', sys.getsizeof(mybit)/8

#
sea=[1,7,3,43,96,0,100,435]
b=bitmap(sea)
mybit=b.to_bitmap()
print 'Memory usage (byte) of', len(sea), 'numbers:'
print '\tThe list:', sys.getsizeof(sea)/8
print '\tThe bitmap:', sys.getsizeof(mybit)/8

#judge if a given number in a bitmap
number=100
judge=b.test_element(mybit,number)
print 'Does', number, 'exists?:', judge

#export list from bitmap
sorted_sea=b.to_list(mybit)
print 'Convert bitmap to list:', sorted_sea

#add
number=334
mylist=b.add_element(mybit,number)
sorted_sea=b.to_list(mybit)
print 'Add', number, 'into a bitmap:', sorted_sea

#remove
number=435
mylist=b.remove_element(mybit,number)
sorted_sea=b.to_list(mybit)
print 'Remove', number, 'into a bitmap:', sorted_sea

No comments:

Post a Comment