Tiezheng Yuan Ph.D.: Python: Pandas(5) group data frame

Abstract: Computing group-wise statistics of a pandas data frame using the function groupby().

#demo script: build a small data frame, group it with groupby(), and print
#several group-wise statistics (size, sum, describe, aggregate/agg)
import numpy as np
import pandas as pd

#create a data frame
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8) * 10,
})
print(df.shape)
print(df)

print('\ngroup data frame into dictionary using groupby():')
#group by the column known as 'A'
df_obj = df.groupby('A')
print(df_obj.groups)
print(df_obj.size())
#numeric_only=True restricts the sum to the numeric columns 'C' and 'D';
#without it pandas >= 2.0 would also "sum" (concatenate) the strings in 'B'
print(df_obj.sum(numeric_only=True))
print(df_obj.describe())

print('group by two factors:')
df_obj = df.groupby(['A', 'B'])
print(df_obj.groups)

#aggregation based groupby
#as_index=False keeps the group keys 'A' and 'B' as ordinary columns
df_obj = df.groupby(['A', 'B'], as_index=False)
#string names pick pandas' own reductions; passing np.sum / np.mean
#is deprecated in recent pandas releases
print(df_obj.aggregate('sum'))
print(df_obj.aggregate(['sum', 'mean']))

#agg based on groupby
#a dict maps each column to its own reduction: mean of 'C', std of 'D'
#(groups with a single row give NaN for the sample standard deviation)
df_obj = df.groupby(['A', 'B'], as_index=False)
print(df_obj.agg({'C': 'mean', 'D': 'std'}))

Tiezheng Yuan Ph.D.