grouped = df['需要计算的列名'].groupby(df['需要分组的列名']).sum()  # 求和,也可以换成 mean()、count() 等其他聚合函数
小tips:数值区间筛选查询数量
小于10:
df[(df.列名<10)].count()
大于等于10,小于20:
df[(df.列名>=10)&(df.列名<20)].count()  # 重点:多个条件之间用 & 连接,并且每个条件都要用括号括起来
2、排序
df.sort_values(by='要排序的列',axis=0,ascending=False)
axis=0(默认):按指定列的值对行进行排序;axis=1:按指定行的值对列进行排序
ascending=False 降序 , =True 升序
不知道理解题意是否正确,见如下代码:
# Bucket each item's (count, value) pair twice: once under its 'prov1' key
# and once under its 'prov2' key.
group1 = {}
group2 = {}
for item in raw_items_list:
    pair = (item['count'], item['value'])
    # setdefault creates the list the first time a key is seen and returns it,
    # so the first item of every group is stored as well (an if/else that
    # only appends in the else branch would silently drop it).
    group1.setdefault(item['prov1'], []).append(pair)
    group2.setdefault(item['prov2'], []).append(pair)

# Now to compute the average of group1
for g, value_list in group1.items():
    # Float accumulator keeps the division below a true division on Python 2.
    count = 0.0
    for v in value_list:
        count += v[0]
    # Sum of value * (count / total count) over the group.
    value = 0.0
    for v in value_list:
        value += v[0] / count * v[1]
    # NOTE(review): `value` is already a count-weighted mean at this point;
    # dividing it again by the number of entries looks unintended -- confirm
    # against the actual requirement before relying on this figure.
    print('Average of group1 - %s is: %f' % (g, value / len(value_list)))
# average of group2
# ...