
昨天寫了一段用來做分層隨機抽樣的代碼,很粗糙,不過用公司的2萬名導購名單試了一下,結果感人,我覺得此刻的我已經要上天了,哈哈哈哈哈哈

代碼如下:
#分層隨機(jī)抽樣 stratified sampling
import xlrd, xlwt, time, random
def stratum_quotas(rows, p):
    """Return how many rows to draw from each stratum.

    Args:
        rows: Data rows (header excluded). The first cell of each row names
            the stratum that row belongs to.
        p: Fraction of each stratum to draw, e.g. 0.1 for 10%.

    Returns:
        A dict mapping each stratum name to the number of rows to draw.
    """
    sizes = {}
    for row in rows:
        sizes[row[0]] = sizes.get(row[0], 0) + 1
    # NOTE: Python 3's round() sends exact halves to the nearest even integer
    # (round(2.5) == 2), so this is not strictly "round half up".
    return {name: int(round(size * p)) for name, size in sizes.items()}


def stratified_sample(rows, p):
    """Draw a random sample that takes about ``p`` of every stratum.

    Args:
        rows: Data rows (header excluded). The first cell of each row names
            the stratum that row belongs to.
        p: Fraction of each stratum to draw.

    Returns:
        A new list of the selected rows, in random order. ``rows`` itself is
        not modified.
    """
    remaining = stratum_quotas(rows, p)
    # Shuffle a copy so the caller's list keeps its order.
    shuffled = list(rows)
    random.shuffle(shuffled)
    picked = []
    # Walk the shuffled rows and keep each one while its stratum still has
    # quota left; the shuffle is what makes the selection random.
    for row in shuffled:
        if remaining.get(row[0], 0) > 0:
            picked.append(row)
            remaining[row[0]] -= 1
    return picked


def main():
    """Read the workbook, sample each stratum, and save the result as .xls."""
    # NOTE(review): xlrd >= 2.0 reads only .xls files; opening this .xlsx
    # presumably requires xlrd < 2.0 -- confirm the installed version.
    book = xlrd.open_workbook(r'C:\Users\Administrator\Desktop\分層抽樣.xlsx')
    sheet = book.sheets()[0]
    # Header row.
    title = sheet.row_values(0)
    # Start at row 1: the header must not be counted as a stratum or sampled
    # as if it were a data row.
    rows = [sheet.row_values(i) for i in range(1, sheet.nrows)]
    # float() instead of eval(): eval would execute arbitrary code typed at
    # the prompt. Only plain decimal numbers are accepted now.
    p = float(input('每層抽取的比例(小數(shù)):'))
    result_l = stratified_sample(rows, p)
    # Create a new workbook and write the header followed by the sample.
    out_book = xlwt.Workbook()
    out_sheet = out_book.add_sheet('result')
    for col_idx, value in enumerate(title):
        out_sheet.write(0, col_idx, value)
    for row_idx, row in enumerate(result_l, start=1):
        for col_idx, value in enumerate(row):
            out_sheet.write(row_idx, col_idx, value)
    # Timestamp the file name so repeated runs do not overwrite each other.
    nowtime = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    out_book.save(r'C:\Users\Administrator\Desktop\Result_{0}.xls'.format(nowtime))
    print('\n抽樣成功!')
    # Wait for Enter so a double-clicked console window stays open.
    input()


if __name__ == "__main__":
    main()