});

Wrapped Dataset 已包装数据集

来自皓文的S13-S47数据集(不含S19)

简介

采集区段为第365环至第523环。原始数据为xlsx格式,为了方便Python调用,已包装为pandas DataFrame并储存为.pkl文件。
当某段落出现滚刀磨损量为负值等异常情况时,将剔除上次开仓到本次开仓间的所有数据。

数据打包源码解释

用于理解打包后的dataframe格式

import pandas as pd
import pickle

# Load the source Excel workbook into one DataFrame that pick_df() slices from.
# The raw string keeps the backslash literal; '###' appears to be a placeholder
# for the real directory -- replace it with the actual path before running.
df_total = pd.read_excel( r'###\x1.xlsx')

# Extract the rows belonging to one sample interval
def pick_df(start_index, end_index):
    """Return the slice of ``df_total`` for a range of Excel row numbers.

    Parameters
    ----------
    start_index : int
        First row of the interval, given as the row number displayed in Excel.
    end_index : int
        Last row of the interval (inclusive), given as the Excel row number.

    Returns
    -------
    pandas.DataFrame
        ``df_total.iloc[start_index - 2:end_index - 1]``.

    Notes
    -----
    Prints the rounded sum of the '修正磨损量' column for the interval. Per the
    original author's note, each chamber-opening segment should total an
    integer, so the printed value can be used to check that the right rows
    were selected.
    """
    # Shift both ends from displayed Excel row numbers to positional offsets
    # (presumably one header row plus 0-based indexing -- confirm against the sheet).
    first_pos = start_index - 2
    stop_pos = end_index - 1
    segment = df_total.iloc[first_pos:stop_pos]

    wear_sum = segment['修正磨损量'].sum()
    print('磨损量:', round(wear_sum))
    return segment

# Example: assembling the S14 data (the complete version is in the original file).
# Other cutter numbers are handled the same way.
start_end_index = [[51, 57], [58, 81], [82, 107]]  # row numbers as displayed in Excel

# One sub-frame per [start, end] pair, in list order
s14_parts = [pick_df(first_row, last_row) for first_row, last_row in start_end_index]

# The keys 1..N label each sub-frame with its chamber-opening segment number
df_S14 = pd.concat(s14_parts, keys=list(range(1, len(start_end_index) + 1)))
# Out:
# 磨损量: 1
# 磨损量: 2
# 磨损量: 4

# Per-cutter frames S13..S47 (there is no S19 frame); each df_Sxx is built
# elsewhere in the same way as df_S14.
df_list = [
    df_S13, df_S14, df_S15, df_S16, df_S17, df_S18,
    df_S20, df_S21, df_S22, df_S23, df_S24, df_S25, df_S26, df_S27, df_S28,
    df_S29, df_S30, df_S31, df_S32, df_S33, df_S34, df_S35, df_S36, df_S37,
    df_S38, df_S39, df_S40, df_S41, df_S42, df_S43,
    df_S44, df_S45, df_S46, df_S47,
]

# Matching labels 'S13'..'S47', skipping 'S19' so the keys line up with df_list
key_list = [f'S{n}' for n in range(13, 48) if n != 19]

# Stack all frames, using the cutter label as the outermost index level
df_total_reset = pd.concat(df_list, keys=key_list)

# Write test: serialise the combined DataFrame to a pickle file.
# The with-block closes the file even if pickle.dump raises.
with open(r'###\df_total_reset.pkl', 'wb') as df_write:
    pickle.dump(df_total_reset, df_write)

# Read test: load the DataFrame back from the pickle file.
# The with-block closes the file even if pickle.load raises.
# NOTE: unpickling can execute arbitrary code -- only load .pkl files you
# created yourself or otherwise trust.
with open(r'###\df_total_reset.pkl', 'rb') as df_read:
    df_total_reset = pickle.load(df_read)

# The author recommends the 'with open(...) as f:' form,
# which makes an explicit .close() call unnecessary. Example:
with open(PATH+'data#.pkl', mode='rb') as f:
    data = pickle.load(f)