python学习笔记之读写excel文件

版权声明:未经本人许可,不得用于商业用途及传统媒体。转载请注明出处! https://blog.csdn.net/qikaihuting/article/details/84590436

python 处理excel数据的两种方式:

  • 首选pandas库里pandas.read_excel函数,相对比较简单
  • 其次使用xlrd库,感觉没有pandas好用
  • 如何写excel,后续更新
#coding=utf-8 
"""
Created on Wed Nov 28 18:39:17 2018
@author: **
"""
import xlrd
import xlwt
import pandas
import numpy as np

def pandas_parse_xls(filename, imgname_col_index = [1], sub_index = None):
    """
    reference: 
        http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html#pandas.read_excel
    func:
        parse the xlsx file into the ndarray or list
    args:
        imgname_col_index:, list of int, refer the col index of image name field
        mos_col_index: list of int, refer the col index of MOS field
        std_index: list of ints, refer the col index of subjetive score area
    return:
        list of imgname, MOS and std_value
    """
    #header=1,index_col=1,usecols=[1,3]
    #DataFrame:Attributes and underlying data
    #DataFrame.get_values()
    #DataFrame.values:Return a Numpy representation of the DataFrame.
    
    #区域对应的列索引列表,从第4列到第15列
    if sub_index is None:
        Sub_score_area = range(3,15)   
    
    #解析字符型数据字段时,注意将‘unicode’类型转为‘str’型,
    imgname_list = pandas.read_excel(filename, parse_cols = imgname_col_index).astype('str').values
    #将返回的数组降维,并转为列表
    imgname_list = np.squeeze(imgname_list).tolist()   
    
    ##读取数值区域,计算每行的标准差,返回一维标准差## 
    #<class 'pandas.core.frame.DataFrame'>            
    sub_DataFrame = pandas.read_excel(filename, parse_cols  = Sub_score_area)
    # 将DataFrame类型转为ndarray,二维数组
    sub_array = sub_DataFrame.values
    #计算标准差
    std_array = np.std(sub_array,axis=1,ddof=1)
                     
    return imgname_list, std_array


def xlrd_parse_xls(path_xls):
    """
    reference: https://xlrd.readthedocs.io/en/latest/api.html
    func:
        #Cell object in the given row and column.
        xlrd.sheet.cell(rowx,colx)
        #Value of the cell in the given row,column.
        xlrd.sheet.cell_value(rowx,colx) 
        #Returns a slice of the values of the cells in the given column.
        xlrd.sheet.col_values(index_col)  # the first index value is 1
        #Returns a slice of the values of the cells in the given row.
        xlrd.sheet.row_values(index_col) 
    """
    data = xlrd.open_workbook(path_xls)
    
    # get sheet
    table = data.sheets()[0] # equal to data.sheet_by_index(0)

    #将‘unicode’转为‘str’
    name_list = [item.encode('utf-8') for item in table.col_values(1,1)]
    
    score_list = table.col_values(15,1)

    #print(name_list,score_list)
    return name_list,score_list
       
#==============================================================================
#     result = []
#     #循环遍历读取
#     row ,col = table.nrows ,table.ncols
#     for i in range(1,2):
#         for j in range(1,row):
#             #get cell value
#             temp_str = table.cell_value(j,i)
#             print(temp_str)
#             result.append(temp_str)
#==============================================================================
def write_xls(dest_xls):
    #creat workbook
    work_book = xlwt.Workbook(encoding = 'ascii')
    work_sheet = work_book.add_sheet('sheet1')
    work_sheet.write(0, 0, label = 'Row 0, Column 0 Value') 
    work_book.save('Excel_Workbook.xls')
    
if __name__=='__main__':
    pandas_parse_xls('MOS.xlsx')
    xlrd_parse_xls('MOS.xlsx')


参考地址

猜你喜欢

转载自blog.csdn.net/qikaihuting/article/details/84590436