# Inspect missing values: count the nulls in every column, then list only the
# columns that actually have some, ordered by how many values are missing.
miss = full.isnull().sum()  # number of null values per column
# Sorted ascending (fewest missing first).
# NOTE(review): the original heading asked for a high-to-low ordering while the
# code sorts low-to-high; pass ascending=False if high-to-low is what is wanted.
miss[miss > 0].sort_values(ascending=True)
full.info()  # per-column non-null counts, another view of the missing data
空值的填充与删除
对字符类型的列进行填充
# Columns whose missing entries are replaced with the literal string "None".
# NOTE(review): "GarageYrBlt" looks like a numeric (year) column; filling it
# with a string would turn it into an object column -- confirm this is intended.
cols1 = [
    "PoolQC", "MiscFeature", "Alley", "Fence", "FireplaceQu", "GarageQual",
    "GarageCond", "GarageFinish", "GarageYrBlt", "GarageType", "BsmtExposure",
    "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1", "MasVnrType",
]
for col in cols1:
    # Assign the result back instead of calling fillna(..., inplace=True) on
    # full[col]: that chained in-place form acts on a temporary object and does
    # not update `full` when pandas Copy-on-Write is enabled.
    full[col] = full[col].fillna("None")
对数值类型的列进行填充
# Columns whose missing entries are replaced with 0.
cols = [
    "MasVnrArea", "BsmtUnfSF", "TotalBsmtSF", "GarageCars", "BsmtFinSF2",
    "BsmtFinSF1", "GarageArea",
]
for col in cols:
    # Assign the result back instead of calling fillna(..., inplace=True) on
    # full[col]: that chained in-place form acts on a temporary object and does
    # not update `full` when pandas Copy-on-Write is enabled.
    full[col] = full[col].fillna(0)
对某一列空值进行填充(用这一列的均值)
# Fill the missing "LotFrontage" values with the mean of that column.
# Series.mean() skips NaN by default, so the mean is taken over the known
# values only. The result is assigned back to the column rather than using
# fillna(..., inplace=True) on full["LotFrontage"], because that chained
# in-place form does not update `full` when pandas Copy-on-Write is enabled.
full["LotFrontage"] = full["LotFrontage"].fillna(full["LotFrontage"].mean())
对这些列进行众数填充
# Columns whose missing entries are replaced with the column's mode
# (its most frequent value).
cols2 = [
    "MSZoning", "BsmtFullBath", "BsmtHalfBath", "Utilities", "Functional",
    "Electrical", "KitchenQual", "SaleType", "Exterior1st", "Exterior2nd",
]
for col in cols2:
    # mode() returns a Series of the most frequent value(s); [0] takes the
    # first one. Assign the result back instead of using
    # fillna(..., inplace=True) on full[col]: that chained in-place form does
    # not update `full` when pandas Copy-on-Write is enabled.
    full[col] = full[col].fillna(full[col].mode()[0])
查看哪些是还没填充好的
# List the columns that still contain null values, with their null counts.
# The counts are computed once and filtered through a callable indexer.
# The result is expected to be empty here, i.e. every missing value has been
# filled by the steps above.
full.isnull().sum().loc[lambda null_counts: null_counts > 0]