Python 建模步骤-编程学习网

#%%
#载入数据 、查看相关信息
import pandas as pd
import numpy as np
from  sklearn.preprocessing import LabelEncoder

print('第一步：加载、查看数据')

# Location of the training data set (CSV).
file_path = r'D:\train\201905data\liwang.csv'

# Read the training data into a DataFrame.
band_data = pd.read_csv(file_path, encoding='UTF-8')

# Column names, dtypes and non-null counts; info() prints on its own.
band_data.info()

# A bare ``band_data.shape`` expression is discarded when this file runs as a
# script, so print the (rows, columns) tuple explicitly.
print(band_data.shape)

#%%
#
print('第二步：清洗、处理数据，某些数据可以使用数据库处理数据代替')

# Data cleaning, missing values: report the number of missing values per
# column, then drop every row that has any. ``sum`` has to be called (and its
# result printed) for the report to appear at all.
print(band_data.isnull().sum())

band_data = band_data.dropna()

# Strip surrounding whitespace from the text columns so the value mappings
# below match exactly.
for text_column in ('voice_mail_plan', 'intl_plan', 'churned'):
    band_data[text_column] = band_data[text_column].str.strip()

# Encode the yes/no flags as 1/0.
yes_no_codes = {'no': 0, 'yes': 1}
for flag_column in ('voice_mail_plan', 'intl_plan'):
    band_data[flag_column] = band_data[flag_column].map(yes_no_codes)

# Label-encode every remaining text column. ``type(object)`` is the metaclass
# ``type`` rather than ``object``, so test the dtype explicitly instead of
# comparing against it.
for column in band_data.columns:
    column_dtype = band_data[column].dtype
    if (pd.api.types.is_object_dtype(column_dtype)
            or pd.api.types.is_string_dtype(column_dtype)):
        le = LabelEncoder()
        band_data[column] = le.fit_transform(band_data[column])


#%%
# 模型  [重复、调优]
print('第三步：选择、训练模型')

# Separate the feature matrix from the target column.
x = band_data.drop(['churned'], axis=1)
y = band_data['churned']

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
from sklearn import model_selection
train, test, t_train, t_test = model_selection.train_test_split(
    x, y, test_size=0.3, random_state=1)

# Fit a decision tree limited to depth 2.
from sklearn import tree
model = tree.DecisionTreeClassifier(max_depth=2)
model.fit(train, t_train)

# Importance of each feature in the fitted tree.
fea_res = pd.DataFrame(x.columns, columns=['features'])
fea_res['importance'] = model.feature_importances_

# Class frequencies of the target, kept for inspection.
t_name = band_data['churned'].value_counts()

import graphviz

# Make the Graphviz binaries discoverable for rendering.
import os
os.environ["PATH"] += os.pathsep + r'D:\software\developmentEnvironment\graphviz-2.38\release\bin'

# export_graphviz expects class names in ascending class order, which is the
# order of ``model.classes_``. ``value_counts()`` is ordered by frequency and
# could therefore label the leaves with the wrong class.
dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=list(x.columns),
    max_depth=2,
    class_names=[str(label) for label in model.classes_],
    filled=True,
    rounded=True,
    special_characters=False)
graph = graphviz.Source(dot_data)
# Writes the DOT source to "dtr" and renders it next to it.
graph.render("dtr")

#%%
print('第四步：查看、分析模型')

from sklearn.metrics import classification_report, confusion_matrix

# Predict the held-out rows.
res = model.predict(test)

# Confusion matrix of true labels against predictions.
confmat = confusion_matrix(y_true=t_test, y_pred=res)
print(confmat)

# Per-class classification metrics.
# Background reading: https://blog.csdn.net/akadiao/article/details/78788864
report = classification_report(y_true=t_test, y_pred=res)
print(report)

#%%
print('第五步：保存模型')

# ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone ``joblib`` package and fall back to the old
# location only on installations that still ship it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the fitted model to disk.
joblib.dump(model, r'D:\train\201905data\mymodel.model')

#%%
print('第六步：加载新数据、使用模型')
file_path_do = r'D:\train\201905data\do_liwang.csv'

deal_data = pd.read_csv(file_path_do, encoding='UTF-8')

# Data cleaning, missing values: drop every row that has any.
deal_data = deal_data.dropna()

# Strip surrounding whitespace from the text columns so the value mappings
# below match exactly.
for text_column in ('voice_mail_plan', 'intl_plan', 'churned'):
    deal_data[text_column] = deal_data[text_column].str.strip()

# Encode the yes/no flags as 1/0.
yes_no_codes = {'no': 0, 'yes': 1}
for flag_column in ('voice_mail_plan', 'intl_plan'):
    deal_data[flag_column] = deal_data[flag_column].map(yes_no_codes)

# Label-encode every remaining text column. ``type(object)`` is the metaclass
# ``type`` rather than ``object``, so test the dtype explicitly instead of
# comparing against it.
# NOTE(review): the encoders are fitted on the new data, so the integer codes
# are not guaranteed to match the codes the model was trained with. Reusing
# the encoders fitted on the training data would make them consistent --
# TODO confirm and persist them together with the model.
for column in deal_data.columns:
    column_dtype = deal_data[column].dtype
    if (pd.api.types.is_object_dtype(column_dtype)
            or pd.api.types.is_string_dtype(column_dtype)):
        le = LabelEncoder()
        deal_data[column] = le.fit_transform(deal_data[column])

# Load the persisted model.
model_file_path = r'D:\train\201905data\mymodel.model'
deal_model = joblib.load(model_file_path)

# Predict on every column except the target.
res = deal_model.predict(deal_data.drop(['churned'], axis=1))

#%%
print('第七步：执行模型，提供数据')
result_file_path = r'D:\train\201905data\result_liwang.csv'

# Insert the predictions as the second column, then write the state and
# prediction columns (plus the index) to the result CSV.
deal_data.insert(loc=1, column='pre_result', value=res)
deal_data.loc[:, ['state', 'pre_result']].to_csv(
    path_or_buf=result_file_path,
    sep=',',
    index=True,
    encoding='UTF-8',
)

文章详情

Python 建模步骤

软考中级精品资料免费领

相关文章

猜你喜欢

Python 建模步骤

Python api构建tensorrt加速模型的步骤详解

构建 Python 机器学习模型的八个步骤

python导入模块的步骤

python代码创建数据库步骤

hadoop伪分布模式搭建（详细步骤）

Python离线安装openpyxl模块的步骤

python模块安装的步骤是什么

OGG搭建步骤

python模块导入方式浅析步骤

搭建Python Web环境的详细步骤

Python 开发环境搭建十大步骤

linux搭建k8s的步骤步骤是什么

redis主备模式搭建的步骤是什么

python中argparse模块基础及使用步骤

Oracle重建awr步骤

python搭建web网站的步骤是什么

python搭建虚拟环境的步骤详解

搭建 Selenium+Python开发环境详细步骤

Python 构建 HTTP 服务器的8个步骤