|
03、基于Keras的人工神经网络解决鸢尾花分类问题的代码实现
准确率达到了98%
神经网络分类问题的经典数据集(鸢尾花数据集)介绍,神经网络Python库Keras的介绍
使用Pandas读取鸢尾花数据集, 使用LabelEncoder对类别标签进行编码
使用Keras创建一个用于鸢尾花分类识别的神经网络
训练用于鸢尾花分类的神经网络 解读训练输出的日志 了解如何评价神经网络的性能
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
TypeError: 'int' object is not iterable
损失函数并不使用测试数据(test/validation data)来衡量网络的性能
损失函数用来指导网络的训练过程!使得网络的参数向损失降低的方向改变
分类问题的损失函数:hinge loss (SVM里用到) cross-entropy loss 交叉熵
One-Hot encoding 哑编码 独热编码
非概率的解释:hinge loss
概率解释:将输出转换为概率函数 softmax probability
示例:手动计算3分类问题的accuracy、交叉熵损失、mse均方差损失
东方老师AI官网:http://www.ai111.vip
有任何问题可联系东方老师微信:dfy_88888
【微信二维码图片】
- # -*- coding: utf-8 -*-
- __author__ = u'东方耀 微信:dfy_88888'
- __date__ = '2019/10/21 9:38'
- __product__ = 'PyCharm'
- __filename__ = '人工神经网络解决鸢尾花分类'
- import numpy as np
- import pandas as pd
- from keras.models import Sequential
- from keras.layers import Dense
- from keras.optimizers import Adam, SGD
- from keras.wrappers.scikit_learn import KerasClassifier
- from keras.utils import np_utils
- from sklearn.model_selection import cross_val_score
- # k 折 交叉验证
- from sklearn.model_selection import KFold
- # 将分类的字符串 变成 数字 0 1 2 三个类别
- from sklearn.preprocessing import LabelEncoder
- # 保存模型为json 利用json文件预测
- from keras.models import model_from_json
- from sklearn import datasets
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
- from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
- from sklearn.metrics import mean_squared_error, mean_absolute_error
# Seed NumPy's global random number generator.
SEED = 666
np.random.seed(SEED)

# Read the iris table. header=0 (the default) treats the first CSV row as
# the column names; header=None would be used for a file without a header.
df = pd.read_csv('datasets/iris.csv', header=0)
# Alternative data source kept for reference:
# iris = datasets.load_iris()
# print(iris.keys())

# Columns 1-4 become the float feature matrix, column 5 the raw labels.
# NOTE(review): column 0 is skipped, presumably a row-id column - confirm
# against iris.csv.
table = df.values
X = table[:, 1:5].astype(float)
y = table[:, 5]
print(X.shape, y.ndim)
print(X[:3])
print(y[:10])

# Encode the class labels as integers, then one-hot encode them (3 classes)
# so they match the 3-unit softmax output of the network.
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
print(y, y.ndim)
Y_onehot = np_utils.to_categorical(y, num_classes=3)
print(Y_onehot.shape, Y_onehot.ndim)
print(Y_onehot[:5])
- # X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)
- # log_reg = LogisticRegressionCV(cv=5, n_jobs=-1, verbose=0, multi_class='ovr')
- # log_reg = LogisticRegression()
- # log_reg.fit(X_train, y_train)
- # y_test_predict = log_reg.predict(X_test)
- # print(log_reg.score(X_test, y_test))
- # print(accuracy_score(y_test, y_test_predict)) # 0.9473684210526315
- # print(confusion_matrix(y_test, y_test_predict))
- # print(precision_score(y_test, y_test_predict, average='micro'))
- # print(recall_score(y_test, y_test_predict, average='micro'))
# Network definition
def baseline_model():
    """Build and compile the iris classification network.

    The network takes 4 input features, passes them through one 7-unit
    tanh hidden layer and ends in a 3-unit softmax output layer. It is
    compiled with SGD (learning rate 0.1), categorical cross-entropy loss
    and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model named ``baseline_model_dfy``.
    """
    # Author's tuning notes:
    #   - classification loss 'categorical_crossentropy': ~92% accuracy
    #     (hinge loss is another classification loss);
    #     regression loss 'mean_squared_error': ~71.3% accuracy
    #   - raising epochs from 20 to 200 moved accuracy from ~92% to ~98%
    #   - raising hidden units from 7 to 128 gave no gain (98% -> 98%),
    #     so the layer was restored to 7 units
    layers = [
        # input_dim=4 and input_shape=(4,) are both accepted here
        Dense(units=7, activation='tanh', input_dim=4),
        Dense(units=3, activation='softmax'),
    ]
    network = Sequential(layers, name='baseline_model_dfy')
    network.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(learning_rate=0.1),
        metrics=['accuracy'],
    )
    return network
# Wrap the model builder (KerasClassifier comes from
# keras.wrappers.scikit_learn) so scikit-learn utilities can train it.
estimator = KerasClassifier(build_fn=baseline_model, epochs=20, batch_size=10, verbose=1)

# Evaluate with shuffled 10-fold cross-validation.
kfold = KFold(n_splits=10, shuffle=True, random_state=SEED)
result = cross_val_score(estimator, X, Y_onehot, cv=kfold)
# summary() prints the layer table itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
baseline_model().summary()
print('Accuracy of cv:', result)
print('Accuracy of CV mean:%.2f, std:%.3f' % (result.mean(), result.std()))

# Train the final model on the full dataset.
estimator.fit(X, Y_onehot)
# NOTE: scored on the same samples used for fitting, i.e. training accuracy.
print('model accuracy score:', estimator.score(X, Y_onehot))
# Save the trained model: architecture as JSON, weights as HDF5.
model_json = estimator.model.to_json()
with open('model.json', 'w', encoding='utf-8') as json_file:
    json_file.write(model_json)
estimator.model.save_weights('model_weights.h5')
print('saved保存了 model to disk! Done')

# Load the model back from disk and use it for prediction.
with open('model.json', 'r', encoding='utf-8') as file:
    loaded_model_json = file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights('model_weights.h5')
print('loaded加载了 model from disk! Done')

predicted = loaded_model.predict(X)
print('predicted probability:', predicted)
# Sequential.predict_classes() is deprecated and removed in newer Keras.
# Taking the arg-max over the softmax outputs gives the same class indices
# and reuses the probabilities computed above instead of predicting twice.
predicted_label = np.argmax(predicted, axis=-1)
print('predicted预测的 label:\n' + str(predicted_label))
print('实际值 label:\n' + str(y))

# Metrics against the integer-encoded labels. average='micro' is required
# because the target is multiclass (the default 'binary' raises ValueError).
print('预测的准确度:', accuracy_score(y, predicted_label))
print(confusion_matrix(y, predicted_label))
print('预测的精准率:', precision_score(y, predicted_label, average='micro'))
print('预测的召回率:', recall_score(y, predicted_label, average='micro'))
print('预测的F1 score:', f1_score(y, predicted_label, average='micro'))
# TODO: ROC AUC left unfinished by the author (y_score not decided):
# print('roc_auc(area under curve):', roc_auc_score(y, y_score=?))
复制代码
|
-
01tanh.png
(658.01 KB, 下载次数: 104)
-
02softmax.png
(741.43 KB, 下载次数: 105)
-
03softmax.png
(731.82 KB, 下载次数: 101)
-
04softmax.png
(703.61 KB, 下载次数: 105)
-
05.png
(511.82 KB, 下载次数: 102)
-
06.png
(226.95 KB, 下载次数: 108)
-
07.png
(278.61 KB, 下载次数: 101)
-
08.png
(288.33 KB, 下载次数: 102)
-
09.png
(194.31 KB, 下载次数: 104)
-
10.png
(148.74 KB, 下载次数: 99)
-
11.png
(340.25 KB, 下载次数: 104)
-
12.png
(230.68 KB, 下载次数: 104)
-
13.png
(199.72 KB, 下载次数: 103)
-
14.png
(190.53 KB, 下载次数: 99)
-
15.png
(241.03 KB, 下载次数: 105)
-
-
iris.csv
4.86 KB, 阅读权限: 10, 下载次数: 1
|