文章目录:
一.什么是过拟合
1.过拟合
2.过拟合解决方法
二.tensorflow+sklearn实现数字分类
三.dropout解决过拟合问题
四.总结
https://github.com/eastmountyxz/AI-for-TensorFlow
https://github.com/eastmountyxz/AI-for-Keras
学Python近八年,认识了很多大佬和朋友,感恩。作者的本意是帮助更多初学者入门,因此在github开源了所有代码,也在公众号同步更新。深知自己很菜,得拼命努力前行,编程也没有什么捷径,干就对了。希望未来能更透彻学习和撰写文章,也能在读博几年里学会真正的独立科研。同时非常感谢参考文献中的大佬们的文章和分享。
- https://blog.csdn.net/eastmount
activate tensorflow
pip install scikit-learn
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
# Load the handwritten-digits dataset: 8x8 images with integer targets 0-9.
digits = load_digits()
X, y = digits.data, digits.target
# One-hot encode targets: digit k becomes a 10-vector with a 1 at index k.
y = LabelBinarizer().fit_transform(y)
# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# 函数:输入变量 输入大小 输出大小 神经层名称 激励函数默认None
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Append one fully connected layer and return its output tensor.

    Weights are drawn from a standard normal and biases start at 0.1.
    When `activation_function` is None the raw affine output is returned.
    A histogram summary of the outputs is recorded under `layer_name`.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    linear = tf.matmul(inputs, weights) + bias
    # Apply the activation only when one was supplied.
    outputs = linear if activation_function is None else activation_function(linear)
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
# Placeholders fed at session-run time: image rows and one-hot label rows.
xs = tf.placeholder(tf.float32, [None, 64]) # 8*8 = 64 pixels per sample
ys = tf.placeholder(tf.float32, [None, 10]) # 10 output classes (digits 0-9)
隐藏层L1:输入是64(load_digits数据集每个样本64个像素点),输出为100个,从而更好地展示过拟合的问题,激励函数为tanh。
输出层prediction:输入是100(L1的输出),输出是10,对应数字0-9,激励函数为softmax。
# Hidden layer: 64 pixel inputs -> 100 tanh units (deliberately wide to
# provoke overfitting for the demonstration).
L1 = add_layer(xs, 64, 100, 'L1', activation_function=tf.nn.tanh)
# Output layer: 100 -> 10 class probabilities; softmax for classification.
prediction = add_layer(L1, 100, 10, 'L2', activation_function=tf.nn.softmax)
# Cross-entropy between one-hot labels and predicted probabilities.
# NOTE(review): tf.log(prediction) yields NaN/-inf if a probability
# underflows to 0 — consider clipping before the log.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
reduction_indices=[1])) # loss
# Record the loss so TensorBoard can plot its curve.
tf.summary.scalar('loss', cross_entropy)
# Plain gradient descent; 0.6 is intentionally large for comparison runs.
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy) # reduce the error
# Session that executes the graph.
sess = tf.Session()
# Merge every registered summary op into a single fetchable op.
merged = tf.summary.merge_all()
# Separate writers so train and test curves can be overlaid in TensorBoard.
train_writer = tf.summary.FileWriter('logs/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/test', sess.graph)
# tf.initialize_all_variables() is deprecated; use the replacement API.
init = tf.global_variables_initializer()
sess.run(init)
for i in range(1000):
    # One full-batch gradient-descent step.
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train})
    # Every 50 steps record train/test summaries for TensorBoard.
    if i % 50 == 0:
        train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train})
        test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test})
        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 18 15:50:08 2019
@author: xiuzhang CSDN Eastmount
"""
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
#-------------------------------- Load data --------------------------------
# Handwritten digits: 8x8 pixel images with integer labels 0-9.
digits = load_digits()
X, y = digits.data, digits.target
# One-hot encode the labels (digit k -> 1 at index k of a 10-vector).
y = LabelBinarizer().fit_transform(y)
# 70/30 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
#------------------------------ Layer builder ------------------------------
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Append one fully connected layer and return its output tensor.

    Weights are sampled from a standard normal; biases start at 0.1.
    When `activation_function` is None the raw affine output is returned.
    A histogram summary of the outputs is recorded under `layer_name`.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    linear = tf.matmul(inputs, weights) + bias
    # Apply the activation only when one was supplied.
    outputs = linear if activation_function is None else activation_function(linear)
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
#------------------------------ Placeholders ------------------------------
# Fed at session-run time with image rows and one-hot label rows.
xs = tf.placeholder(tf.float32, [None, 64]) # 8*8 = 64 pixels per sample
ys = tf.placeholder(tf.float32, [None, 10]) # 10 output classes (digits 0-9)
#---------------------------- Build the network ----------------------------
# Hidden layer: 64 pixel inputs -> 100 tanh units (deliberately wide to
# provoke overfitting for the demonstration).
L1 = add_layer(xs, 64, 100, 'L1', activation_function=tf.nn.tanh)
# Output layer: 100 -> 10 class probabilities via softmax.
prediction = add_layer(L1, 100, 10, 'L2', activation_function=tf.nn.softmax)
#---------------------------- Loss and training ----------------------------
# Cross-entropy between one-hot labels and predicted probabilities.
# Clip the probabilities away from 0 so tf.log cannot produce NaN/-inf
# when a softmax output underflows in float32.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))
# Record the loss so TensorBoard can plot its curve.
tf.summary.scalar('loss', cross_entropy)
# Plain gradient descent; 0.6 is intentionally large for comparison runs.
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)
#-------------------------------- Initialize --------------------------------
# Session that executes the graph.
sess = tf.Session()
# Merge all summary ops into one fetchable op.
merged = tf.summary.merge_all()
# Separate writers let TensorBoard overlay train vs. test curves.
train_writer = tf.summary.FileWriter('logs/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/test', sess.graph)
# tf.initialize_all_variables() is deprecated; use the replacement API.
init = tf.global_variables_initializer()
sess.run(init)
#------------------------------ Training loop ------------------------------
for step in range(1000):
    # Full-batch gradient-descent update.
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train})
    # Every 50 steps log summaries so train/test loss can be compared.
    if step % 50 == 0:
        for writer, feats, labels in ((train_writer, X_train, y_train),
                                      (test_writer, X_test, y_test)):
            summary = sess.run(merged, feed_dict={xs: feats, ys: labels})
            writer.add_summary(summary, step)
activate tensorflow
cd\
cd C:\Users\xiuzhang\Desktop\TensorFlow\blog
tensorboard --logdir=logs
- 重新关闭运行环境Spyder或删除logs再运行,怀疑是内存中有很多Placeholder需要释放。
- sess.run(train_step, feed_dict = {xs: batch_xs, ys: batch_ys}) 中 feed_dict 缺少参数,增加keep_prob:0.5。
keep_prob = tf.placeholder(tf.float32)
训练的时候保留50%的结果,keep_prob设置为0.5
输出loss记录的时候,需要显示所有的结果,故keep_prob设置为1.0
# Training loop: dropout is active during updates, disabled for logging.
for i in range(1000):
    # Train keeping 50% of activations (dropout on).
    sess.run(train_step, feed_dict={xs:X_train, ys:y_train, keep_prob:0.5})
    # Every 50 steps record summaries with dropout off (keep everything).
    if i % 50 == 0:
        train_result = sess.run(merged,feed_dict={xs:X_train, ys:y_train, keep_prob:1.0})
        test_result = sess.run(merged,feed_dict={xs:X_test, ys:y_test, keep_prob:1.0})
        # Write both curves to TensorBoard.
        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)
# Build one fully connected layer; dropout is applied to the affine output
# before the activation. Relies on the module-level keep_prob placeholder.
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    # Weight matrix (in_size x out_size) initialized from a standard normal.
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    # Biases start slightly positive at 0.1.
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    # Affine transform: inputs * W + b.
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    # Dropout: randomly keep a keep_prob fraction of units (fed per run).
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    # Apply the activation only when one was supplied.
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    # Histogram summary of the layer's outputs for TensorBoard.
    tf.summary.histogram(layer_name+'/outputs', outputs)
    return outputs
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 18 15:50:08 2019
@author: xiuzhang CSDN Eastmount
"""
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
#-------------------------------- Load data --------------------------------
# load_digits: 8x8 grayscale digit images with integer targets 0-9.
digits = load_digits()
X = digits.data
y = digits.target
# One-hot encode the targets (digit k -> 1 at index k of a 10-vector).
y = LabelBinarizer().fit_transform(y)
# Hold out 30% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
#------------------------------ Layer builder ------------------------------
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Fully connected layer with dropout applied before the activation.

    Relies on the module-level `keep_prob` placeholder: the fraction of
    activations kept on each run. Records a histogram summary of the
    outputs under `layer_name`.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    linear = tf.matmul(inputs, weights) + bias
    # Randomly drop units; the kept ones are rescaled by 1/keep_prob.
    dropped = tf.nn.dropout(linear, keep_prob)
    # Apply the activation only when one was supplied.
    outputs = dropped if activation_function is None else activation_function(dropped)
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
#------------------------------ Placeholders ------------------------------
# Fed at session-run time with image rows and one-hot label rows.
xs = tf.placeholder(tf.float32, [None, 64]) # 8*8 = 64 pixels per sample
ys = tf.placeholder(tf.float32, [None, 10]) # 10 output classes (digits 0-9)
# Fraction of activations kept by dropout: 0.5 while training, 1.0 for logging.
keep_prob = tf.placeholder(tf.float32)
#---------------------------- Build the network ----------------------------
# Hidden layer: 64 pixel inputs -> 50 tanh units. (The original comments
# said 100, but the code uses 50 for the dropout comparison.)
L1 = add_layer(xs, 64, 50, 'L1', activation_function=tf.nn.tanh)
# Output layer: 50 -> 10 class probabilities via softmax.
prediction = add_layer(L1, 50, 10, 'L2', activation_function=tf.nn.softmax)
#---------------------------- Loss and training ----------------------------
# Cross-entropy between one-hot labels and predicted probabilities.
# Clip the probabilities away from 0 so tf.log cannot produce NaN/-inf
# when a softmax output underflows in float32.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))
# Record the loss so TensorBoard can plot its curve.
tf.summary.scalar('loss', cross_entropy)
# Plain gradient descent; 0.6 is kept for comparison with the earlier run.
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)
#-------------------------------- Initialize --------------------------------
# Session that executes the graph.
sess = tf.Session()
# Merge all summary ops into one fetchable op.
merged = tf.summary.merge_all()
# Separate writers let TensorBoard overlay train vs. test curves.
train_writer = tf.summary.FileWriter('logs/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/test', sess.graph)
# tf.initialize_all_variables() is deprecated; use the replacement API.
init = tf.global_variables_initializer()
sess.run(init)
#------------------------------ Training loop ------------------------------
for step in range(1000):
    # Update with dropout active: keep only half of the activations.
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
    # Every 50th step, log summaries with dropout disabled (keep_prob=1.0).
    if step % 50 == 0:
        train_summary = sess.run(
            merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1.0})
        test_summary = sess.run(
            merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1.0})
        # Write both curves to TensorBoard.
        train_writer.add_summary(train_summary, step)
        test_writer.add_summary(test_summary, step)
天行健,君子以自强不息。
地势坤,君子以厚德载物。
[1] 神经网络和机器学习基础入门分享 - 作者的文章
[2] 斯坦福机器学习视频NG教授:https://class.coursera.org/ml/class/index
[3] 书籍《游戏开发中的人工智能》、《游戏编程中的人工智能技术》
[4] 网易云莫烦老师视频(强推 我付费支持老师一波):https://study.163.com/course/courseLearn.htm?courseId=1003209007
[5] 神经网络激励函数 - deeplearning
[6] tensorflow架构 - NoMorningstar
[7] Tensorflow实现CNN用于MNIST识别 - siucaan
[8] MNIST手写体识别任务 - chen645096127
[9] https://github.com/siucaan/CNN_MNIST
[10] https://github.com/eastmountyxz/AI-for-TensorFlow