import torch
import torch.nn as nn
m3 = nn.BatchNorm2d(3, eps=0, momentum=0.5, affine=True, track_running_stats=True).cuda()
# To make verification easier, set the layer's buffers by hand
m3.running_mean = (torch.ones([3])*4).cuda() # set the running mean to 4
m3.running_var = (torch.ones([3])*2).cuda()  # set the running variance to 2
# Inspect the layer's state
print('training:',m3.training)
print('running_mean:',m3.running_mean)
print('running_var:',m3.running_var)
# gamma corresponds to the layer's weight; default value 1
print('weight:',m3.weight)
# beta corresponds to the layer's bias; default value 0
print('bias:',m3.bias)
#>
training: True
running_mean: tensor([4., 4., 4.], device='cuda:0')
running_var: tensor([2., 2., 2.], device='cuda:0')
weight: Parameter containing:
tensor([1., 1., 1.], device='cuda:0', requires_grad=True)
bias: Parameter containing:
tensor([0., 0., 0.], device='cuda:0', requires_grad=True)
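As a side note on the constructor arguments (this snippet is my own illustration, not part of the verification): affine controls whether the learnable weight/bias exist, and track_running_stats controls whether running statistics are kept at all.

# A BatchNorm2d without affine parameters or running statistics
m_plain = nn.BatchNorm2d(3, affine=False, track_running_stats=False)
print(m_plain.weight, m_plain.bias)               # None None: no learnable gamma/beta
print(m_plain.running_mean, m_plain.running_var)  # None None: always normalizes with batch statistics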
# Generate an input of shape (1, 3, 416, 416): batch 1, 3 channels, 416x416
torch.manual_seed(21)
input3 = torch.randn(1, 3, 416, 416).cuda()
# Print the first channel of the input
input3[0][0]
#>
tensor([[-0.2386, -1.0934, 0.1558, ..., -0.3553, -0.1205, -0.3859],
[ 0.2582, 0.2833, 0.7942, ..., 1.1228, 0.3332, -1.2364],
[-0.8235, -1.1512, -0.5026, ..., 0.9393, -0.5026, -0.4719],
...,
[-0.2843, -1.3638, -0.4599, ..., 1.6502, 0.4864, -0.1804],
[ 0.3813, -0.6426, 0.4879, ..., 2.7496, 1.8501, 1.7092],
[ 0.8221, -0.5702, 0.1705, ..., 1.0553, 1.0248, 0.5127]],
device='cuda:0')
# Normalize the input in training mode
output3 = m3(input3)
# Print the first channel of the normalized output
output3[0][0]
#>
tensor([[-0.2427, -1.0955, 0.1508, ..., -0.3592, -0.1249, -0.3897],
[ 0.2529, 0.2779, 0.7876, ..., 1.1154, 0.3277, -1.2382],
[-0.8262, -1.1531, -0.5061, ..., 0.9323, -0.5061, -0.4755],
...,
[-0.2884, -1.3652, -0.4635, ..., 1.6416, 0.4805, -0.1847],
[ 0.3757, -0.6458, 0.4820, ..., 2.7383, 1.8410, 1.7004],
[ 0.8154, -0.5735, 0.1654, ..., 1.0480, 1.0176, 0.5067]],
device='cuda:0', grad_fn=<SelectBackward>)
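A quick sanity check (my addition): since weight is 1 and bias is 0, each channel of the training-mode output should be approximately zero-mean with unit variance, because in training mode BatchNorm normalizes with the current batch statistics, not with running_mean/running_var.

# Per-channel mean and variance of the output should be ~0 and ~1
for i in range(3):
    print(output3[0][i].mean().item(), output3[0][i].var().item())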
# Compute the updated running mean and variance by hand
# (PyTorch's momentum weights the NEW batch statistic, unlike optimizer momentum)
momentum = m3.momentum # update factor, 0.5 here
# Per-channel mean of the input batch
obser_mean = torch.Tensor([input3[0][i].mean() for i in range(3)]).cuda()
# Per-channel (unbiased) variance of the input batch
obser_var = torch.Tensor([input3[0][i].var() for i in range(3)]).cuda()
# Running statistics before the forward pass (set to 4 and 2 above)
ex_old = (torch.ones([3])*4).cuda()
var_old = (torch.ones([3])*2).cuda()
# Update the running mean
ex_new = (1 - momentum) * ex_old + momentum * obser_mean
# Update the running variance
var_new = (1 - momentum) * var_old + momentum * obser_var
print('ex_new:',ex_new)
print('var_new:',var_new)
#>
ex_new: tensor([2.0024, 2.0015, 2.0007], device='cuda:0')
var_new: tensor([1.5024, 1.4949, 1.5012], device='cuda:0')
# Normalize the first channel by hand with the batch statistics computed above
# (BN itself normalizes with the biased batch variance; over 416*416 values
# per channel the biased/unbiased gap is negligible)
output3_source = (input3[0][0] - obser_mean[0]) / pow(obser_var[0] + m3.eps, 0.5)
output3_source
#>
tensor([[-0.2427, -1.0955, 0.1508, ..., -0.3592, -0.1249, -0.3897],
[ 0.2529, 0.2779, 0.7876, ..., 1.1154, 0.3277, -1.2382],
[-0.8262, -1.1531, -0.5061, ..., 0.9323, -0.5061, -0.4755],
...,
[-0.2884, -1.3652, -0.4635, ..., 1.6416, 0.4805, -0.1847],
[ 0.3757, -0.6458, 0.4820, ..., 2.7383, 1.8410, 1.7004],
[ 0.8154, -0.5735, 0.1654, ..., 1.0480, 1.0176, 0.5067]],
device='cuda:0')
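The hand-computed tensor matches the layer's output. A programmatic check (a sketch; the tolerance is my choice, to absorb the tiny biased/unbiased variance gap mentioned above):

# Should print True: manual normalization agrees with the layer's output
print(torch.allclose(output3[0][0], output3_source, atol=1e-4))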
# The forward pass updated the layer's running statistics
m3.running_mean,m3.running_var
#>
(tensor([2.0024, 2.0015, 2.0007], device='cuda:0'),
tensor([1.5024, 1.4949, 1.5012], device='cuda:0'))
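These are exactly the manually updated ex_new and var_new from above; a direct check (sketch):

# Both should print True: the layer applied the same momentum update
print(torch.allclose(m3.running_mean, ex_new))
print(torch.allclose(m3.running_var, var_new))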
# Initialize the model and put it in evaluation mode
import torch
import torch.nn as nn
m3 = nn.BatchNorm2d(3, eps=0, momentum=0.5, affine=True, track_running_stats=True).cuda()
# Evaluation mode
m3.eval()
# To make verification easier, set the layer's buffers by hand
m3.running_mean = (torch.ones([3])*4).cuda() # set the running mean to 4
m3.running_var = (torch.ones([3])*2).cuda()  # set the running variance to 2
# Inspect the layer's state
print('training:',m3.training)
print('running_mean:',m3.running_mean)
print('running_var:',m3.running_var)
# gamma corresponds to the layer's weight; default value 1
print('weight:',m3.weight)
# beta corresponds to the layer's bias; default value 0
print('bias:',m3.bias)
#>
training: False
running_mean: tensor([4., 4., 4.], device='cuda:0')
running_var: tensor([2., 2., 2.], device='cuda:0')
weight: Parameter containing:
tensor([1., 1., 1.], device='cuda:0', requires_grad=True)
bias: Parameter containing:
tensor([0., 0., 0.], device='cuda:0', requires_grad=True)
# Initialize the input and compute its per-channel mean and variance
# Generate an input of shape (1, 3, 416, 416): batch 1, 3 channels, 416x416
torch.manual_seed(21)
input3 = torch.randn(1, 3, 416, 416).cuda()
# Per-channel mean of the input batch
obser_mean = torch.Tensor([input3[0][i].mean() for i in range(3)]).cuda()
# Per-channel (unbiased) variance of the input batch
obser_var = torch.Tensor([input3[0][i].var() for i in range(3)]).cuda()
# Print
print('obser_mean:',obser_mean)
print('obser_var:',obser_var)
#>
obser_mean: tensor([0.0047, 0.0029, 0.0014], device='cuda:0')
obser_var: tensor([1.0048, 0.9898, 1.0024], device='cuda:0')
# Normalize the input in eval mode
output3 = m3(input3)
# Print the first channel of the normalized output
output3[0][0]
#>
tensor([[-2.9971, -3.6016, -2.7182, ..., -3.0797, -2.9136, -3.1013],
[-2.6459, -2.6281, -2.2668, ..., -2.0345, -2.5928, -3.7027],
[-3.4107, -3.6424, -3.1838, ..., -2.1642, -3.1838, -3.1621],
...,
[-3.0295, -3.7928, -3.1536, ..., -1.6615, -2.4845, -2.9560],
[-2.5588, -3.2828, -2.4834, ..., -0.8842, -1.5202, -1.6199],
[-2.2471, -3.2316, -2.7078, ..., -2.0822, -2.1038, -2.4659]],
device='cuda:0', grad_fn=<SelectBackward>)
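Note the output is no longer zero-mean: in eval mode the layer normalizes with the stored running statistics (mean 4, variance 2) rather than the batch statistics, so channel 0's output mean should be roughly (0.0047 - 4) / sqrt(2) ≈ -2.82. A quick check (my addition):

# Per-channel output mean is shifted by the (wrong-for-this-data) running stats
print(output3[0][0].mean().item())  # roughly -2.82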
# The same normalization by hand, now with the stored running statistics
output3_source = (input3[0][0] - m3.running_mean[0]) / pow(m3.running_var[0] + m3.eps, 0.5)
output3_source
#>
tensor([[-2.9971, -3.6016, -2.7182, ..., -3.0797, -2.9136, -3.1013],
[-2.6459, -2.6281, -2.2668, ..., -2.0345, -2.5928, -3.7027],
[-3.4107, -3.6424, -3.1838, ..., -2.1642, -3.1838, -3.1621],
...,
[-3.0295, -3.7928, -3.1536, ..., -1.6615, -2.4845, -2.9560],
[-2.5588, -3.2828, -2.4834, ..., -0.8842, -1.5202, -1.6199],
[-2.2471, -3.2316, -2.7078, ..., -2.0822, -2.1038, -2.4659]],
device='cuda:0')
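Here the match is exact, since both the layer and the manual formula use the same running statistics; a check (sketch):

# Should print True: eval-mode BN is a fixed affine transform of the input
print(torch.allclose(output3[0][0], output3_source))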
# Check the layer's running_mean and running_var after the eval-mode forward pass
print(m3.running_mean,m3.running_var)
#>
tensor([4., 4., 4.], device='cuda:0') tensor([2., 2., 2.], device='cuda:0')
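The running statistics are unchanged: an eval-mode forward pass never updates them. To wrap up the train/eval contrast, a minimal self-contained sketch (my own summary, with fresh variable names):

# Running statistics move only during training-mode forward passes
m = nn.BatchNorm2d(3, momentum=0.5, track_running_stats=True).cuda()
x = torch.randn(2, 3, 8, 8).cuda()
m.train()
m(x)                   # training mode: stats move toward the batch statistics
print(m.running_mean)  # no longer the initial zeros
m.eval()
m(x)                   # eval mode: stats are frozen
print(m.running_mean)  # identical to the previous print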