This article introduces implementations of several attention mechanisms, including EA/MHSA/SK/DA/EPSA.
[Deep Learning] Attention Mechanisms (Part 1)
[Deep Learning] Attention Mechanisms (Part 3)
Contents
1. EA (External Attention)
2. Multi-Head Self-Attention
3. SK (Selective Kernel Networks)
4. DA (Dual Attention)
5. EPSA (Efficient Pyramid Squeeze Attention)

1. EA (External Attention)
EA (External Attention) can attend to global spatial information. (Paper: paper link)
Its structure is shown in the figure below; the code is as follows (code link):
import math

import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.modules.batchnorm import _BatchNorm

# In the original repository norm_layer is injected by the surrounding framework;
# nn.BatchNorm2d is used here as a stand-in so the module runs on its own.
norm_layer = nn.BatchNorm2d


class External_attention(nn.Module):
    '''
    Arguments:
        c (int): The input and output channel number.
    '''
    def __init__(self, c):
        super(External_attention, self).__init__()
        self.conv1 = nn.Conv2d(c, c, 1)

        self.k = 64
        self.linear_0 = nn.Conv1d(c, self.k, 1, bias=False)

        self.linear_1 = nn.Conv1d(self.k, c, 1, bias=False)
        # the second memory unit shares (transposed) weights with the first
        self.linear_1.weight.data = self.linear_0.weight.data.permute(1, 0, 2)

        self.conv2 = nn.Sequential(
            nn.Conv2d(c, c, 1, bias=False),
            norm_layer(c))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, _BatchNorm):
                m.weight.data.fill_(1)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        idn = x
        x = self.conv1(x)

        b, c, h, w = x.size()
        n = h * w
        x = x.view(b, c, h * w)                                # b, c, n

        attn = self.linear_0(x)                                # b, k, n
        attn = F.softmax(attn, dim=-1)                         # b, k, n
        attn = attn / (1e-9 + attn.sum(dim=1, keepdim=True))   # double normalization, b, k, n
        x = self.linear_1(attn)                                # b, c, n

        x = x.view(b, c, h, w)
        x = self.conv2(x)
        x = x + idn
        x = F.relu(x)
        return x
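A minimal usage sketch (not from the original post; the tensor shape and channel count are chosen purely for illustration, assuming the norm_layer stand-in above):

x = torch.randn(2, 64, 32, 32)      # (batch, channels, height, width)
ea = External_attention(c=64)       # c must match the input channel count
out = ea(x)                         # output keeps the input shape: (2, 64, 32, 32)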
2. Multi-Head Self-Attention
The classic attention mechanism and the cornerstone of the Transformer. (Paper: paper link)
Its structure is shown in the figure below; the code is as follows (code link):
import numpy as np
import torch
from torch import nn
from torch.nn import init


class ScaledDotProductAttention(nn.Module):
    '''
    Scaled dot-product attention
    '''

    def __init__(self, d_model, d_k, d_v, h, dropout=.1):
        '''
        :param d_model: Output dimensionality of the model
        :param d_k: Dimensionality of queries and keys
        :param d_v: Dimensionality of values
        :param h: Number of heads
        '''
        super(ScaledDotProductAttention, self).__init__()
        self.fc_q = nn.Linear(d_model, h * d_k)
        self.fc_k = nn.Linear(d_model, h * d_k)
        self.fc_v = nn.Linear(d_model, h * d_v)
        self.fc_o = nn.Linear(h * d_v, d_model)
        self.dropout = nn.Dropout(dropout)

        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v
        self.h = h

        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, queries, keys, values, attention_mask=None, attention_weights=None):
        '''
        Computes the attention output.
        :param queries: Queries (b_s, nq, d_model)
        :param keys: Keys (b_s, nk, d_model)
        :param values: Values (b_s, nk, d_model)
        :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking.
        :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk).
        :return:
        '''
        b_s, nq = queries.shape[:2]
        nk = keys.shape[1]

        q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3)  # (b_s, h, nq, d_k)
        k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1)     # (b_s, h, d_k, nk)
        v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3)   # (b_s, h, nk, d_v)

        att = torch.matmul(q, k) / np.sqrt(self.d_k)  # (b_s, h, nq, nk)
        if attention_weights is not None:
            att = att * attention_weights
        if attention_mask is not None:
            att = att.masked_fill(attention_mask, -np.inf)
        att = torch.softmax(att, -1)
        att = self.dropout(att)

        out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v)  # (b_s, nq, h*d_v)
        out = self.fc_o(out)  # (b_s, nq, d_model)
        return out
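A minimal usage sketch (dimensions are illustrative only; passing the same tensor as queries, keys and values gives self-attention):

queries = torch.randn(4, 50, 512)      # (batch, sequence length, d_model)
mhsa = ScaledDotProductAttention(d_model=512, d_k=64, d_v=64, h=8)
out = mhsa(queries, queries, queries)  # self-attention
print(out.shape)                       # torch.Size([4, 50, 512])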
3. SK (Selective Kernel Networks)
SK is a channel attention mechanism. (Paper: paper link)
Its structure is shown in the figure below; the code is as follows (code link):
import numpy as np
import torch
from torch import nn
from torch.nn import init
from collections import OrderedDict


class SKAttention(nn.Module):

    def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, group=1, L=32):
        super().__init__()
        self.d = max(L, channel // reduction)
        self.convs = nn.ModuleList([])
        for k in kernels:
            self.convs.append(
                nn.Sequential(OrderedDict([
                    ('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)),
                    ('bn', nn.BatchNorm2d(channel)),
                    ('relu', nn.ReLU())
                ]))
            )
        self.fc = nn.Linear(channel, self.d)
        self.fcs = nn.ModuleList([])
        for i in range(len(kernels)):
            self.fcs.append(nn.Linear(self.d, channel))
        self.softmax = nn.Softmax(dim=0)

    def forward(self, x):
        bs, c, _, _ = x.size()
        conv_outs = []
        ### split: one branch per kernel size
        for conv in self.convs:
            conv_outs.append(conv(x))
        feats = torch.stack(conv_outs, 0)  # k,bs,channel,h,w

        ### fuse
        U = sum(conv_outs)  # bs,c,h,w

        ### reduce channels
        S = U.mean(-1).mean(-1)  # bs,c
        Z = self.fc(S)           # bs,d

        ### calculate attention weights
        weights = []
        for fc in self.fcs:
            weight = fc(Z)
            weights.append(weight.view(bs, c, 1, 1))          # bs,channel,1,1
        attention_weights = torch.stack(weights, 0)           # k,bs,channel,1,1
        attention_weights = self.softmax(attention_weights)   # softmax over the k branches

        ### fuse the branches with their attention weights
        V = (attention_weights * feats).sum(0)
        return V
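A minimal usage sketch (shapes are illustrative only):

x = torch.randn(2, 512, 7, 7)       # (batch, channels, height, width)
sk = SKAttention(channel=512, reduction=16)
out = sk(x)
print(out.shape)                    # torch.Size([2, 512, 7, 7])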
4. DA (Dual Attention)
DA fuses channel attention and spatial (position) attention. (Paper: paper link)
Its structure is shown in the figure below; the code is as follows (code link):
import numpy as np
import torch
from torch import nn
from torch.nn import init
from model.attention.SelfAttention import ScaledDotProductAttention
from model.attention.SimplifiedSelfAttention import SimplifiedScaledDotProductAttention


class PositionAttentionModule(nn.Module):

    def __init__(self, d_model=512, kernel_size=3, H=7, W=7):
        super().__init__()
        self.cnn = nn.Conv2d(d_model, d_model, kernel_size=kernel_size, padding=(kernel_size - 1) // 2)
        self.pa = ScaledDotProductAttention(d_model, d_k=d_model, d_v=d_model, h=1)

    def forward(self, x):
        bs, c, h, w = x.shape
        y = self.cnn(x)
        y = y.view(bs, c, -1).permute(0, 2, 1)  # bs,h*w,c
        y = self.pa(y, y, y)                    # bs,h*w,c
        return y


class ChannelAttentionModule(nn.Module):

    def __init__(self, d_model=512, kernel_size=3, H=7, W=7):
        super().__init__()
        self.cnn = nn.Conv2d(d_model, d_model, kernel_size=kernel_size, padding=(kernel_size - 1) // 2)
        self.pa = SimplifiedScaledDotProductAttention(H * W, h=1)

    def forward(self, x):
        bs, c, h, w = x.shape
        y = self.cnn(x)
        y = y.view(bs, c, -1)  # bs,c,h*w
        y = self.pa(y, y, y)   # bs,c,h*w
        return y


class DAModule(nn.Module):

    def __init__(self, d_model=512, kernel_size=3, H=7, W=7):
        super().__init__()
        self.position_attention_module = PositionAttentionModule(d_model=512, kernel_size=3, H=7, W=7)
        self.channel_attention_module = ChannelAttentionModule(d_model=512, kernel_size=3, H=7, W=7)

    def forward(self, input):
        bs, c, h, w = input.shape
        p_out = self.position_attention_module(input)
        c_out = self.channel_attention_module(input)
        p_out = p_out.permute(0, 2, 1).view(bs, c, h, w)
        c_out = c_out.view(bs, c, h, w)
        return p_out + c_out
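A minimal usage sketch (this assumes the two attention classes imported above are available on the Python path, as in the repository behind the code link; H and W must match the spatial size of the input):

x = torch.randn(2, 512, 7, 7)       # (batch, channels, height, width)
da = DAModule(d_model=512, kernel_size=3, H=7, W=7)
out = da(x)                         # sum of the position and channel branches
print(out.shape)                    # torch.Size([2, 512, 7, 7])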
5. EPSA (Efficient Pyramid Squeeze Attention)
EPSA builds multi-scale channel attention: parallel convolutions with different kernel sizes split the channels, and an SE-style module re-weights each branch. (Paper: paper link)
Its structure is shown in the figure below; the code is as follows (code link):
import torch  # needed for torch.cat in PSAModule
import torch.nn as nn


class SEWeightModule(nn.Module):

    def __init__(self, channels, reduction=16):
        super(SEWeightModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.avg_pool(x)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        weight = self.sigmoid(out)
        return weight


def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, groups=1):
    """standard convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
                     padding=padding, dilation=dilation, groups=groups, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class PSAModule(nn.Module):

    def __init__(self, inplans, planes, conv_kernels=[3, 5, 7, 9], stride=1, conv_groups=[1, 4, 8, 16]):
        super(PSAModule, self).__init__()
        self.conv_1 = conv(inplans, planes // 4, kernel_size=conv_kernels[0], padding=conv_kernels[0] // 2,
                           stride=stride, groups=conv_groups[0])
        self.conv_2 = conv(inplans, planes // 4, kernel_size=conv_kernels[1], padding=conv_kernels[1] // 2,
                           stride=stride, groups=conv_groups[1])
        self.conv_3 = conv(inplans, planes // 4, kernel_size=conv_kernels[2], padding=conv_kernels[2] // 2,
                           stride=stride, groups=conv_groups[2])
        self.conv_4 = conv(inplans, planes // 4, kernel_size=conv_kernels[3], padding=conv_kernels[3] // 2,
                           stride=stride, groups=conv_groups[3])
        self.se = SEWeightModule(planes // 4)
        self.split_channel = planes // 4
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        batch_size = x.shape[0]
        # multi-scale branches
        x1 = self.conv_1(x)
        x2 = self.conv_2(x)
        x3 = self.conv_3(x)
        x4 = self.conv_4(x)

        feats = torch.cat((x1, x2, x3, x4), dim=1)
        feats = feats.view(batch_size, 4, self.split_channel, feats.shape[2], feats.shape[3])

        # SE weights for each branch
        x1_se = self.se(x1)
        x2_se = self.se(x2)
        x3_se = self.se(x3)
        x4_se = self.se(x4)

        x_se = torch.cat((x1_se, x2_se, x3_se, x4_se), dim=1)
        attention_vectors = x_se.view(batch_size, 4, self.split_channel, 1, 1)
        attention_vectors = self.softmax(attention_vectors)
        feats_weight = feats * attention_vectors
        for i in range(4):
            x_se_weight_fp = feats_weight[:, i, :, :]
            if i == 0:
                out = x_se_weight_fp
            else:
                out = torch.cat((x_se_weight_fp, out), 1)

        return out
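A minimal usage sketch (channel counts are illustrative; planes must be divisible by 4, and each entry of conv_groups must divide planes // 4):

x = torch.randn(2, 256, 14, 14)     # (batch, channels, height, width)
psa = PSAModule(inplans=256, planes=256)
out = psa(x)
print(out.shape)                    # torch.Size([2, 256, 14, 14])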