零基础入门深度学习(4) - 卷积神经网络

卷积层的前向传播:

\begin{align}\notag x_j^l = f(\sum_ {i\in M_j} x_i^{l-1} * k_{ij}^l + b_j^l)\end{align}

def forward(self, input_array):
        '''
        Run the forward pass of the convolutional layer.

        The input is remembered on the instance, padded through
        ``padding`` using ``self.zero_padding``, and then convolved with
        each of the ``self.filter_number`` filters. Every filter writes
        its feature map into its own slice of ``self.output_array``.
        Finally ``self.output_array`` is handed to ``element_wise_op``
        together with ``self.activator.forward``.

        Nothing is returned; the result lives in ``self.output_array``.
        '''
        self.input_array = input_array
        self.padded_input_array = padding(input_array, self.zero_padding)
        # One feature map per filter, written into the matching output slice.
        for index in range(self.filter_number):
            kernel = self.filters[index]
            conv(
                self.padded_input_array,
                kernel.get_weights(),
                self.output_array[index],
                self.stride,
                kernel.get_bias(),
            )
        element_wise_op(self.output_array, self.activator.forward)

# 计算卷积
def conv(input_array, kernel_array, output_array, stride, bias):
    '''
    Slide kernel_array over input_array and store the result in output_array.

    For every output position (i, j) the patch selected by ``get_patch``
    is multiplied element-wise with ``kernel_array``, all products are
    summed, and ``bias`` is added. The kernel is used as given (it is not
    flipped here). Works for both 2D and 3D input, as supported by
    ``get_patch``.

    input_array:  the (already padded) input
    kernel_array: the kernel; its last two axes are height and width
    output_array: array whose first two axes give the output height and
                  width; it is filled in place
    stride:       step between neighbouring patches
    bias:         scalar added to every output element

    Nothing is returned.
    '''
    output_height, output_width = output_array.shape[0], output_array.shape[1]
    kernel_height, kernel_width = kernel_array.shape[-2], kernel_array.shape[-1]
    for i in range(output_height):
        for j in range(output_width):
            patch = get_patch(input_array, i, j, kernel_width, kernel_height, stride)
            output_array[i, j] = (patch * kernel_array).sum() + bias

# 获取卷积区域
def get_patch(input_array, i, j, filter_width, filter_height, stride):
    '''
    Return the region of input_array covered by the filter at output
    position (i, j).

    The window starts at row ``i * stride`` and column ``j * stride`` and
    spans ``filter_height`` rows and ``filter_width`` columns of the last
    two axes. Any leading axes (for example the channel axis of a 3D
    input) are kept whole, so 2D and 3D inputs behave as before and
    higher-rank inputs are handled the same way.

    For a NumPy array the result is a slice of the input, not a copy.

    Raises ValueError if input_array has fewer than two dimensions
    (previously such input silently produced None).
    '''
    if input_array.ndim < 2:
        raise ValueError(
            'get_patch expects an array with at least 2 dimensions, got %d'
            % input_array.ndim
        )
    start_i = i * stride
    start_j = j * stride
    # Ellipsis keeps every leading axis intact and slices only the last two.
    return input_array[
        ...,
        start_i: start_i + filter_height,
        start_j: start_j + filter_width,
    ]

池化(下采样)层的前向传播:

\begin{align}\notag x_j^l = f(\beta_j^l down(x_j^{l-1}) + b_j^l)\end{align}

def forward(self, input_array):
        '''
        Run the forward pass of the max-pooling layer.

        For every channel and every output position, the window chosen by
        ``get_patch`` (size ``self.filter_height`` x ``self.filter_width``,
        step ``self.stride``) is reduced to its maximum, which is stored
        in ``self.output_array[d, i, j]``.

        Nothing is returned.
        '''
        rows = int(self.output_height)
        cols = int(self.output_width)
        for d in range(self.channel_number):
            channel = input_array[d]
            for i in range(rows):
                for j in range(cols):
                    window = get_patch(
                        channel, i, j,
                        self.filter_width, self.filter_height, self.stride,
                    )
                    self.output_array[d, i, j] = window.max()

卷积层的反向传播:

\begin{align}\notag\delta_j^l = f^\prime(u_j^l)\circ conv2(\delta_j^{l+1},rot180(k_j^{l+1}),\text{'full'})\end{align}

def bp_sensitivity_map(self, sensitivity_array, activator):
        '''
        Compute the sensitivity map that is passed to the previous layer.

        sensitivity_array: sensitivity map of this layer
        activator: activation function of the previous layer

        The result is stored in ``self.delta_array``; nothing is returned.
        '''
        # Handle the stride: expand the original sensitivity map.
        expanded_array = self.expand_sensitivity_map(sensitivity_array)
        # "Full" convolution: zero-pad the sensitivity map.
        # The zero-padding cells of the original input would also receive
        # an error term, but it is not propagated any further, so it is
        # not computed.
        expanded_width = expanded_array.shape[2]
        # A pad width must be a whole number of cells; on Python 3 the
        # `/` operator would produce a float here, so use floor division.
        zp = (self.input_width + self.filter_width - 1 - expanded_width) // 2
        padded_array = padding(expanded_array, zp)
        # Initialise delta_array, which holds the sensitivity map passed
        # to the previous layer.
        self.delta_array = self.create_delta_array()
        # For a layer with several filters, the sensitivity map passed to
        # the previous layer is the sum of the maps of all filters.
        for f in range(self.filter_number):
            kernel = self.filters[f]
            # Rotate the filter weights by 180 degrees in the (1, 2) plane.
            flipped_weights = np.rot90(kernel.get_weights(), 2, (1, 2))
            # Compute the delta_array that belongs to this one filter.
            delta_array = self.create_delta_array()
            for d in range(delta_array.shape[0]):
                conv(padded_array[f], flipped_weights[d], delta_array[d], 1, 0)
            self.delta_array += delta_array
        # Multiply element-wise with the derivative of the activation
        # function. np.array() copies, so self.input_array is not modified
        # when it is an ndarray.
        derivative_array = np.array(self.input_array)
        element_wise_op(derivative_array, activator.backward)
        self.delta_array *= derivative_array

    def expand_sensitivity_map(self, sensitivity_array):
        '''
        Spread a strided sensitivity map out to its stride-1 size.

        A zero array with the height and width the output would have had
        with stride 1 is created, and each value
        ``sensitivity_array[:, i, j]`` is copied to position
        ``(i * stride, j * stride)``. All other cells stay zero.

        sensitivity_array: array indexed as (depth, row, column)

        Returns the expanded array.
        '''
        step = self.stride
        channels = sensitivity_array.shape[0]
        # Size of the sensitivity map if the stride had been 1.
        full_width = self.input_width - self.filter_width + 2 * self.zero_padding + 1
        full_height = self.input_height - self.filter_height + 2 * self.zero_padding + 1
        expanded = np.zeros((channels, full_height, full_width))
        rows = int(self.output_height)
        cols = int(self.output_width)
        # Copy the error values from the original sensitivity map.
        for row in range(rows):
            target_row = row * step
            for col in range(cols):
                expanded[:, target_row, col * step] = sensitivity_array[:, row, col]
        return expanded

    def create_delta_array(self):
        '''
        Return a new all-zero array of shape
        (channel_number, input_height, input_width).
        '''
        shape = (self.channel_number, self.input_height, self.input_width)
        return np.zeros(shape)

池化(下采样)层的反向传播:

\begin{align}\notag\delta_j^l = \beta_j^{l+1}(f^\prime(u_j^l) \circ up(\delta_j^{l+1}))\end{align}

def backward(self, input_array, sensitivity_array):
        '''
        Run the backward pass of the max-pooling layer.

        input_array: the input that was given to the forward pass
        sensitivity_array: sensitivity map of this layer, indexed as
                           (channel, row, column)

        For each pooling window the incoming error is routed to the
        position reported by ``get_max_index``; every other input cell
        keeps an error of zero. The result is stored in
        ``self.delta_array`` (same shape as ``input_array``); nothing is
        returned.
        '''
        self.delta_array = np.zeros(input_array.shape)
        for d in range(self.channel_number):
            for i in range(int(self.output_height)):
                for j in range(int(self.output_width)):
                    patch_array = get_patch(
                        input_array[d], i, j,
                        self.filter_width, self.filter_height, self.stride,
                    )
                    # NOTE(review): get_max_index is assumed to return the
                    # (row, column) offset of the maximum inside the patch
                    # -- confirm against its definition.
                    row_offset, col_offset = get_max_index(patch_array)
                    # Accumulate rather than assign: when windows overlap
                    # (stride smaller than the filter size) one input cell
                    # can be the maximum of several windows and must
                    # receive the sum of their errors. For non-overlapping
                    # windows this equals plain assignment, because
                    # delta_array starts at zero.
                    self.delta_array[
                        d,
                        i * self.stride + row_offset,
                        j * self.stride + col_offset,
                    ] += sensitivity_array[d, i, j]

相关推荐