sp.py 17.1 KB
Newer Older
Larkin Heintzman's avatar
Larkin Heintzman committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
"""
Convolution-like operations with sparse matrix multiplication.

To read about different sparse formats, see
U{http://www-users.cs.umn.edu/~saad/software/SPARSKIT/paper.ps}.

@todo: Automatic methods for determining best sparse format?
"""
# COPIED FROM hpu/icml09/sp.py
from __future__ import absolute_import, print_function, division
import numpy as np
import scipy
from scipy import sparse as scipy_sparse
from six.moves import xrange

import theano
import theano.sparse
from theano import sparse, gof, Op, tensor
from theano.sparse.basic import Remove0, remove0

# To maintain compatibility
from theano.sparse import (
    SpSum, sp_sum,
    ColScaleCSC, RowScaleCSC, col_scale, row_scale,
    Diag, diag, SquareDiagonal, square_diagonal,
    EnsureSortedIndices, ensure_sorted_indices, clean)


def register_specialize(lopt, *tags, **kwargs):
    theano.compile.optdb['specialize'].register(
        (kwargs and kwargs.pop('name')) or lopt.__name__, lopt, 'fast_run',
        *tags)


class ConvolutionIndices(Op):
    """Build indices for a sparse CSC matrix that could implement A
    (convolve) B.

       This generates a sparse matrix M, which generates a stack of
       image patches when computing the dot product of M with image
       patch. Convolution is then simply the dot product of (img x M)
       and the kernels.
    """
    __props__ = ()

    @staticmethod
    def conv_eval(inshp, kshp, strides=(1, 1), mode='valid'):
        (dx, dy) = strides
        return convolution_indices.evaluate(inshp, kshp, (dx, dy),
                                            mode=mode, ws=True)

    # img_shape and ker_shape are (height,width)
    @staticmethod
    def evaluate(inshp, kshp, strides=(1, 1), nkern=1, mode='valid', ws=True):
        """Build a sparse matrix which can be used for performing...
        * convolution: in this case, the dot product of this matrix
        with the input images will generate a stack of images
        patches. Convolution is then a tensordot operation of the
        filters and the patch stack.
        * sparse local connections: in this case, the sparse matrix
        allows us to operate the weight matrix as if it were
        fully-connected. The structured-dot with the input image gives
        the output for the following layer.

        :param ker_shape: shape of kernel to apply (smaller than image)
        :param img_shape: shape of input images
        :param mode: 'valid' generates output only when kernel and
                     image overlap overlap fully. Convolution obtained
                     by zero-padding the input
        :param ws: must be always True
        :param (dx,dy): offset parameter. In the case of no weight sharing,
                        gives the pixel offset between two receptive fields.
                        With weight sharing gives the offset between the
                        top-left pixels of the generated patches

        :rtype: tuple(indices, indptr, logical_shape, sp_type, out_img_shp)
        :returns: the structure of a sparse matrix, and the logical dimensions
                  of the image which will be the result of filtering.
        """
        if not ws:
            raise Exception("ws is obsolete and it must be always True")

        (dx, dy) = strides

        # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
        # in the first case, default nfeatures to 1
        if np.size(inshp) == 2:
            inshp = (1,) + inshp

        inshp = np.array(inshp)
        kshp = np.array(kshp)
        ksize = np.prod(kshp)

        kern = ksize - 1 - np.arange(ksize)

        # size of output image if doing proper convolution
        # (mode='full',dx=dy=0) outshp is the actual output shape
        # given the parameters
        fulloutshp = inshp[1:] + kshp - 1
        if mode == 'valid':
            s = -1
        else:
            s = 1
        outshp = np.int64(np.ceil((inshp[1:] + s * kshp - s * 1) \
                 / np.array([dy, dx], dtype='float')))
        if any(outshp <= 0):
            err = 'Invalid kernel', kshp, 'and/or step size', (dx, dy),\
                  'for given input shape', inshp
            raise ValueError(err)

        outsize = np.prod(outshp)
        insize = np.prod(inshp)

        # range of output units over which to iterate
        if mode == 'valid':
            lbound = np.array([kshp[0] - 1, kshp[1] - 1])
            ubound = lbound + (inshp[1:] - kshp + 1)
        else:
            lbound = np.zeros(2)
            ubound = fulloutshp

        # coordinates of image in "fulloutshp" coordinates
        topleft = np.array([kshp[0] - 1, kshp[1] - 1])
        # bound when counting the receptive field
        botright = topleft + inshp[1:]

        # sparse matrix specifics...
        if ws:
            spmatshp = (outsize * np.prod(kshp) * inshp[0], insize)
        else:
            spmatshp = (nkern * outsize, insize)
        spmat = scipy_sparse.lil_matrix(spmatshp)

        # loop over output image pixels
        z, zz = 0, 0

        # incremented every time we write something to the sparse
        # matrix this is used to track the ordering of filter tap
        # coefficient in sparse column ordering
        tapi, ntaps = 0, 0

        # Note: looping over the number of kernels could've been done
        # more efficiently as the last step (when writing to
        # spmat). However, this messes up the ordering of the column
        # values (order in which you write the values determines how
        # the vectorized data will get used later one)

        for fmapi in xrange(inshp[0]):  # loop over input features
            # loop over number of kernels (nkern=1 for weight sharing)
            for n in xrange(nkern):

                # FOR EACH OUTPUT PIXEL...
                # loop over output image height
                for oy in np.arange(lbound[0], ubound[0], dy):
                     # loop over output image width
                    for ox in np.arange(lbound[1], ubound[1], dx):

                       # kern[l] is filter value to apply at (oj,oi)
                       # for (iy,ix)
                        l = 0

                        # ... ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                        for ky in oy + np.arange(kshp[0]):
                            for kx in ox + np.arange(kshp[1]):

                                # verify if we are still within image
                                # boundaries. Equivalent to
                                # zero-padding of the input image
                                if (all((ky, kx) >= topleft) and
                                    all((ky, kx) < botright)):

                                    # convert to "valid" input space
                                    # coords used to determine column
                                    # index to write to in sparse mat
                                    iy, ix = np.array((ky, kx)) - topleft
                                    # determine raster-index of input pixel...

                                    # taking into account multiple
                                    # input features
                                    col = iy * inshp[2] + ix + \
                                          fmapi * np.prod(inshp[1:])

                                    # convert oy,ox values to output
                                    # space coordinates
                                    if mode == 'full':
                                        (y, x) = (oy, ox)
                                    else:
                                        (y, x) = (oy, ox) - topleft
                                    # taking into account step size
                                    (y, x) = np.array([y, x]) / (dy, dx)

                                    # convert to row index of sparse matrix
                                    if ws:
                                        row = ((y * outshp[1] + x) *
                                               inshp[0] * ksize + l + fmapi *
                                               ksize)
                                    else:
                                        row = y * outshp[1] + x

                                    # Store something at that location
                                    # in sparse matrix.  The written
                                    # value is only useful for the
                                    # sparse case. It will determine
                                    # the way kernel taps are mapped
                                    # onto the sparse columns (idea of
                                    # kernel map)
                                    # n*... only for sparse
                                    spmat[row + n * outsize, col] = tapi + 1

                                    # total number of active taps
                                    # (used for kmap)
                                    ntaps += 1

                                # absolute tap index (total number of taps)
                                tapi += 1
                                # move on to next filter tap l=(l+1)%ksize
                                l += 1

        if spmat.format != 'csc':
            spmat = spmat.tocsc().sorted_indices()
        else:
            # BUG ALERT: scipy0.6 has bug where data and indices are written in
            # reverse column ordering.
            # Explicit call to sorted_indices removes this problem.
            spmat = spmat.sorted_indices()

        if ws:
            kmap = None
        else:
            kmap = np.zeros(ntaps, dtype='int')
            k = 0
            # print 'TEMPORARY BUGFIX: REMOVE !!!'
            for j in xrange(spmat.shape[1]):
                for i_idx in xrange(spmat.indptr[j], spmat.indptr[j + 1]):
                    if spmat.data[i_idx] != 0:
                        # this is == spmat[i,j] - 1
                        kmap[k] = spmat.data[i_idx] - 1
                        k += 1

        # when in valid mode, it is more efficient to store in sparse row
        # TODO: need to implement structured dot for csr matrix
        assert spmat.format == 'csc'
        sptype = 'csc'
        #sptype = 'csr' if mode=='valid' else 'csc'
        if 0 and mode == 'valid':
            spmat = spmat.tocsr()

        rval = (spmat.indices[:spmat.size],
                spmat.indptr, spmatshp, sptype, outshp)
        if kmap is not None:
            rval += (kmap,)

        return rval

    def perform(self, node, inputs, outputs):
        (inshp, kshp) = inputs
        (out_indices, out_indptr, spmat_shape) = outputs
        indices, indptr, spmatshp, outshp = self.evaluate(inshp, kshp)
        out_indices[0] = indices
        out_indptr[0] = indptr
        spmat_shape[0] = np.asarray(spmatshp)

convolution_indices = ConvolutionIndices()


def convolve(kerns, kshp, nkern, images, imgshp, step=(1, 1), bias=None,
             mode='valid', flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generate by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1>
        ... <feature_map_n>], where <feature_map> represents a
        "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param sumdims: dimensions over which to sum for the tensordot operation.
                    By default ((2,),(1,)) assumes kerns is a nkern x kernsize
                    matrix and images is a batchsize x imgsize matrix
                    containing flattened images in raster order
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern tensor,
                    will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,heigt,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = np.int64(np.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
            convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(np.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack([images.shape[0] * tensor.as_tensor(np.prod(outshp)),\
                           tensor.as_tensor(imgshp[0] * kern_size)])
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack([images.shape[0],\
                           tensor.as_tensor(np.prod(outshp)),\
                           tensor.as_tensor(nkern)])
    tensout = tensor.reshape(output, newshp, ndim=3)
    output = tensor.DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, np.hstack((nkern, outshp))


def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling.  Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply convolution.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    poolsize = np.int64(np.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
            convolution_indices.conv_eval(imgshp, maxpoolshp,
                                          maxpoolshp, mode='valid')

#    print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
#    print 'imgshp = ', imgshp
#    print 'maxpoolshp = ', maxpoolshp
#    print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(np.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = tensor.stack([images.shape[0] *\
                               tensor.as_tensor(np.prod(outshp)),
                           tensor.as_tensor(imgshp[0]),
                           tensor.as_tensor(poolsize)])
    patch_stack = tensor.reshape(patches, pshape, ndim=3)

    out1 = tensor.max(patch_stack, axis=2)

    pshape = tensor.stack([images.shape[0],
                           tensor.as_tensor(np.prod(outshp)),
                           tensor.as_tensor(imgshp[0])])
    out2 = tensor.reshape(out1, pshape, ndim=3)

    out3 = tensor.DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return tensor.flatten(out3, 2), outshp