From df80463c92058c08cb6cfd87bbc943a3256f002a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 24 Jul 2017 15:37:31 -0400 Subject: [PATCH 1/6] Wrap all Cupy ufuncs --- autograd/cupy/cupy_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autograd/cupy/cupy_wrapper.py b/autograd/cupy/cupy_wrapper.py index 755f23f6..e15cf70c 100644 --- a/autograd/cupy/cupy_wrapper.py +++ b/autograd/cupy/cupy_wrapper.py @@ -17,7 +17,8 @@ def wrap_namespace(old, new): unchanged_types = {float, int, type(None), type} int_types = {_cupy.int8, _cupy.int16, _cupy.int32, _cupy.int64, _cupy.integer} - function_types = {_cupy.ufunc, types.FunctionType, types.BuiltinFunctionType} + function_types = {_cupy.ufunc, _cupy.fusion.ufunc, + types.FunctionType, types.BuiltinFunctionType} for name, obj in iteritems(old): if obj in nograd_functions: new[name] = nograd_primitive(obj) From 48c7096b6f2e50960e04bb32142827aa8536c4cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 24 Jul 2017 15:38:12 -0400 Subject: [PATCH 2/6] Add norm and true_divide Cupy grads --- autograd/cupy/__init__.py | 2 ++ autograd/cupy/cupy_grads.py | 2 ++ autograd/cupy/linalg.py | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 autograd/cupy/linalg.py diff --git a/autograd/cupy/__init__.py b/autograd/cupy/__init__.py index cf1d3d5e..ebbf3e23 100644 --- a/autograd/cupy/__init__.py +++ b/autograd/cupy/__init__.py @@ -2,4 +2,6 @@ from . import cupy_wrapper from . import cupy_grads from . import cupy_extra +from . import random +from . 
import linalg from .cupy_wrapper import * diff --git a/autograd/cupy/cupy_grads.py b/autograd/cupy/cupy_grads.py index 3799bf0c..42bf710a 100644 --- a/autograd/cupy/cupy_grads.py +++ b/autograd/cupy/cupy_grads.py @@ -20,6 +20,8 @@ acp.subtract.defvjp(lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, -g), argnum=1) acp.divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g / y)) acp.divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, - g * x / y**2), argnum=1) +acp.true_divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g / y)) +acp.true_divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, - g * x / y**2), argnum=1) acp.maximum.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g * balanced_eq(x, ans, y))) acp.maximum.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g * balanced_eq(y, ans, x)), argnum=1) diff --git a/autograd/cupy/linalg.py b/autograd/cupy/linalg.py new file mode 100644 index 00000000..a8d0e124 --- /dev/null +++ b/autograd/cupy/linalg.py @@ -0,0 +1,62 @@ +from __future__ import absolute_import +import cupy.linalg as cpla +from .cupy_wrapper import wrap_namespace +from . 
import cupy_wrapper as acp +  +wrap_namespace(cpla.__dict__, globals()) + + +def grad_norm(g, ans, vs, gvs, x, ord=None, axis=None): +    def check_implemented(): +        matrix_norm = (x.ndim == 2 and axis is None) or isinstance(axis, tuple) + +        if matrix_norm: +            if not (ord is None or ord == 'fro' or ord == 'nuc'): +                raise NotImplementedError('Gradient of matrix norm not ' +                                          'implemented for ord={}'.format(ord)) +        elif not (ord is None or ord > 1): +            raise NotImplementedError('Gradient of norm not ' +                                      'implemented for ord={}'.format(ord)) + +    if axis is None: +        expand = lambda a: a +    elif isinstance(axis, tuple): +        row_axis, col_axis = axis +        if row_axis > col_axis: +            row_axis = row_axis - 1 +        expand = lambda a: acp.expand_dims(acp.expand_dims(a, +                                                           row_axis), col_axis) +    else: +        expand = lambda a: acp.expand_dims(a, axis=axis) + +    if ord == 'nuc': +        if axis is None: +            roll = lambda a: a +            unroll = lambda a: a +        else: +            row_axis, col_axis = axis +            if row_axis > col_axis: +                row_axis = row_axis - 1 +            # Roll matrix axes to the back +            roll = lambda a: acp.rollaxis(acp.rollaxis(a, col_axis, a.ndim), +                                          row_axis, a.ndim-1) +            # Roll matrix axes to their original position +            unroll = lambda a: acp.rollaxis(acp.rollaxis(a, a.ndim-2, row_axis), +                                            a.ndim-1, col_axis) + +    check_implemented() +    if ord is None or ord == 2 or ord == 'fro': +        return expand(g / ans) * x +    elif ord == 'nuc': +        dot = acp.dot if x.ndim == 2 else partial(acp.einsum, '...ij,...jk->...ik') +        x_rolled = roll(x) +        u, s, vt = svd(x_rolled, full_matrices=False) +        uvt_rolled = dot(u, vt) +        # Roll the matrix axes back to their correct positions +        uvt = unroll(uvt_rolled) +        g = expand(g) +        return g * uvt +    else: +        # see https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm +        return expand(g / ans**(ord-1)) * x * acp.abs(x)**(ord-2) +norm.defvjp(grad_norm) From 506840b45d449727d231726dcae61fbf04567c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 24 Jul 2017 15:38:33 -0400 Subject: 
[PATCH 3/6] Add vspace support for cupy --- autograd/cupy/cupy_extra.py | 3 +++ autograd/util.py | 1 + 2 files changed, 4 insertions(+) diff --git a/autograd/cupy/cupy_extra.py b/autograd/cupy/cupy_extra.py index 68b415d3..a817862a 100644 --- a/autograd/cupy/cupy_extra.py +++ b/autograd/cupy/cupy_extra.py @@ -82,6 +82,9 @@ def __init__(self, value): def zeros(self): return acp.zeros(self.shape, dtype=self.dtype) + def ones(self): + return acp.ones(self.shape, dtype=self.dtype) + def flatten(self, value, covector=False): return acp.ravel(value) diff --git a/autograd/util.py b/autograd/util.py index 613aebaa..06d6b62f 100644 --- a/autograd/util.py +++ b/autograd/util.py @@ -4,6 +4,7 @@ from builtins import range import autograd.numpy as np +import autograd.cupy as cp from autograd.convenience_wrappers import grad from autograd.core import vspace, vspace_flatten, getval From f90db45f7dd812958bdbfe27383c5aff4bc63035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 31 Jul 2017 14:11:43 -0400 Subject: [PATCH 4/6] Generalize vspaces to use correct numeric library --- autograd/container_types.py | 12 +++++++----- autograd/core.py | 9 ++++----- autograd/cupy/cupy_extra.py | 1 + autograd/numpy/numpy_extra.py | 1 + autograd/optimizers.py | 8 +++++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/autograd/container_types.py b/autograd/container_types.py index 04fb5d0d..77ad5cc0 100644 --- a/autograd/container_types.py +++ b/autograd/container_types.py @@ -66,6 +66,7 @@ def __init__(self, value): self.shape = [vspace(x) for x in value] self.size = sum(s.size for s in self.shape) self.sequence_type = type(value) + self.lib = vspace(value[0]).lib assert self.sequence_type in (tuple, list) def zeros(self): @@ -77,10 +78,10 @@ def mut_add(self, xs, ys): def flatten(self, value, covector=False): if self.shape: - return np.concatenate( - [vs.flatten(v, covector) for vs, v in zip(self.shape, value)]) + return 
self.lib.concatenate([vs.flatten(v, covector) +                                         for vs, v in zip(self.shape, value)]) else: - return np.zeros((0,)) + return self.lib.zeros((0,)) def unflatten(self, value, covector=False): result = [] @@ -137,6 +138,7 @@ class DictVSpace(VSpace): def __init__(self, value): self.shape = {k : vspace(v) for k, v in iteritems(value)} self.size = sum(s.size for s in self.shape.values()) + self.lib = vspace(next(iter(value.values()))).lib def zeros(self): return {k : v.zeros() for k, v in iteritems(self.shape)} def mut_add(self, xs, ys): @@ -144,11 +146,11 @@ def mut_add(self, xs, ys): for k, v in iteritems(self.shape)} def flatten(self, value, covector=False): if self.shape: - return np.concatenate( + return self.lib.concatenate( [s.flatten(value[k], covector) for k, s in sorted(iteritems(self.shape))]) else: - return np.zeros((0,)) + return self.lib.zeros((0,)) def unflatten(self, value, covector=False): result = {} diff --git a/autograd/core.py b/autograd/core.py index 8c264e15..a5aefc08 100644 --- a/autograd/core.py +++ b/autograd/core.py @@ -2,7 +2,7 @@ import sys import types import numpy as np -import numpy.random as npr +import cupy as cp from functools import partial import warnings from .errors import defgrad_deprecated @@ -222,11 +222,10 @@ def __repr__(self): def examples(self): # Used for testing only N = self.size - unit_vect = np.zeros(N) - unit_vect[npr.randint(N)] = 1.0 + unit_vect = self.lib.zeros(N) + unit_vect[self.lib.random.randint(N)] = 1.0 unit_vect = self.unflatten(unit_vect) - rand_vect = npr.randn(N) - return [self.zeros(), self.unflatten(npr.randn(N))] + return [self.zeros(), self.unflatten(self.lib.random.randn(N))] def vspace_flatten(value, covector=False): return vspace(value).flatten(value, covector) diff --git a/autograd/cupy/cupy_extra.py b/autograd/cupy/cupy_extra.py index a817862a..91a81e7b 100644 --- a/autograd/cupy/cupy_extra.py +++ b/autograd/cupy/cupy_extra.py @@ -78,6 +78,7 @@ def __init__(self, value): self.size = value.size 
self.dtype = value.dtype self.scalartype = float + self.lib = cupy def zeros(self): return acp.zeros(self.shape, dtype=self.dtype) diff --git a/autograd/numpy/numpy_extra.py b/autograd/numpy/numpy_extra.py index 1fb89e31..c8d78a4a 100644 --- a/autograd/numpy/numpy_extra.py +++ b/autograd/numpy/numpy_extra.py @@ -72,6 +72,7 @@ def __init__(self, value): self.dtype = value.dtype self.ndim = value.ndim self.scalartype = float + self.lib = np def zeros(self): return np.zeros(self.shape, dtype=self.dtype) diff --git a/autograd/optimizers.py b/autograd/optimizers.py index b1ffb2d1..79506b7e 100644 --- a/autograd/optimizers.py +++ b/autograd/optimizers.py @@ -10,6 +10,7 @@ import autograd.numpy as np from autograd.util import flatten_func +from autograd.core import vspace from builtins import range @@ -44,9 +45,10 @@ def adam(grad, init_params, callback=None, num_iters=100, """Adam as described in http://arxiv.org/pdf/1412.6980.pdf. It's basically RMSprop with momentum and some correction terms.""" flattened_grad, unflatten, x = flatten_func(grad, init_params) + lib = vspace(x).lib - m = np.zeros(len(x)) - v = np.zeros(len(x)) + m = lib.zeros(len(x), dtype=x.dtype) + v = lib.zeros(len(x), dtype=x.dtype) for i in range(num_iters): g = flattened_grad(x, i) if callback: callback(unflatten(x), i, unflatten(g)) @@ -54,5 +56,5 @@ def adam(grad, init_params, callback=None, num_iters=100, v = (1 - b2) * (g**2) + b2 * v # Second moment estimate. mhat = m / (1 - b1**(i + 1)) # Bias correction. 
vhat = v / (1 - b2**(i + 1)) - x = x - step_size*mhat/(np.sqrt(vhat) + eps) + x = x - step_size*mhat/(lib.sqrt(vhat) + eps) return unflatten(x) From bbb5deb0135acbef63626d92ce5b8c6b918fccb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 31 Jul 2017 14:18:09 -0400 Subject: [PATCH 5/6] Remove unnecessary imports --- autograd/core.py | 2 -- autograd/util.py | 1 - 2 files changed, 3 deletions(-) diff --git a/autograd/core.py b/autograd/core.py index a5aefc08..87502f38 100644 --- a/autograd/core.py +++ b/autograd/core.py @@ -1,8 +1,6 @@ from __future__ import absolute_import import sys import types -import numpy as np -import cupy as cp from functools import partial import warnings from .errors import defgrad_deprecated diff --git a/autograd/util.py b/autograd/util.py index 06d6b62f..613aebaa 100644 --- a/autograd/util.py +++ b/autograd/util.py @@ -4,7 +4,6 @@ from builtins import range import autograd.numpy as np -import autograd.cupy as cp from autograd.convenience_wrappers import grad from autograd.core import vspace, vspace_flatten, getval From 8043803401d2a3c5264fbcf9858264d5e178e38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=20van=20Merri=C3=ABnboer?= Date: Mon, 31 Jul 2017 14:31:44 -0400 Subject: [PATCH 6/6] Remove test for empty sequence space --- tests/test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_flatten.py b/tests/test_flatten.py index 4a159ea4..a14d8e58 100644 --- a/tests/test_flatten.py +++ b/tests/test_flatten.py @@ -4,7 +4,7 @@ from autograd import make_vjp, grad def test_flatten(): - val = (npr.randn(4), [npr.randn(3,4), 2.5], (), (2.0, [1.0, npr.randn(2)])) + val = (npr.randn(4), [npr.randn(3,4), 2.5], (2.0, [1.0, npr.randn(2)])) vect, unflatten = flatten(val) val_recovered = unflatten(vect) vect_2, _ = flatten(val_recovered)