Cupy fixes #265

Open · wants to merge 6 commits into base: cupy
12 changes: 7 additions & 5 deletions autograd/container_types.py
@@ -66,6 +66,7 @@ def __init__(self, value):
        self.shape = [vspace(x) for x in value]
        self.size = sum(s.size for s in self.shape)
        self.sequence_type = type(value)
+        self.lib = vspace(value[0]).lib if value else np  # numpy fallback for empty sequences
        assert self.sequence_type in (tuple, list)

    def zeros(self):
@@ -77,10 +78,10 @@ def mut_add(self, xs, ys):

    def flatten(self, value, covector=False):
        if self.shape:
-            return np.concatenate(
-                [vs.flatten(v, covector) for vs, v in zip(self.shape, value)])
+            return self.lib.concatenate([vs.flatten(v, covector)
+                                         for vs, v in zip(self.shape, value)])
        else:
-            return np.zeros((0,))
+            return self.lib.zeros((0,))

    def unflatten(self, value, covector=False):
        result = []
@@ -137,18 +138,19 @@ class DictVSpace(VSpace):
    def __init__(self, value):
        self.shape = {k : vspace(v) for k, v in iteritems(value)}
        self.size = sum(s.size for s in self.shape.values())
+        self.lib = vspace(next(iter(value.values()))).lib if value else np  # numpy fallback for empty dicts
    def zeros(self):
        return {k : v.zeros() for k, v in iteritems(self.shape)}
    def mut_add(self, xs, ys):
        return {k : v.mut_add(xs[k], ys[k])
                for k, v in iteritems(self.shape)}
    def flatten(self, value, covector=False):
        if self.shape:
-            return np.concatenate(
+            return self.lib.concatenate(
                [s.flatten(value[k], covector)
                 for k, s in sorted(iteritems(self.shape))])
        else:
-            return np.zeros((0,))
+            return self.lib.zeros((0,))

    def unflatten(self, value, covector=False):
        result = {}
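With `self.lib` picked from the leaves' vspace, container flattening now concatenates on whatever backend the leaves live on instead of hardcoding numpy. A minimal sketch of the effect, assuming this branch's `autograd.cupy` wrapper, a working cupy install, and `flatten` from `autograd.util` as used in tests/test_flatten.py:

```python
# Flattening a container of GPU arrays stays on the GPU:
# SequenceVSpace.flatten dispatches to self.lib.concatenate (cupy here).
import autograd.cupy as acp
from autograd.util import flatten

val = [acp.ones((3,)), acp.zeros((2, 2))]
vect, unflatten = flatten(val)
print(type(vect))                # a cupy ndarray, not numpy
print(unflatten(vect)[1].shape)  # (2, 2) -- the structure round-trips
```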
9 changes: 3 additions & 6 deletions autograd/core.py
@@ -1,8 +1,6 @@
from __future__ import absolute_import
import sys
import types
-import numpy as np
-import numpy.random as npr
from functools import partial
import warnings
from .errors import defgrad_deprecated
@@ -222,11 +220,10 @@ def __repr__(self):
    def examples(self):
        # Used for testing only
        N = self.size
-        unit_vect = np.zeros(N)
-        unit_vect[npr.randint(N)] = 1.0
+        unit_vect = self.lib.zeros(N)
+        unit_vect[self.lib.random.randint(N)] = 1.0
        unit_vect = self.unflatten(unit_vect)
-        rand_vect = npr.randn(N)
-        return [self.zeros(), self.unflatten(npr.randn(N))]
+        return [self.zeros(), self.unflatten(self.lib.random.randn(N))]

def vspace_flatten(value, covector=False):
    return vspace(value).flatten(value, covector)
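`examples()` now builds its test vectors through `self.lib`, so a cupy-backed vspace yields cupy test vectors. A quick illustration on the numpy side (the cupy side behaves the same way, modulo the device); `vspace` is importable from `autograd.core` as the optimizers change below shows:

```python
# vspace(x).examples() returns [zeros, random vector], both built with
# the vspace's own backend (numpy here, cupy for GPU arrays).
import autograd.numpy as np
from autograd.core import vspace

vs = vspace(np.zeros(3))
for ex in vs.examples():
    print(type(ex), ex.shape)
```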
2 changes: 2 additions & 0 deletions autograd/cupy/__init__.py
@@ -2,4 +2,6 @@
from . import cupy_wrapper
from . import cupy_grads
from . import cupy_extra
+from . import random
+from . import linalg
from .cupy_wrapper import *
4 changes: 4 additions & 0 deletions autograd/cupy/cupy_extra.py
@@ -78,10 +78,14 @@ def __init__(self, value):
        self.size = value.size
        self.dtype = value.dtype
        self.scalartype = float
+        self.lib = cupy

    def zeros(self):
        return acp.zeros(self.shape, dtype=self.dtype)

+    def ones(self):
+        return acp.ones(self.shape, dtype=self.dtype)
+
    def flatten(self, value, covector=False):
        return acp.ravel(value)
2 changes: 2 additions & 0 deletions autograd/cupy/cupy_grads.py
@@ -20,6 +20,8 @@
acp.subtract.defvjp(lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, -g), argnum=1)
acp.divide.defvjp(  lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g / y))
acp.divide.defvjp(  lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, - g * x / y**2), argnum=1)
+acp.true_divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g / y))
+acp.true_divide.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, - g * x / y**2), argnum=1)
acp.maximum.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g * balanced_eq(x, ans, y)))
acp.maximum.defvjp( lambda g, ans, vs, gvs, x, y: unbroadcast(vs, gvs, g * balanced_eq(y, ans, x)),
                    argnum=1)
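The `true_divide` entries mirror the existing `divide` VJPs. They matter because Python 3's `/` operator dispatches to `true_divide` rather than `divide`, which previously had no gradient on the cupy backend. A sanity check, assuming a working cupy install:

```python
# d/dx sum(x / 2) = 0.5 everywhere; `x / 2.0` goes through true_divide,
# so this call needs the two VJPs defined above.
import autograd.cupy as acp
from autograd import grad

f = lambda x: acp.sum(x / 2.0)
print(grad(f)(acp.ones(3)))   # expected: [0.5, 0.5, 0.5]
```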
3 changes: 2 additions & 1 deletion autograd/cupy/cupy_wrapper.py
@@ -17,7 +17,8 @@
def wrap_namespace(old, new):
    unchanged_types = {float, int, type(None), type}
    int_types = {_cupy.int8, _cupy.int16, _cupy.int32, _cupy.int64, _cupy.integer}
-    function_types = {_cupy.ufunc, types.FunctionType, types.BuiltinFunctionType}
+    function_types = {_cupy.ufunc, _cupy.fusion.ufunc,
+                      types.FunctionType, types.BuiltinFunctionType}
    for name, obj in iteritems(old):
        if obj in nograd_functions:
            new[name] = nograd_primitive(obj)
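Adding `_cupy.fusion.ufunc` to `function_types` lets `wrap_namespace` treat cupy's fused ufuncs as differentiable primitives; in the cupy versions this branch targets, some top-level ufuncs are `cupy.fusion.ufunc` instances rather than `cupy.ufunc`, so they were previously copied through unwrapped. A quick way to inspect which type a given ufunc has (the output depends on the installed cupy version):

```python
# If this reports a fusion ufunc type, the extra entry in function_types
# is what routes the function through primitive() instead of skipping it.
import cupy
print(type(cupy.add))
print(isinstance(cupy.add, cupy.fusion.ufunc))
```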
62 changes: 62 additions & 0 deletions autograd/cupy/linalg.py
@@ -0,0 +1,62 @@
from __future__ import absolute_import
from functools import partial  # used by the nuclear-norm branch below
import cupy.linalg as cpla
from .cupy_wrapper import wrap_namespace
from . import cupy_wrapper as acp

wrap_namespace(cpla.__dict__, globals())


def grad_norm(g, ans, vs, gvs, x, ord=None, axis=None):
def check_implemented():
matrix_norm = (x.ndim == 2 and axis is None) or isinstance(axis, tuple)

if matrix_norm:
if not (ord is None or ord == 'fro' or ord == 'nuc'):
raise NotImplementedError('Gradient of matrix norm not '
'implemented for ord={}'.format(ord))
elif not (ord is None or ord > 1):
raise NotImplementedError('Gradient of norm not '
'implemented for ord={}'.format(ord))

if axis is None:
expand = lambda a: a
elif isinstance(axis, tuple):
row_axis, col_axis = axis
if row_axis > col_axis:
row_axis = row_axis - 1
expand = lambda a: acp.expand_dims(acp.expand_dims(a,
row_axis), col_axis)
else:
expand = lambda a: acp.expand_dims(a, axis=axis)

if ord == 'nuc':
if axis is None:
roll = lambda a: a
unroll = lambda a: a
else:
row_axis, col_axis = axis
if row_axis > col_axis:
row_axis = row_axis - 1
# Roll matrix axes to the back
roll = lambda a: acp.rollaxis(acp.rollaxis(a, col_axis, a.ndim),
row_axis, a.ndim-1)
# Roll matrix axes to their original position
unroll = lambda a: acp.rollaxis(acp.rollaxis(a, a.ndim-2, row_axis),
a.ndim-1, col_axis)

check_implemented()
    if ord is None or ord == 2 or ord == 'fro':
return expand(g / ans) * x
elif ord == 'nuc':
dot = acp.dot if x.ndim == 2 else partial(acp.einsum, '...ij,...jk->...ik')
x_rolled = roll(x)
u, s, vt = svd(x_rolled, full_matrices=False)
uvt_rolled = dot(u, vt)
# Roll the matrix axes back to their correct positions
uvt = unroll(uvt_rolled)
g = expand(g)
return g * uvt
else:
# see https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
return expand(g / ans**(ord-1)) * x * acp.abs(x)**(ord-2)
norm.defvjp(grad_norm)
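A small check of the new `norm` VJP against the closed form d||x||_2/dx = x / ||x||_2, assuming a working cupy install (`norm` here is the wrapped `cupy.linalg.norm`):

```python
# grad of the 2-norm is x / ||x||_2; for x = [3, 4] that is [0.6, 0.8].
import autograd.cupy as acp
import autograd.cupy.linalg as linalg
from autograd import grad

x = acp.array([3.0, 4.0])
print(grad(linalg.norm)(x))   # expected: [0.6, 0.8]
```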
1 change: 1 addition & 0 deletions autograd/numpy/numpy_extra.py
@@ -72,6 +72,7 @@ def __init__(self, value):
        self.dtype = value.dtype
        self.ndim = value.ndim
        self.scalartype = float
+        self.lib = np

    def zeros(self):
        return np.zeros(self.shape, dtype=self.dtype)
8 changes: 5 additions & 3 deletions autograd/optimizers.py
@@ -10,6 +10,7 @@

import autograd.numpy as np
from autograd.util import flatten_func
from autograd.core import vspace
from builtins import range


@@ -44,15 +45,16 @@ def adam(grad, init_params, callback=None, num_iters=100,
"""Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
It's basically RMSprop with momentum and some correction terms."""
flattened_grad, unflatten, x = flatten_func(grad, init_params)
lib = vspace(x).lib

m = np.zeros(len(x))
v = np.zeros(len(x))
m = lib.zeros(len(x), dtype=x.dtype)
v = lib.zeros(len(x), dtype=x.dtype)
for i in range(num_iters):
g = flattened_grad(x, i)
if callback: callback(unflatten(x), i, unflatten(g))
m = (1 - b1) * g + b1 * m # First moment estimate.
v = (1 - b2) * (g**2) + b2 * v # Second moment estimate.
mhat = m / (1 - b1**(i + 1)) # Bias correction.
vhat = v / (1 - b2**(i + 1))
x = x - step_size*mhat/(np.sqrt(vhat) + eps)
x = x - step_size*mhat/(lib.sqrt(vhat) + eps)
return unflatten(x)
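With the moments and the update now going through `lib`, adam can run entirely on the backend that `init_params` lives on. A toy usage sketch, assuming a working cupy install:

```python
# Minimizing a quadratic with GPU-resident parameters; the m/v moments
# are allocated with lib.zeros, i.e. cupy.zeros for cupy inputs.
import autograd.cupy as acp
from autograd import grad
from autograd.optimizers import adam

loss = lambda w, i: acp.sum(w ** 2)   # objective(params, iteration)
w_opt = adam(grad(loss), acp.ones(5), num_iters=200, step_size=0.1)
print(w_opt)                          # expected: close to all zeros
```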
2 changes: 1 addition & 1 deletion tests/test_flatten.py
@@ -4,7 +4,7 @@
from autograd import make_vjp, grad

def test_flatten():
-    val = (npr.randn(4), [npr.randn(3,4), 2.5], (), (2.0, [1.0, npr.randn(2)]))
+    val = (npr.randn(4), [npr.randn(3,4), 2.5], (2.0, [1.0, npr.randn(2)]))
    vect, unflatten = flatten(val)
    val_recovered = unflatten(vect)
    vect_2, _ = flatten(val_recovered)