import numpy as np

N = 20
D = 5
K = 3

# Forward computation: a linear "autoencoder" that maps X through a
# K-dimensional hidden layer H and back to a D-dimensional reconstruction F.
X = np.random.randn(N, D)
W1 = np.random.randn(K, D)
W2 = np.random.randn(D, K)
H = np.dot(X, W1.T)
F = np.dot(H, W2.T)

# Create function for error and expensively approximate gradients by finite
# differences:
E_fn = lambda W1, W2: np.sum((np.dot(np.dot(X, W1.T), W2.T) - X)**2)

def tut5_checkgrad(fn, hh, *args):
    """Return all approx partial derivatives of fn wrt args"""
    bars = []
    for arg in args:
        bar = np.zeros_like(arg)
        # ravel() gives a flat *view* into the (contiguous) array, so writing
        # to arg_view perturbs the same array that fn sees via *args.
        arg_view = arg.ravel()
        bar_view = bar.ravel()
        for ii in range(arg_view.size):
            cc = arg_view[ii]
            # Central differences: evaluate at cc +/- hh/2, then restore cc.
            arg_view[ii] = cc + hh/2.0
            f2 = fn(*args)
            arg_view[ii] = cc - hh/2.0
            f1 = fn(*args)
            arg_view[ii] = cc
            bar_view[ii] = (f2 - f1) / hh
        bars.append(bar)
    return bars

W1_bar_fd, W2_bar_fd = tut5_checkgrad(E_fn, 1e-5, W1, W2)

# Backpropagation of error
F_bar = 2*(F - X)
H_bar = np.dot(F_bar, W2)
W2_bar = np.dot(F_bar.T, H)
W1_bar = np.dot(H_bar.T, X)

# Expect both of these errors to be fairly small:
err1 = np.max(np.abs(W1_bar - W1_bar_fd))
err2 = np.max(np.abs(W2_bar - W2_bar_fd))
print(err1)
print(err2)
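
# Optional follow-up (a sketch, not part of the original tutorial code):
# the absolute errors above depend on the scale of the gradients, so a
# scale-invariant relative error is often a more robust check. The helper
# name rel_err, the tiny denominator constant, and the printed values are
# illustrative choices, not from the tutorial.
def rel_err(analytic, fd, tiny=1e-12):
    """Largest elementwise relative difference between two gradient arrays."""
    return np.max(np.abs(analytic - fd) / (np.abs(analytic) + np.abs(fd) + tiny))

# For correct gradients these should be close to floating-point/finite-
# difference noise (roughly 1e-7 or smaller at hh = 1e-5).
print(rel_err(W1_bar, W1_bar_fd))
print(rel_err(W2_bar, W2_bar_fd))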