|
20 | 20 | "import math\n", |
21 | 21 | "import torch\n", |
22 | 22 | "import gpytorch\n", |
| 23 | + "import tqdm.notebook as tqdm\n", |
23 | 24 | "from matplotlib import pyplot as plt\n", |
24 | 25 | "\n", |
25 | 26 | "# Make plots inline\n", |
|
121 | 122 | " super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)\n", |
122 | 123 | " self.mean_module = ConstantMean()\n", |
123 | 124 | " self.base_covar_module = ScaleKernel(RBFKernel())\n", |
124 | | - " self.covar_module = InducingPointKernel(self.base_covar_module, inducing_points=train_x[:500, :], likelihood=likelihood)\n", |
| 125 | + " self.covar_module = InducingPointKernel(self.base_covar_module, inducing_points=train_x[:500, :].clone(), likelihood=likelihood)\n", |
125 | 126 | "\n", |
126 | 127 | " def forward(self, x):\n", |
127 | 128 | " mean_x = self.mean_module(x)\n", |
|
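A note on the `.clone()` added to the inducing points above (a hedged explanation: `InducingPointKernel` registers its `inducing_points` argument as a learnable `torch.nn.Parameter`, and a `Parameter` built from a tensor shares that tensor's storage). Without the copy, optimizer updates to the inducing points could overwrite the first 500 rows of `train_x` in place. A minimal standalone sketch of the aliasing hazard:

```python
import torch

train_x = torch.randn(1000, 3)
alias = torch.nn.Parameter(train_x[:500, :])         # shares storage with train_x
safe = torch.nn.Parameter(train_x[:500, :].clone())  # independent copy

alias.data.mul_(0.0)              # simulate an in-place optimizer update
print(train_x[:500].abs().sum())  # tensor(0.) -- the training data was clobbered
print(safe.abs().sum() > 0)       # tensor(True) -- the clone is unaffected
```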
154 | 155 | "cell_type": "code", |
155 | 156 | "execution_count": 6, |
156 | 157 | "metadata": { |
157 | | - "scrolled": false |
| 158 | + "scrolled": true |
158 | 159 | }, |
159 | 160 | "outputs": [ |
| 161 | + { |
| 162 | + "data": { |
| 163 | + "application/vnd.jupyter.widget-view+json": { |
| 164 | + "model_id": "90181179167b4cf3949a43c21dd8f10c", |
| 165 | + "version_major": 2, |
| 166 | + "version_minor": 0 |
| 167 | + }, |
| 168 | + "text/plain": [ |
| 169 | + "Train: 0%| | 0/100 [00:00<?, ?it/s]" |
| 170 | + ] |
| 171 | + }, |
| 172 | + "metadata": {}, |
| 173 | + "output_type": "display_data" |
| 174 | + }, |
160 | 175 | { |
161 | 176 | "name": "stdout", |
162 | 177 | "output_type": "stream", |
163 | 178 | "text": [ |
164 | | - "Iter 1/50 - Loss: 0.794\n", |
165 | | - "Iter 2/50 - Loss: 0.782\n", |
166 | | - "Iter 3/50 - Loss: 0.770\n", |
167 | | - "Iter 4/50 - Loss: 0.758\n", |
168 | | - "Iter 5/50 - Loss: 0.746\n", |
169 | | - "Iter 6/50 - Loss: 0.734\n", |
170 | | - "Iter 7/50 - Loss: 0.721\n", |
171 | | - "Iter 8/50 - Loss: 0.708\n", |
172 | | - "Iter 9/50 - Loss: 0.695\n", |
173 | | - "Iter 10/50 - Loss: 0.681\n", |
174 | | - "Iter 11/50 - Loss: 0.667\n", |
175 | | - "Iter 12/50 - Loss: 0.654\n", |
176 | | - "Iter 13/50 - Loss: 0.641\n", |
177 | | - "Iter 14/50 - Loss: 0.626\n", |
178 | | - "Iter 15/50 - Loss: 0.613\n", |
179 | | - "Iter 16/50 - Loss: 0.598\n", |
180 | | - "Iter 17/50 - Loss: 0.584\n", |
181 | | - "Iter 18/50 - Loss: 0.571\n", |
182 | | - "Iter 19/50 - Loss: 0.555\n", |
183 | | - "Iter 20/50 - Loss: 0.541\n", |
184 | | - "Iter 21/50 - Loss: 0.526\n", |
185 | | - "Iter 22/50 - Loss: 0.510\n", |
186 | | - "Iter 23/50 - Loss: 0.495\n", |
187 | | - "Iter 24/50 - Loss: 0.481\n", |
188 | | - "Iter 25/50 - Loss: 0.465\n", |
189 | | - "Iter 26/50 - Loss: 0.449\n", |
190 | | - "Iter 27/50 - Loss: 0.435\n", |
191 | | - "Iter 28/50 - Loss: 0.417\n", |
192 | | - "Iter 29/50 - Loss: 0.401\n", |
193 | | - "Iter 30/50 - Loss: 0.384\n", |
194 | | - "Iter 31/50 - Loss: 0.369\n", |
195 | | - "Iter 32/50 - Loss: 0.351\n", |
196 | | - "Iter 33/50 - Loss: 0.336\n", |
197 | | - "Iter 34/50 - Loss: 0.319\n", |
198 | | - "Iter 35/50 - Loss: 0.303\n", |
199 | | - "Iter 36/50 - Loss: 0.286\n", |
200 | | - "Iter 37/50 - Loss: 0.269\n", |
201 | | - "Iter 38/50 - Loss: 0.253\n", |
202 | | - "Iter 39/50 - Loss: 0.236\n", |
203 | | - "Iter 40/50 - Loss: 0.217\n", |
204 | | - "Iter 41/50 - Loss: 0.200\n", |
205 | | - "Iter 42/50 - Loss: 0.181\n", |
206 | | - "Iter 43/50 - Loss: 0.167\n", |
207 | | - "Iter 44/50 - Loss: 0.149\n", |
208 | | - "Iter 45/50 - Loss: 0.132\n", |
209 | | - "Iter 46/50 - Loss: 0.112\n", |
210 | | - "Iter 47/50 - Loss: 0.096\n", |
211 | | - "Iter 48/50 - Loss: 0.078\n", |
212 | | - "Iter 49/50 - Loss: 0.061\n", |
213 | | - "Iter 50/50 - Loss: 0.044\n", |
214 | | - "CPU times: user 2min 47s, sys: 7.87 s, total: 2min 55s\n", |
215 | | - "Wall time: 34.6 s\n" |
| 179 | + "CPU times: user 2.7 s, sys: 852 ms, total: 3.55 s\n", |
| 180 | + "Wall time: 3.58 s\n" |
216 | 181 | ] |
217 | 182 | } |
218 | 183 | ], |
219 | 184 | "source": [ |
220 | | - "training_iterations = 2 if smoke_test else 50\n", |
| 185 | + "training_iterations = 2 if smoke_test else 100\n", |
221 | 186 | "\n", |
222 | 187 | "# Find optimal model hyperparameters\n", |
223 | 188 | "model.train()\n", |
|
230 | 195 | "mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)\n", |
231 | 196 | "\n", |
232 | 197 | "def train():\n", |
233 | | - " for i in range(training_iterations):\n", |
| 198 | + " iterator = tqdm.tqdm(range(training_iterations), desc=\"Train\")\n", |
| 199 | + "\n", |
| 200 | + " for i in iterator:\n", |
234 | 201 | " # Zero backprop gradients\n", |
235 | 202 | " optimizer.zero_grad()\n", |
236 | 203 | " # Get output from model\n", |
237 | 204 | " output = model(train_x)\n", |
238 | 205 | " # Calc loss and backprop derivatives\n", |
239 | 206 | " loss = -mll(output, train_y)\n", |
240 | 207 | " loss.backward()\n", |
241 | | - " print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))\n", |
| 208 | + " iterator.set_postfix(loss=loss.item())\n", |
242 | 209 | " optimizer.step()\n", |
243 | 210 | " torch.cuda.empty_cache()\n", |
244 | 211 | " \n", |
245 | | - "# See dkl_mnist.ipynb for explanation of this flag\n", |
246 | 212 | "%time train()" |
247 | 213 | ] |
248 | 214 | }, |
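The training loop now drives a `tqdm` progress bar and reports the loss via `set_postfix` instead of printing one line per iteration, which is why the cell output above shrinks to a single progress widget plus the timing lines. A minimal sketch of the pattern (a dummy value stands in for `-mll(output, train_y)`; `tqdm.notebook` assumes a Jupyter frontend with ipywidgets — use `from tqdm import tqdm` in plain scripts):

```python
import tqdm.notebook as tqdm

iterator = tqdm.tqdm(range(100), desc="Train")
for i in iterator:
    loss = 1.0 / (i + 1)             # dummy stand-in for the real training loss
    iterator.set_postfix(loss=loss)  # live readout on the bar, one line total
```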
249 | 215 | { |
250 | 216 | "cell_type": "markdown", |
251 | 217 | "metadata": {}, |
252 | 218 | "source": [ |
253 | | - "### Making Predictions\n", |
254 | | - "\n", |
255 | | - "The next cell makes predictions with SKIP. We use the same max_root_decomposition size, and we also demonstrate increasing the max preconditioner size. Increasing the preconditioner size on this dataset is **not** necessary, but can make a big difference in final test performance, and is often preferable to increasing the number of CG iterations if you can afford the space." |
| 219 | + "### Making Predictions" |
256 | 220 | ] |
257 | 221 | }, |
258 | 222 | { |
259 | 223 | "cell_type": "code", |
260 | 224 | "execution_count": 7, |
261 | 225 | "metadata": {}, |
262 | | - "outputs": [], |
263 | | - "source": [ |
264 | | - "model.eval()\n", |
265 | | - "likelihood.eval()\n", |
266 | | - "with gpytorch.settings.max_preconditioner_size(10), torch.no_grad():\n", |
267 | | - " preds = model(test_x)" |
268 | | - ] |
269 | | - }, |
270 | | - { |
271 | | - "cell_type": "code", |
272 | | - "execution_count": 8, |
273 | | - "metadata": {}, |
274 | 226 | "outputs": [ |
275 | 227 | { |
276 | 228 | "name": "stdout", |
277 | 229 | "output_type": "stream", |
278 | 230 | "text": [ |
279 | | - "Test MAE: 0.07271435856819153\n" |
| 231 | + "Test MAE: 0.07258129864931107\n", |
| 232 | + "Test NLL: 0.3463870584964752\n" |
280 | 233 | ] |
281 | 234 | } |
282 | 235 | ], |
283 | 236 | "source": [ |
284 | | - "print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))" |
| 237 | + "model.eval()\n", |
| 238 | + "likelihood.eval()\n", |
| 239 | + "with torch.no_grad():\n", |
| 240 | + " preds = model.likelihood(model(test_x))\n", |
| 241 | + " print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))\n", |
| 242 | + " print('Test NLL: {}'.format(-preds.to_data_independent_dist().log_prob(test_y).mean().item()))" |
285 | 243 | ] |
286 | 244 | }, |
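The rewritten evaluation cell computes the predictive distribution and both metrics inside a single `torch.no_grad()` block. `to_data_independent_dist()` collapses the joint GP posterior to its per-point marginals (a diagonal `torch.distributions.Normal`), so `log_prob` scores each test point independently and the mean gives an average NLL. A sketch, assuming `model`, `likelihood`, `test_x`, and `test_y` from the cells above:

```python
model.eval()
likelihood.eval()
with torch.no_grad():
    preds = likelihood(model(test_x))  # predictive distribution incl. observation noise
    mae = torch.mean(torch.abs(preds.mean - test_y))
    marginals = preds.to_data_independent_dist()  # diagonal Normal over test points
    nll = -marginals.log_prob(test_y).mean()      # average negative log likelihood
```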
287 | 245 | { |
|
295 | 253 | "metadata": { |
296 | 254 | "anaconda-cloud": {}, |
297 | 255 | "kernelspec": { |
298 | | - "display_name": "Python 3", |
| 256 | + "display_name": "Python 3 (ipykernel)", |
299 | 257 | "language": "python", |
300 | 258 | "name": "python3" |
301 | 259 | }, |
|
309 | 267 | "name": "python", |
310 | 268 | "nbconvert_exporter": "python", |
311 | 269 | "pygments_lexer": "ipython3", |
312 | | - "version": "3.7.3" |
| 270 | + "version": "3.8.0" |
313 | 271 | } |
314 | 272 | }, |
315 | 273 | "nbformat": 4, |
|