diff --git a/pytorch/.vscode/settings.json b/pytorch/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..55be5ed8073002e60127c53ad084e3ffa68f9c16 --- /dev/null +++ b/pytorch/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "/home/marmot/miniconda3/envs/dnn/bin/python" +} \ No newline at end of file diff --git a/pytorch/generate-names/.gitignore b/pytorch/generate-names/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..6320cd248dd8aeaab759d5871f8781b5c0505172 --- /dev/null +++ b/pytorch/generate-names/.gitignore @@ -0,0 +1 @@ +data \ No newline at end of file diff --git a/pytorch/generate-names/.vscode/.ropeproject/config.py b/pytorch/generate-names/.vscode/.ropeproject/config.py new file mode 100644 index 0000000000000000000000000000000000000000..dee2d1ae9a6be9cf0248130b6c6b9e2668052079 --- /dev/null +++ b/pytorch/generate-names/.vscode/.ropeproject/config.py @@ -0,0 +1,114 @@ +# The default ``config.py`` +# flake8: noqa + + +def set_prefs(prefs): + """This function is called before opening the project""" + + # Specify which files and folders to ignore in the project. + # Changes to ignored resources are not added to the history and + # VCSs. Also they are not returned in `Project.get_files()`. + # Note that ``?`` and ``*`` match all characters but slashes. + # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc' + # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc' + # '.svn': matches 'pkg/.svn' and all of its children + # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o' + # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o' + prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject', + '.hg', '.svn', '_svn', '.git', '.tox'] + + # Specifies which files should be considered python files. It is + # useful when you have scripts inside your project. Only files + # ending with ``.py`` are considered to be python files by + # default. + # prefs['python_files'] = ['*.py'] + + # Custom source folders: By default rope searches the project + # for finding source folders (folders that should be searched + # for finding modules). You can add paths to that list. Note + # that rope guesses project source folders correctly most of the + # time; use this if you have any problems. + # The folders should be relative to project root and use '/' for + # separating folders regardless of the platform rope is running on. + # 'src/my_source_folder' for instance. + # prefs.add('source_folders', 'src') + + # You can extend python path for looking up modules + # prefs.add('python_path', '~/python/') + + # Should rope save object information or not. + prefs['save_objectdb'] = True + prefs['compress_objectdb'] = False + + # If `True`, rope analyzes each module when it is being saved. + prefs['automatic_soa'] = True + # The depth of calls to follow in static object analysis + prefs['soa_followed_calls'] = 0 + + # If `False` when running modules or unit tests "dynamic object + # analysis" is turned off. This makes them much faster. + prefs['perform_doa'] = True + + # Rope can check the validity of its object DB when running. + prefs['validate_objectdb'] = True + + # How many undos to hold? + prefs['max_history_items'] = 32 + + # Shows whether to save history across sessions. + prefs['save_history'] = True + prefs['compress_history'] = False + + # Set the number spaces used for indenting. According to + # :PEP:`8`, it is best to use 4 spaces. Since most of rope's + # unit-tests use 4 spaces it is more reliable, too. 
+ prefs['indent_size'] = 4 + + # Builtin and c-extension modules that are allowed to be imported + # and inspected by rope. + prefs['extension_modules'] = [] + + # Add all standard c-extensions to extension_modules list. + prefs['import_dynload_stdmods'] = True + + # If `True` modules with syntax errors are considered to be empty. + # The default value is `False`; When `False` syntax errors raise + # `rope.base.exceptions.ModuleSyntaxError` exception. + prefs['ignore_syntax_errors'] = False + + # If `True`, rope ignores unresolvable imports. Otherwise, they + # appear in the importing namespace. + prefs['ignore_bad_imports'] = False + + # If `True`, rope will insert new module imports as + # `from import ` by default. + prefs['prefer_module_from_imports'] = False + + # If `True`, rope will transform a comma list of imports into + # multiple separate import statements when organizing + # imports. + prefs['split_imports'] = False + + # If `True`, rope will remove all top-level import statements and + # reinsert them at the top of the module when making changes. + prefs['pull_imports_to_top'] = True + + # If `True`, rope will sort imports alphabetically by module name instead + # of alphabetically by import statement, with from imports after normal + # imports. + prefs['sort_imports_alphabetically'] = False + + # Location of implementation of + # rope.base.oi.type_hinting.interfaces.ITypeHintingFactory In general + # case, you don't have to change this value, unless you're an rope expert. + # Change this value to inject you own implementations of interfaces + # listed in module rope.base.oi.type_hinting.providers.interfaces + # For example, you can add you own providers for Django Models, or disable + # the search type-hinting in a class hierarchy, etc. + prefs['type_hinting_factory'] = ( + 'rope.base.oi.type_hinting.factory.default_type_hinting_factory') + + +def project_opened(project): + """This function is called after opening the project""" + # Do whatever you like here! diff --git a/pytorch/generate-names/.vscode/.ropeproject/objectdb b/pytorch/generate-names/.vscode/.ropeproject/objectdb new file mode 100644 index 0000000000000000000000000000000000000000..0a47446c0ad231c193bdd44ff327ba2ab28bf3d8 Binary files /dev/null and b/pytorch/generate-names/.vscode/.ropeproject/objectdb differ diff --git a/pytorch/generate-names/.vscode/settings.json b/pytorch/generate-names/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..63d9a00b68325e36708886b6a87d68be4c38606a --- /dev/null +++ b/pytorch/generate-names/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.pythonPath": "/home/marmot/miniconda3/envs/dnn/bin/python", + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/pytorch/generate-names/char_rnn_generation_tutorial.py b/pytorch/generate-names/char_rnn_generation_tutorial.py new file mode 100644 index 0000000000000000000000000000000000000000..ec1d750ab26fbe8b4020aa92e38cecece75fe8e7 --- /dev/null +++ b/pytorch/generate-names/char_rnn_generation_tutorial.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +""" +NLP From Scratch: Generating Names with a Character-Level RNN +************************************************************* +**Author**: `Sean Robertson `_ + +This is our second of three tutorials on "NLP From Scratch". +In the `first tutorial ` +we used a RNN to classify names into their language of origin. This time +we'll turn around and generate names from languages. 
+ +:: + + > python sample.py Russian RUS + Rovakov + Uantov + Shavakov + + > python sample.py German GER + Gerren + Ereng + Rosher + + > python sample.py Spanish SPA + Salla + Parer + Allan + + > python sample.py Chinese CHI + Chan + Hang + Iun + +We are still hand-crafting a small RNN with a few linear layers. The big +difference is instead of predicting a category after reading in all the +letters of a name, we input a category and output one letter at a time. +Recurrently predicting characters to form language (this could also be +done with words or other higher order constructs) is often referred to +as a "language model". + +**Recommended Reading:** + +I assume you have at least installed PyTorch, know Python, and +understand Tensors: + +- https://pytorch.org/ For installation instructions +- :doc:`/beginner/deep_learning_60min_blitz` to get started with PyTorch in general +- :doc:`/beginner/pytorch_with_examples` for a wide and deep overview +- :doc:`/beginner/former_torchies_tutorial` if you are former Lua Torch user + +It would also be useful to know about RNNs and how they work: + +- `The Unreasonable Effectiveness of Recurrent Neural + Networks `__ + shows a bunch of real life examples +- `Understanding LSTM + Networks `__ + is about LSTMs specifically but also informative about RNNs in + general + +I also suggest the previous tutorial, :doc:`/intermediate/char_rnn_classification_tutorial` + + +Preparing the Data +================== + +.. Note:: + Download the data from + `here `_ + and extract it to the current directory. + +See the last tutorial for more detail of this process. In short, there +are a bunch of plain text files ``data/names/[Language].txt`` with a +name per line. We split lines into an array, convert Unicode to ASCII, +and end up with a dictionary ``{language: [names ...]}``. + +""" +from __future__ import unicode_literals, print_function, division +from io import open +import glob +import os +import unicodedata +import string + +all_letters = string.ascii_letters + " .,;'-" +n_letters = len(all_letters) + 1 # Plus EOS marker + +def findFiles(path): return glob.glob(path) + +# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427 +def unicodeToAscii(s): + return ''.join( + c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn' + and c in all_letters + ) + +# Read a file and split into lines +def readLines(filename): + lines = open(filename, encoding='utf-8').read().strip().split('\n') + return [unicodeToAscii(line) for line in lines] + +# Build the category_lines dictionary, a list of lines per category +category_lines = {} +all_categories = [] +for filename in findFiles('data/names/*.txt'): + category = os.path.splitext(os.path.basename(filename))[0] + all_categories.append(category) + lines = readLines(filename) + category_lines[category] = lines + +n_categories = len(all_categories) + +if n_categories == 0: + raise RuntimeError('Data not found. Make sure that you downloaded data ' + 'from https://download.pytorch.org/tutorial/data.zip and extract it to ' + 'the current directory.') + +print('# categories:', n_categories, all_categories) +print(unicodeToAscii("O'Néàl")) + + +###################################################################### +# Creating the Network +# ==================== +# +# This network extends `the last tutorial's RNN <#Creating-the-Network>`__ +# with an extra argument for the category tensor, which is concatenated +# along with the others. 
The category tensor is a one-hot vector just like +# the letter input. +# +# We will interpret the output as the probability of the next letter. When +# sampling, the most likely output letter is used as the next input +# letter. +# +# I added a second linear layer ``o2o`` (after combining hidden and +# output) to give it more muscle to work with. There's also a dropout +# layer, which `randomly zeros parts of its +# input `__ with a given probability +# (here 0.1) and is usually used to fuzz inputs to prevent overfitting. +# Here we're using it towards the end of the network to purposely add some +# chaos and increase sampling variety. +# +# .. figure:: https://i.imgur.com/jzVrf7f.png +# :alt: +# +# + +import torch +import torch.nn as nn + +class RNN(nn.Module): + def __init__(self, input_size, hidden_size, output_size): + super(RNN, self).__init__() + self.hidden_size = hidden_size + + self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size) + self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size) + self.o2o = nn.Linear(hidden_size + output_size, output_size) + self.dropout = nn.Dropout(0.1) + self.softmax = nn.LogSoftmax(dim=1) + + def forward(self, category, input, hidden): + input_combined = torch.cat((category, input, hidden), 1) + hidden = self.i2h(input_combined) + output = self.i2o(input_combined) + output_combined = torch.cat((hidden, output), 1) + output = self.o2o(output_combined) + output = self.dropout(output) + output = self.softmax(output) + return output, hidden + + def initHidden(self): + return torch.zeros(1, self.hidden_size) + + +###################################################################### +# Training +# ========= +# Preparing for Training +# ---------------------- +# +# First of all, helper functions to get random pairs of (category, line): +# + +import random + +# Random item from a list +def randomChoice(l): + return l[random.randint(0, len(l) - 1)] + +# Get a random category and random line from that category +def randomTrainingPair(): + category = randomChoice(all_categories) + line = randomChoice(category_lines[category]) + return category, line + + +###################################################################### +# For each timestep (that is, for each letter in a training word) the +# inputs of the network will be +# ``(category, current letter, hidden state)`` and the outputs will be +# ``(next letter, next hidden state)``. So for each training set, we'll +# need the category, a set of input letters, and a set of output/target +# letters. +# +# Since we are predicting the next letter from the current letter for each +# timestep, the letter pairs are groups of consecutive letters from the +# line - e.g. for ``"ABCD"`` we would create ("A", "B"), ("B", "C"), +# ("C", "D"), ("D", "EOS"). +# +# .. figure:: https://i.imgur.com/JH58tXY.png +# :alt: +# +# The category tensor is a `one-hot +# tensor `__ of size +# ``<1 x n_categories>``. When training we feed it to the network at every +# timestep - this is a design choice, it could have been included as part +# of initial hidden state or some other strategy. 
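+
+# Illustration only (an added sketch, not part of the original tutorial): the
+# shifted input/target pairing described above, written out in plain Python for
+# the example name "ABCD". The inputs are the letters of the name and the
+# targets are the same letters shifted left by one, with an EOS marker appended
+# (here shown as the placeholder string "<EOS>" rather than the real index).
+example_name = "ABCD"
+example_inputs = list(example_name)                   # ['A', 'B', 'C', 'D']
+example_targets = list(example_name[1:]) + ["<EOS>"]  # ['B', 'C', 'D', '<EOS>']
+print(list(zip(example_inputs, example_targets)))
+# [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', '<EOS>')]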
+# + +# One-hot vector for category +def categoryTensor(category): + li = all_categories.index(category) + tensor = torch.zeros(1, n_categories) + tensor[0][li] = 1 + return tensor + +# One-hot matrix of first to last letters (not including EOS) for input +def inputTensor(line): + tensor = torch.zeros(len(line), 1, n_letters) + for li in range(len(line)): + letter = line[li] + tensor[li][0][all_letters.find(letter)] = 1 + return tensor + +# LongTensor of second letter to end (EOS) for target +def targetTensor(line): + letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))] + letter_indexes.append(n_letters - 1) # EOS + return torch.LongTensor(letter_indexes) + + +###################################################################### +# For convenience during training we'll make a ``randomTrainingExample`` +# function that fetches a random (category, line) pair and turns them into +# the required (category, input, target) tensors. +# + +# Make category, input, and target tensors from a random category, line pair +def randomTrainingExample(): + category, line = randomTrainingPair() + category_tensor = categoryTensor(category) + input_line_tensor = inputTensor(line) + target_line_tensor = targetTensor(line) + return category_tensor, input_line_tensor, target_line_tensor + + +###################################################################### +# Training the Network +# -------------------- +# +# In contrast to classification, where only the last output is used, we +# are making a prediction at every step, so we are calculating loss at +# every step. +# +# The magic of autograd allows you to simply sum these losses at each step +# and call backward at the end. +# + +criterion = nn.NLLLoss() + +learning_rate = 0.0005 + +def train(category_tensor, input_line_tensor, target_line_tensor): + target_line_tensor.unsqueeze_(-1) + hidden = rnn.initHidden() + + rnn.zero_grad() + + loss = 0 + + for i in range(input_line_tensor.size(0)): + output, hidden = rnn(category_tensor, input_line_tensor[i], hidden) + l = criterion(output, target_line_tensor[i]) + loss += l + + loss.backward() + + for p in rnn.parameters(): + p.data.add_(-learning_rate, p.grad.data) + + return output, loss.item() / input_line_tensor.size(0) + + +###################################################################### +# To keep track of how long training takes I am adding a +# ``timeSince(timestamp)`` function which returns a human readable string: +# + +import time +import math + +def timeSince(since): + now = time.time() + s = now - since + m = math.floor(s / 60) + s -= m * 60 + return '%dm %ds' % (m, s) + + +###################################################################### +# Training is business as usual - call train a bunch of times and wait a +# few minutes, printing the current time and loss every ``print_every`` +# examples, and keeping store of an average loss per ``plot_every`` examples +# in ``all_losses`` for plotting later. 
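+
+# Optional sanity check (an added illustration, not part of the original
+# tutorial): peek at one random training example to confirm the tensor shapes
+# described earlier -- <1 x n_categories> for the category, a
+# <line_length x 1 x n_letters> one-hot matrix for the input, and a
+# <line_length> LongTensor (including the EOS index) for the target.
+_category, _input_line, _target_line = randomTrainingExample()
+print(_category.shape, _input_line.shape, _target_line.shape)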
+# + +rnn = RNN(n_letters, 128, n_letters) + +n_iters = 100000 +print_every = 5000 +plot_every = 500 +all_losses = [] +total_loss = 0 # Reset every plot_every iters + +start = time.time() + +for iter in range(1, n_iters + 1): + output, loss = train(*randomTrainingExample()) + total_loss += loss + + if iter % print_every == 0: + print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss)) + + if iter % plot_every == 0: + all_losses.append(total_loss / plot_every) + total_loss = 0 + + +###################################################################### +# Plotting the Losses +# ------------------- +# +# Plotting the historical loss from all\_losses shows the network +# learning: +# + +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker + +plt.figure() +plt.plot(all_losses) + + +###################################################################### +# Sampling the Network +# ==================== +# +# To sample we give the network a letter and ask what the next one is, +# feed that in as the next letter, and repeat until the EOS token. +# +# - Create tensors for input category, starting letter, and empty hidden +# state +# - Create a string ``output_name`` with the starting letter +# - Up to a maximum output length, +# +# - Feed the current letter to the network +# - Get the next letter from highest output, and next hidden state +# - If the letter is EOS, stop here +# - If a regular letter, add to ``output_name`` and continue +# +# - Return the final name +# +# .. Note:: +# Rather than having to give it a starting letter, another +# strategy would have been to include a "start of string" token in +# training and have the network choose its own starting letter. +# + +max_length = 20 + +# Sample from a category and starting letter +def sample(category, start_letter='A'): + with torch.no_grad(): # no need to track history in sampling + category_tensor = categoryTensor(category) + input = inputTensor(start_letter) + hidden = rnn.initHidden() + + output_name = start_letter + + for i in range(max_length): + output, hidden = rnn(category_tensor, input[0], hidden) + topv, topi = output.topk(1) + topi = topi[0][0] + if topi == n_letters - 1: + break + else: + letter = all_letters[topi] + output_name += letter + input = inputTensor(letter) + + return output_name + +# Get multiple samples from one category and multiple starting letters +def samples(category, start_letters='ABC'): + for start_letter in start_letters: + print(sample(category, start_letter)) + +samples('Russian', 'RUS') + +samples('German', 'GER') + +samples('Spanish', 'SPA') + +samples('Chinese', 'CHI') + + +###################################################################### +# Exercises +# ========= +# +# - Try with a different dataset of category -> line, for example: +# +# - Fictional series -> Character name +# - Part of speech -> Word +# - Country -> City +# +# - Use a "start of sentence" token so that sampling can be done without +# choosing a start letter +# - Get better results with a bigger and/or better shaped network +# +# - Try the nn.LSTM and nn.GRU layers +# - Combine multiple of these RNNs as a higher level network +# diff --git a/pytorch/generate-names/src/prepare-data.py b/pytorch/generate-names/src/prepare-data.py new file mode 100644 index 0000000000000000000000000000000000000000..ace40dc9cdaff733ae67875e894f3fd15d425153 --- /dev/null +++ b/pytorch/generate-names/src/prepare-data.py @@ -0,0 +1,48 @@ +import glob +import os +import unicodedata +import string + +all_letters = 
string.ascii_letters + " .,;'-" +n_letters = len(all_letters) + 1 # Plus EOS marker + + +def find_files(path): + return glob.glob(path) + + +# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427 +def unicode_to_ascii(s): + return "".join( + c + for c in unicodedata.normalize("NFD", s) + if unicodedata.category(c) != "Mn" and c in all_letters + ) + + +# Read a file and split into lines +def read_lines(filename): + lines = open(filename, encoding="utf-8").read().strip().split("\n") + return [unicode_to_ascii(line) for line in lines] + + +# Build the category_lines dictionary, a list of lines per category +category_lines = {} +all_categories = [] +for filename in find_files("data/names/*.txt"): + category = os.path.splitext(os.path.basename(filename))[0] + all_categories.append(category) + lines = read_lines(filename) + category_lines[category] = lines + +n_categories = len(all_categories) + +if n_categories == 0: + raise RuntimeError( + "Data not found. Make sure that you downloaded data " + "from https://download.pytorch.org/tutorial/data.zip and extract it to " + "the current directory." + ) + +print("# categories:", n_categories, all_categories) +print(unicode_to_ascii("O'Néàl")) diff --git a/pytorch/test/.gitignore b/pytorch/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..600d2d33badf45cc068e01d2e3c837e11c417bc4 --- /dev/null +++ b/pytorch/test/.gitignore @@ -0,0 +1 @@ +.vscode \ No newline at end of file diff --git a/pytorch/test/test-all.sh b/pytorch/test/test-all.sh new file mode 100755 index 0000000000000000000000000000000000000000..7d8cb58c162d91c0aee13590af9e7d486c29c0e7 --- /dev/null +++ b/pytorch/test/test-all.sh @@ -0,0 +1,2 @@ +python test-torch.py +python test-cuda.py \ No newline at end of file diff --git a/pytorch/test/test-cuda.py b/pytorch/test/test-cuda.py new file mode 100644 index 0000000000000000000000000000000000000000..339d19a0c52101be7a7a34c3e9f702ddbcaeaf3a --- /dev/null +++ b/pytorch/test/test-cuda.py @@ -0,0 +1,4 @@ +import torch + +is_available = torch.cuda.is_available() +print(is_available) diff --git a/pytorch/test/test-torch.py b/pytorch/test/test-torch.py new file mode 100644 index 0000000000000000000000000000000000000000..f6373e40d992d26c540ee767c1b8e74b5e84d895 --- /dev/null +++ b/pytorch/test/test-torch.py @@ -0,0 +1,5 @@ +from __future__ import print_function +import torch + +x = torch.rand(5, 3) +print(x) diff --git a/pytorch/tutorials/.gitignore b/pytorch/tutorials/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..6320cd248dd8aeaab759d5871f8781b5c0505172 --- /dev/null +++ b/pytorch/tutorials/.gitignore @@ -0,0 +1 @@ +data \ No newline at end of file diff --git a/pytorch/tutorials/.ipynb_checkpoints/PyTorchTutorial-checkpoint.ipynb b/pytorch/tutorials/.ipynb_checkpoints/PyTorchTutorial-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e5b0a95ccf72d4810f6f4ae07fde5d410c18234b --- /dev/null +++ b/pytorch/tutorials/.ipynb_checkpoints/PyTorchTutorial-checkpoint.ipynb @@ -0,0 +1,53 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pytorch/tutorials/.ipynb_checkpoints/nn_tutorial-checkpoint.ipynb b/pytorch/tutorials/.ipynb_checkpoints/nn_tutorial-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1bfb9044cc4f77a98ab588e7e95b37c2c20f6fbe --- /dev/null +++ b/pytorch/tutorials/.ipynb_checkpoints/nn_tutorial-checkpoint.ipynb @@ -0,0 +1,1497 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "What is `torch.nn` *really*?\n", + "============================\n", + "by Jeremy Howard, `fast.ai `_. Thanks to Rachel Thomas and Francisco Ingham.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file,\n", + "click the link at the top of the page.\n", + "\n", + "PyTorch provides the elegantly designed modules and classes `torch.nn `_ ,\n", + "`torch.optim `_ ,\n", + "`Dataset `_ ,\n", + "and `DataLoader `_\n", + "to help you create and train neural networks.\n", + "In order to fully utilize their power and customize\n", + "them for your problem, you need to really understand exactly what they're\n", + "doing. To develop this understanding, we will first train basic neural net\n", + "on the MNIST data set without using any features from these models; we will\n", + "initially only use the most basic PyTorch tensor functionality. Then, we will\n", + "incrementally add one feature from ``torch.nn``, ``torch.optim``, ``Dataset``, or\n", + "``DataLoader`` at a time, showing exactly what each piece does, and how it\n", + "works to make the code either more concise, or more flexible.\n", + "\n", + "**This tutorial assumes you already have PyTorch installed, and are familiar\n", + "with the basics of tensor operations.** (If you're familiar with Numpy array\n", + "operations, you'll find the PyTorch tensor operations used here nearly identical).\n", + "\n", + "MNIST data setup\n", + "----------------\n", + "\n", + "We will use the classic `MNIST `_ dataset,\n", + "which consists of black-and-white images of hand-drawn digits (between 0 and 9).\n", + "\n", + "We will use `pathlib `_\n", + "for dealing with paths (part of the Python 3 standard library), and will\n", + "download the dataset using\n", + "`requests `_. 
We will only\n", + "import modules when we use them, so you can see exactly what's being\n", + "used at each point.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import requests\n", + "\n", + "DATA_PATH = Path(\"data\")\n", + "PATH = DATA_PATH / \"mnist\"\n", + "\n", + "PATH.mkdir(parents=True, exist_ok=True)\n", + "\n", + "URL = \"http://deeplearning.net/data/mnist/\"\n", + "FILENAME = \"mnist.pkl.gz\"\n", + "\n", + "if not (PATH / FILENAME).exists():\n", + " content = requests.get(URL + FILENAME).content\n", + " (PATH / FILENAME).open(\"wb\").write(content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This dataset is in numpy array format, and has been stored using pickle,\n", + "a python-specific format for serializing data.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import gzip\n", + "\n", + "with gzip.open((PATH / FILENAME).as_posix(), \"rb\") as f:\n", + " ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding=\"latin-1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each image is 28 x 28, and is being stored as a flattened row of length\n", + "784 (=28x28). Let's take a look at one; we need to reshape it to 2d\n", + "first.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot\n", + "import numpy as np\n", + "\n", + "pyplot.imshow(x_train[0].reshape((28, 28)), cmap=\"gray\")\n", + "print(x_train.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PyTorch uses ``torch.tensor``, rather than numpy arrays, so we need to\n", + "convert our data.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "x_train, y_train, x_valid, y_valid = map(\n", + " torch.tensor, (x_train, y_train, x_valid, y_valid)\n", + ")\n", + "n, c = x_train.shape\n", + "x_train, x_train.shape, y_train.min(), y_train.max()\n", + "print(x_train, y_train)\n", + "print(x_train.shape)\n", + "print(y_train.min(), y_train.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Neural net from scratch (no torch.nn)\n", + "---------------------------------------------\n", + "\n", + "Let's first create a model using nothing but PyTorch tensor operations. We're assuming\n", + "you're already familiar with the basics of neural networks. (If you're not, you can\n", + "learn them at `course.fast.ai `_).\n", + "\n", + "PyTorch provides methods to create random or zero-filled tensors, which we will\n", + "use to create our weights and bias for a simple linear model. These are just regular\n", + "tensors, with one very special addition: we tell PyTorch that they require a\n", + "gradient. This causes PyTorch to record all of the operations done on the tensor,\n", + "so that it can calculate the gradient during back-propagation *automatically*!\n", + "\n", + "For the weights, we set ``requires_grad`` **after** the initialization, since we\n", + "don't want that step included in the gradient. (Note that a trailling ``_`` in\n", + "PyTorch signifies that the operation is performed in-place.)\n", + "\n", + "

<div class=\"alert alert-info\"><h4>Note</h4><p>We are initializing the weights here with\n", + "   `Xavier initialisation `_\n", + "   (by multiplying with 1/sqrt(n)).</p></div>
\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "\n", + "weights = torch.randn(784, 10) / math.sqrt(784)\n", + "weights.requires_grad_()\n", + "bias = torch.zeros(10, requires_grad=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thanks to PyTorch's ability to calculate gradients automatically, we can\n", + "use any standard Python function (or callable object) as a model! So\n", + "let's just write a plain matrix multiplication and broadcasted addition\n", + "to create a simple linear model. We also need an activation function, so\n", + "we'll write `log_softmax` and use it. Remember: although PyTorch\n", + "provides lots of pre-written loss functions, activation functions, and\n", + "so forth, you can easily write your own using plain python. PyTorch will\n", + "even create fast GPU or vectorized CPU code for your function\n", + "automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def log_softmax(x):\n", + " return x - x.exp().sum(-1).log().unsqueeze(-1)\n", + "\n", + "def model(xb):\n", + " return log_softmax(xb @ weights + bias)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above, the ``@`` stands for the dot product operation. We will call\n", + "our function on one batch of data (in this case, 64 images). This is\n", + "one *forward pass*. Note that our predictions won't be any better than\n", + "random at this stage, since we start with random weights.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bs = 64 # batch size\n", + "\n", + "xb = x_train[0:bs] # a mini-batch from x\n", + "preds = model(xb) # predictions\n", + "preds[0], preds.shape\n", + "print(preds[0], preds.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you see, the ``preds`` tensor contains not only the tensor values, but also a\n", + "gradient function. 
We'll use this later to do backprop.\n", + "\n", + "Let's implement negative log-likelihood to use as the loss function\n", + "(again, we can just use standard Python):\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def nll(input, target):\n", + " return -input[range(target.shape[0]), target].mean()\n", + "\n", + "loss_func = nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check our loss with our random model, so we can see if we improve\n", + "after a backprop pass later.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yb = y_train[0:bs]\n", + "print(loss_func(preds, yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's also implement a function to calculate the accuracy of our model.\n", + "For each prediction, if the index with the largest value matches the\n", + "target value, then the prediction was correct.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def accuracy(out, yb):\n", + " preds = torch.argmax(out, dim=1)\n", + " return (preds == yb).float().mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check the accuracy of our random model, so we can see if our\n", + "accuracy improves as our loss improves.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(accuracy(preds, yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now run a training loop. For each iteration, we will:\n", + "\n", + "- select a mini-batch of data (of size ``bs``)\n", + "- use the model to make predictions\n", + "- calculate the loss\n", + "- ``loss.backward()`` updates the gradients of the model, in this case, ``weights``\n", + " and ``bias``.\n", + "\n", + "We now use these gradients to update the weights and bias. We do this\n", + "within the ``torch.no_grad()`` context manager, because we do not want these\n", + "actions to be recorded for our next calculation of the gradient. You can read\n", + "more about how PyTorch's Autograd records operations\n", + "`here `_.\n", + "\n", + "We then set the\n", + "gradients to zero, so that we are ready for the next loop.\n", + "Otherwise, our gradients would record a running tally of all the operations\n", + "that had happened (i.e. ``loss.backward()`` *adds* the gradients to whatever is\n", + "already stored, rather than replacing them).\n", + "\n", + ".. 
tip:: You can use the standard python debugger to step through PyTorch\n", + " code, allowing you to check the various variable values at each step.\n", + " Uncomment ``set_trace()`` below to try it out.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.debugger import set_trace\n", + "\n", + "lr = 0.5 # learning rate\n", + "epochs = 2 # how many epochs to train for\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " # set_trace()\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " with torch.no_grad():\n", + " weights -= weights.grad * lr\n", + " bias -= bias.grad * lr\n", + " weights.grad.zero_()\n", + " bias.grad.zero_()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it: we've created and trained a minimal neural network (in this case, a\n", + "logistic regression, since we have no hidden layers) entirely from scratch!\n", + "\n", + "Let's check the loss and accuracy and compare those to what we got\n", + "earlier. We expect that the loss will have decreased and accuracy to\n", + "have increased, and they have.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using torch.nn.functional\n", + "------------------------------\n", + "\n", + "We will now refactor our code, so that it does the same thing as before, only\n", + "we'll start taking advantage of PyTorch's ``nn`` classes to make it more concise\n", + "and flexible. At each step from here, we should be making our code one or more\n", + "of: shorter, more understandable, and/or more flexible.\n", + "\n", + "The first and easiest step is to make our code shorter by replacing our\n", + "hand-written activation and loss functions with those from ``torch.nn.functional``\n", + "(which is generally imported into the namespace ``F`` by convention). This module\n", + "contains all the functions in the ``torch.nn`` library (whereas other parts of the\n", + "library contain classes). As well as a wide range of loss and activation\n", + "functions, you'll also find here some convenient functions for creating neural\n", + "nets, such as pooling functions. (There are also functions for doing convolutions,\n", + "linear layers, etc, but as we'll see, these are usually better handled using\n", + "other parts of the library.)\n", + "\n", + "If you're using negative log likelihood loss and log softmax activation,\n", + "then Pytorch provides a single function ``F.cross_entropy`` that combines\n", + "the two. So we can even remove the activation function from our model.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "\n", + "loss_func = F.cross_entropy\n", + "\n", + "def model(xb):\n", + " return xb @ weights + bias" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we no longer call ``log_softmax`` in the ``model`` function. 
Let's\n", + "confirm that our loss and accuracy are the same as before:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using nn.Module\n", + "-----------------------------\n", + "Next up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more\n", + "concise training loop. We subclass ``nn.Module`` (which itself is a class and\n", + "able to keep track of state). In this case, we want to create a class that\n", + "holds our weights, bias, and method for the forward step. ``nn.Module`` has a\n", + "number of attributes and methods (such as ``.parameters()`` and ``.zero_grad()``)\n", + "which we will be using.\n", + "\n", + "

<div class=\"alert alert-info\"><h4>Note</h4><p>``nn.Module`` (uppercase M) is a PyTorch-specific concept, and is a\n", + " class we'll be using a lot. ``nn.Module`` is not to be confused with the Python\n", + " concept of a (lowercase ``m``) `module `_,\n", + " which is a file of Python code that can be imported.</p></div>
\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch import nn\n", + "\n", + "class Mnist_Logistic(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))\n", + " self.bias = nn.Parameter(torch.zeros(10))\n", + "\n", + " def forward(self, xb):\n", + " return xb @ self.weights + self.bias" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since we're now using an object instead of just using a function, we\n", + "first have to instantiate our model:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_Logistic()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can calculate the loss in the same way as before. Note that\n", + "``nn.Module`` objects are used as if they are functions (i.e they are\n", + "*callable*), but behind the scenes Pytorch will call our ``forward``\n", + "method automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously for our training loop we had to update the values for each parameter\n", + "by name, and manually zero out the grads for each parameter separately, like this:\n", + "::\n", + " with torch.no_grad():\n", + " weights -= weights.grad * lr\n", + " bias -= bias.grad * lr\n", + " weights.grad.zero_()\n", + " bias.grad.zero_()\n", + "\n", + "\n", + "Now we can take advantage of model.parameters() and model.zero_grad() (which\n", + "are both defined by PyTorch for ``nn.Module``) to make those steps more concise\n", + "and less prone to the error of forgetting some of our parameters, particularly\n", + "if we had a more complicated model:\n", + "::\n", + " with torch.no_grad():\n", + " for p in model.parameters(): p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "\n", + "We'll wrap our little training loop in a ``fit`` function so we can run it\n", + "again later.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def fit():\n", + " for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " with torch.no_grad():\n", + " for p in model.parameters():\n", + " p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "fit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's double-check that our loss has gone down:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using nn.Linear\n", + "-------------------------\n", + "\n", + "We continue to refactor our code. Instead of manually defining and\n", + "initializing ``self.weights`` and ``self.bias``, and calculating ``xb @\n", + "self.weights + self.bias``, we will instead use the Pytorch class\n", + "`nn.Linear `_ for a\n", + "linear layer, which does all that for us. 
Pytorch has many types of\n", + "predefined layers that can greatly simplify our code, and often makes it\n", + "faster too.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Mnist_Logistic(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.lin = nn.Linear(784, 10)\n", + "\n", + " def forward(self, xb):\n", + " return self.lin(xb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We instantiate our model and calculate the loss in the same way as before:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_Logistic()\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are still able to use our same ``fit`` method as before.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using optim\n", + "------------------------------\n", + "\n", + "Pytorch also has a package with various optimization algorithms, ``torch.optim``.\n", + "We can use the ``step`` method from our optimizer to take a forward step, instead\n", + "of manually updating each parameter.\n", + "\n", + "This will let us replace our previous manually coded optimization step:\n", + "::\n", + " with torch.no_grad():\n", + " for p in model.parameters(): p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "and instead use just:\n", + "::\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "(``optim.zero_grad()`` resets the gradient to 0 and we need to call it before\n", + "computing the gradient for the next minibatch.)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch import optim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll define a little function to create our model and optimizer so we\n", + "can reuse it in the future.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " model = Mnist_Logistic()\n", + " return model, optim.SGD(model.parameters(), lr=lr)\n", + "\n", + "model, opt = get_model()\n", + "print(loss_func(model(xb), yb))\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using Dataset\n", + "------------------------------\n", + "\n", + "PyTorch has an abstract Dataset class. A Dataset can be anything that has\n", + "a ``__len__`` function (called by Python's standard ``len`` function) and\n", + "a ``__getitem__`` function as a way of indexing into it.\n", + "`This tutorial `_\n", + "walks through a nice example of creating a custom ``FacialLandmarkDataset`` class\n", + "as a subclass of ``Dataset``.\n", + "\n", + "PyTorch's `TensorDataset `_\n", + "is a Dataset wrapping tensors. 
By defining a length and way of indexing,\n", + "this also gives us a way to iterate, index, and slice along the first\n", + "dimension of a tensor. This will make it easier to access both the\n", + "independent and dependent variables in the same line as we train.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import TensorDataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both ``x_train`` and ``y_train`` can be combined in a single ``TensorDataset``,\n", + "which will be easier to iterate over and slice.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_ds = TensorDataset(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously, we had to iterate through minibatches of x and y values separately:\n", + "::\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + "\n", + "\n", + "Now, we can do these two steps together:\n", + "::\n", + " xb,yb = train_ds[i*bs : i*bs+bs]\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " xb, yb = train_ds[i * bs: i * bs + bs]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using DataLoader\n", + "------------------------------\n", + "\n", + "Pytorch's ``DataLoader`` is responsible for managing batches. You can\n", + "create a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier\n", + "to iterate over batches. Rather than having to use ``train_ds[i*bs : i*bs+bs]``,\n", + "the DataLoader gives us each minibatch automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "\n", + "train_ds = TensorDataset(x_train, y_train)\n", + "train_dl = DataLoader(train_ds, batch_size=bs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously, our loop iterated over batches (xb, yb) like this:\n", + "::\n", + " for i in range((n-1)//bs + 1):\n", + " xb,yb = train_ds[i*bs : i*bs+bs]\n", + " pred = model(xb)\n", + "\n", + "Now, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader:\n", + "::\n", + " for xb,yb in train_dl:\n", + " pred = model(xb)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " for xb, yb in train_dl:\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,\n", + "our training loop is now dramatically smaller and easier to understand. 
Let's\n", + "now try to add the basic features necessary to create effecive models in practice.\n", + "\n", + "Add validation\n", + "-----------------------\n", + "\n", + "In section 1, we were just trying to get a reasonable training loop set up for\n", + "use on our training data. In reality, you **always** should also have\n", + "a `validation set `_, in order\n", + "to identify if you are overfitting.\n", + "\n", + "Shuffling the training data is\n", + "`important `_\n", + "to prevent correlation between batches and overfitting. On the other hand, the\n", + "validation loss will be identical whether we shuffle the validation set or not.\n", + "Since shuffling takes extra time, it makes no sense to shuffle the validation data.\n", + "\n", + "We'll use a batch size for the validation set that is twice as large as\n", + "that for the training set. This is because the validation set does not\n", + "need backpropagation and thus takes less memory (it doesn't need to\n", + "store the gradients). We take advantage of this to use a larger batch\n", + "size and compute the loss more quickly.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_ds = TensorDataset(x_train, y_train)\n", + "train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)\n", + "\n", + "valid_ds = TensorDataset(x_valid, y_valid)\n", + "valid_dl = DataLoader(valid_ds, batch_size=bs * 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will calculate and print the validation loss at the end of each epoch.\n", + "\n", + "(Note that we always call ``model.train()`` before training, and ``model.eval()``\n", + "before inference, because these are used by layers such as ``nn.BatchNorm2d``\n", + "and ``nn.Dropout`` to ensure appropriate behaviour for these different phases.)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " model.train()\n", + " for xb, yb in train_dl:\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)\n", + "\n", + " print(epoch, valid_loss / len(valid_dl))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create fit() and get_data()\n", + "----------------------------------\n", + "\n", + "We'll now do a little refactoring of our own. Since we go through a similar\n", + "process twice of calculating the loss for both the training set and the\n", + "validation set, let's make that into its own function, ``loss_batch``, which\n", + "computes the loss for one batch.\n", + "\n", + "We pass an optimizer in for the training set, and use it to perform\n", + "backprop. 
For the validation set, we don't pass an optimizer, so the\n", + "method doesn't perform backprop.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def loss_batch(model, loss_func, xb, yb, opt=None):\n", + " loss = loss_func(model(xb), yb)\n", + "\n", + " if opt is not None:\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + " return loss.item(), len(xb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``fit`` runs the necessary operations to train our model and compute the\n", + "training and validation losses for each epoch.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def fit(epochs, model, loss_func, opt, train_dl, valid_dl):\n", + " for epoch in range(epochs):\n", + " model.train()\n", + " for xb, yb in train_dl:\n", + " loss_batch(model, loss_func, xb, yb, opt)\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " losses, nums = zip(\n", + " *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]\n", + " )\n", + " val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)\n", + "\n", + " print(epoch, val_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``get_data`` returns dataloaders for the training and validation sets.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_data(train_ds, valid_ds, bs):\n", + " return (\n", + " DataLoader(train_ds, batch_size=bs, shuffle=True),\n", + " DataLoader(valid_ds, batch_size=bs * 2),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, our whole process of obtaining the data loaders and fitting the\n", + "model can be run in 3 lines of code:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "model, opt = get_model()\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use these basic 3 lines of code to train a wide variety of models.\n", + "Let's see if we can use them to train a convolutional neural network (CNN)!\n", + "\n", + "Switch to CNN\n", + "-------------\n", + "\n", + "We are now going to build our neural network with three convolutional layers.\n", + "Because none of the functions in the previous section assume anything about\n", + "the model form, we'll be able to use them to train a CNN without any modification.\n", + "\n", + "We will use Pytorch's predefined\n", + "`Conv2d `_ class\n", + "as our convolutional layer. We define a CNN with 3 convolutional layers.\n", + "Each convolution is followed by a ReLU. At the end, we perform an\n", + "average pooling. 
(Note that ``view`` is PyTorch's version of numpy's\n", + "``reshape``)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Mnist_CNN(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)\n", + " self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)\n", + " self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)\n", + "\n", + " def forward(self, xb):\n", + " xb = xb.view(-1, 1, 28, 28)\n", + " xb = F.relu(self.conv1(xb))\n", + " xb = F.relu(self.conv2(xb))\n", + " xb = F.relu(self.conv3(xb))\n", + " xb = F.avg_pool2d(xb, 4)\n", + " return xb.view(-1, xb.size(1))\n", + "\n", + "lr = 0.1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Momentum `_ is a variation on\n", + "stochastic gradient descent that takes previous updates into account as well\n", + "and generally leads to faster training.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_CNN()\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n", + "\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "nn.Sequential\n", + "------------------------\n", + "\n", + "``torch.nn`` has another handy class we can use to simply our code:\n", + "`Sequential `_ .\n", + "A ``Sequential`` object runs each of the modules contained within it, in a\n", + "sequential manner. This is a simpler way of writing our neural network.\n", + "\n", + "To take advantage of this, we need to be able to easily define a\n", + "**custom layer** from a given function. For instance, PyTorch doesn't\n", + "have a `view` layer, and we need to create one for our network. 
``Lambda``\n", + "will create a layer that we can then use when defining a network with\n", + "``Sequential``.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Lambda(nn.Module):\n", + " def __init__(self, func):\n", + " super().__init__()\n", + " self.func = func\n", + "\n", + " def forward(self, x):\n", + " return self.func(x)\n", + "\n", + "\n", + "def preprocess(x):\n", + " return x.view(-1, 1, 28, 28)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model created with ``Sequential`` is simply:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = nn.Sequential(\n", + " Lambda(preprocess),\n", + " nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.AvgPool2d(4),\n", + " Lambda(lambda x: x.view(x.size(0), -1)),\n", + ")\n", + "\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n", + "\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wrapping DataLoader\n", + "-----------------------------\n", + "\n", + "Our CNN is fairly concise, but it only works with MNIST, because:\n", + " - It assumes the input is a 28\\*28 long vector\n", + " - It assumes that the final CNN grid size is 4\\*4 (since that's the average\n", + "pooling kernel size we used)\n", + "\n", + "Let's get rid of these two assumptions, so our model works with any 2d\n", + "single channel image. First, we can remove the initial Lambda layer but\n", + "moving the data preprocessing into a generator:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " return x.view(-1, 1, 28, 28), y\n", + "\n", + "\n", + "class WrappedDataLoader:\n", + " def __init__(self, dl, func):\n", + " self.dl = dl\n", + " self.func = func\n", + "\n", + " def __len__(self):\n", + " return len(self.dl)\n", + "\n", + " def __iter__(self):\n", + " batches = iter(self.dl)\n", + " for b in batches:\n", + " yield (self.func(*b))\n", + "\n", + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "train_dl = WrappedDataLoader(train_dl, preprocess)\n", + "valid_dl = WrappedDataLoader(valid_dl, preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we can replace ``nn.AvgPool2d`` with ``nn.AdaptiveAvgPool2d``, which\n", + "allows us to define the size of the *output* tensor we want, rather than\n", + "the *input* tensor we have. 
As a result, our model will work with any\n", + "size input.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = nn.Sequential(\n", + " nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.AdaptiveAvgPool2d(1),\n", + " Lambda(lambda x: x.view(x.size(0), -1)),\n", + ")\n", + "\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try it out:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using your GPU\n", + "---------------\n", + "\n", + "If you're lucky enough to have access to a CUDA-capable GPU (you can\n", + "rent one for about $0.50/hour from most cloud providers) you can\n", + "use it to speed up your code. First check that your GPU is working in\n", + "Pytorch:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(torch.cuda.is_available())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then create a device object for it:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev = torch.device(\n", + " \"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's update ``preprocess`` to move batches to the GPU:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " return x.view(-1, 1, 28, 28).to(dev), y.to(dev)\n", + "\n", + "\n", + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "train_dl = WrappedDataLoader(train_dl, preprocess)\n", + "valid_dl = WrappedDataLoader(valid_dl, preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can move our model to the GPU.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.to(dev)\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should find it runs faster now:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Closing thoughts\n", + "-----------------\n", + "\n", + "We now have a general data pipeline and training loop which you can use for\n", + "training many types of models using Pytorch. 
To see how simple training a model\n", + "can now be, take a look at the `mnist_sample` sample notebook.\n", + "\n", + "Of course, there are many things you'll want to add, such as data augmentation,\n", + "hyperparameter tuning, monitoring training, transfer learning, and so forth.\n", + "These features are available in the fastai library, which has been developed\n", + "using the same design approach shown in this tutorial, providing a natural\n", + "next step for practitioners looking to take their models further.\n", + "\n", + "We promised at the start of this tutorial we'd explain through example each of\n", + "``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize\n", + "what we've seen:\n", + "\n", + " - **torch.nn**\n", + "\n", + " + ``Module``: creates a callable which behaves like a function, but can also\n", + " contain state(such as neural net layer weights). It knows what ``Parameter`` (s) it\n", + " contains and can zero all their gradients, loop through them for weight updates, etc.\n", + " + ``Parameter``: a wrapper for a tensor that tells a ``Module`` that it has weights\n", + " that need updating during backprop. Only tensors with the `requires_grad` attribute set are updated\n", + " + ``functional``: a module(usually imported into the ``F`` namespace by convention)\n", + " which contains activation functions, loss functions, etc, as well as non-stateful\n", + " versions of layers such as convolutional and linear layers.\n", + " - ``torch.optim``: Contains optimizers such as ``SGD``, which update the weights\n", + " of ``Parameter`` during the backward step\n", + " - ``Dataset``: An abstract interface of objects with a ``__len__`` and a ``__getitem__``,\n", + " including classes provided with Pytorch such as ``TensorDataset``\n", + " - ``DataLoader``: Takes any ``Dataset`` and creates an iterator which returns batches of data.\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/pytorch/tutorials/.vscode/settings.json b/pytorch/tutorials/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..55be5ed8073002e60127c53ad084e3ffa68f9c16 --- /dev/null +++ b/pytorch/tutorials/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "/home/marmot/miniconda3/envs/dnn/bin/python" +} \ No newline at end of file diff --git a/pytorch/tutorials/PyTorchTutorial.ipynb b/pytorch/tutorials/PyTorchTutorial.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7e7d105866ada7b483d0db10cec7494259ce92f9 --- /dev/null +++ b/pytorch/tutorials/PyTorchTutorial.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Link of tutorial: https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tensor" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a tensor and set requires_grad=True to track computation with it:" + ] + }, + { + 
"cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[1., 1.],\n", + " [1., 1.]], requires_grad=True)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = torch.ones(2, 2, requires_grad=True)\n", + "x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do a tensor operation:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[3., 3.],\n", + " [3., 3.]], grad_fn=)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = x + 2\n", + "y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do more operations on y:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([[27., 27.],\n", + " [27., 27.]], grad_fn=),\n", + " tensor(27., grad_fn=))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z = y * y * 3\n", + "out = z.mean()\n", + "\n", + "z, out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pytorch/tutorials/nn_tutorial.ipynb b/pytorch/tutorials/nn_tutorial.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4eaa5cfd167ff50e7526d325b8049e8d0a6d6ccf --- /dev/null +++ b/pytorch/tutorials/nn_tutorial.ipynb @@ -0,0 +1,1550 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "What is `torch.nn` *really*?\n", + "============================\n", + "by Jeremy Howard, `fast.ai `_. Thanks to Rachel Thomas and Francisco Ingham.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file,\n", + "click the link at the top of the page.\n", + "\n", + "PyTorch provides the elegantly designed modules and classes `torch.nn `_ ,\n", + "`torch.optim `_ ,\n", + "`Dataset `_ ,\n", + "and `DataLoader `_\n", + "to help you create and train neural networks.\n", + "In order to fully utilize their power and customize\n", + "them for your problem, you need to really understand exactly what they're\n", + "doing. To develop this understanding, we will first train basic neural net\n", + "on the MNIST data set without using any features from these models; we will\n", + "initially only use the most basic PyTorch tensor functionality. 
Then, we will\n", + "incrementally add one feature from ``torch.nn``, ``torch.optim``, ``Dataset``, or\n", + "``DataLoader`` at a time, showing exactly what each piece does, and how it\n", + "works to make the code either more concise, or more flexible.\n", + "\n", + "**This tutorial assumes you already have PyTorch installed, and are familiar\n", + "with the basics of tensor operations.** (If you're familiar with Numpy array\n", + "operations, you'll find the PyTorch tensor operations used here nearly identical).\n", + "\n", + "MNIST data setup\n", + "----------------\n", + "\n", + "We will use the classic `MNIST `_ dataset,\n", + "which consists of black-and-white images of hand-drawn digits (between 0 and 9).\n", + "\n", + "We will use `pathlib `_\n", + "for dealing with paths (part of the Python 3 standard library), and will\n", + "download the dataset using\n", + "`requests `_. We will only\n", + "import modules when we use them, so you can see exactly what's being\n", + "used at each point.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import requests\n", + "\n", + "DATA_PATH = Path(\"data\")\n", + "PATH = DATA_PATH / \"mnist\"\n", + "\n", + "PATH.mkdir(parents=True, exist_ok=True)\n", + "\n", + "URL = \"http://deeplearning.net/data/mnist/\"\n", + "FILENAME = \"mnist.pkl.gz\"\n", + "\n", + "if not (PATH / FILENAME).exists():\n", + " content = requests.get(URL + FILENAME).content\n", + " (PATH / FILENAME).open(\"wb\").write(content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This dataset is in numpy array format, and has been stored using pickle,\n", + "a python-specific format for serializing data.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import gzip\n", + "\n", + "with gzip.open((PATH / FILENAME).as_posix(), \"rb\") as f:\n", + " ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding=\"latin-1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each image is 28 x 28, and is being stored as a flattened row of length\n", + "784 (=28x28). 
Let's take a look at one; we need to reshape it to 2d\n", + "first.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(50000, 784)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAN9klEQVR4nO3df4xV9ZnH8c+zWP6QojBrOhKKSyEGg8ZON4gbl6w1hvojGhw1TSexoZE4/YNJaLIhNewf1WwwZBU2SzTNTKMWNl1qEzUgaQouoOzGhDgiKo5LdQ2mTEaowZEf/mCHefaPezBTnfu9w7nn3nOZ5/1Kbu6957nnnicnfDi/7pmvubsATH5/VXYDAJqDsANBEHYgCMIOBEHYgSAuaubCzIxT/0CDubuNN72uLbuZ3Wpmh8zsPTN7sJ7vAtBYlvc6u5lNkfRHSUslHZH0qqQudx9IzMOWHWiwRmzZF0t6z93fd/czkn4raVkd3weggeoJ+2xJfxrz/kg27S+YWbeZ9ZtZfx3LAlCnhp+gc/c+SX0Su/FAmerZsg9KmjPm/bezaQBaUD1hf1XSlWb2HTObKulHkrYV0xaAouXejXf3ETPrkbRD0hRJT7n724V1BqBQuS+95VoYx+xAwzXkRzUALhyEHQiCsANBEHYgCMIOBEHYgSAIOxAEYQeCIOxAEIQdCIKwA0EQdiAIwg4EQdiBIAg7EARhB4Ig7EAQhB0IgrADQRB2IAjCDgRB2IEgCDsQBGEHgiDsQBCEHQiCsANBEHYgCMIOBJF7yGZcGKZMmZKsX3rppQ1dfk9PT9XaxRdfnJx3wYIFyfrKlSuT9ccee6xqraurKznv559/nqyvW7cuWX/44YeT9TLUFXYzOyzppKSzkkbcfVERTQEoXhFb9pvc/aMCvgdAA3HMDgRRb9hd0k4ze83Musf7gJl1m1m/mfXXuSwAdah3N36Juw+a2bckvWhm/+Pue8d+wN37JPVJkpl5ncsDkFNdW3Z3H8yej0l6XtLiIpoCULzcYTezaWY2/dxrST+QdLCoxgAUq57d+HZJz5vZue/5D3f/QyFdTTJXXHFFsj516tRk/YYbbkjWlyxZUrU2Y8aM5Lz33HNPsl6mI0eOJOsbN25M1js7O6vWTp48mZz3jTfeSNZffvnlZL0V5Q67u78v6bsF9gKggbj0BgRB2IEgCDsQBGEHgiDsQBDm3rwftU3WX9B1dHQk67t3707WG32baasaHR1N1u+///5k/dSpU7mXPTQ0lKx//PHHyfqhQ4dyL7vR3N3Gm86WHQiCsANBEHYgCMIOBEHYgSAIOxAEYQeC4Dp7Adra2pL1ffv2Jevz5s0rsp1C1ep9eHg4Wb/pppuq1s6cOZOcN+rvD+rFdXYgOMIOBEHYgSAIOxAEYQeCIOxAEIQdCIIhmwtw/PjxZH316tXJ+h133JGsv/7668l6rT+pnHLgwIFkfenSpcn66dOnk/Wrr766am3VqlXJeVEstuxAEIQdCIKwA0EQdiAIwg4EQdiBIAg7EAT3s7eASy65JFmvNbxwb29v1dqKFSuS8953333J+pYtW5J1tJ7c97Ob2VNmdszMDo6Z1mZmL5rZu9nzzCKbBVC8iezG/1rSrV+Z9qCkXe5+paRd2XsALaxm2N19r6Sv/h50maRN2etNku4quC8ABcv72/h2dz83WNaHktqrfdDMuiV151wOgILUfSOMu3vqxJu790nqkzhBB5Qp76W3o2Y2S5Ky52PFtQSgEfKGfZuk5dnr5ZK2FtMOgEapuRtvZlskfV/SZWZ2RNIvJK2T9DszWyHpA0k/bGSTk92JEyfqmv+TTz7JPe8DDzyQrD/zzDPJeq0x1tE6aobd3buqlG4uuBcADcTPZYEgCDsQBGEHgiDsQBCEHQiCW1wngWnTplWtvfDCC8l5b7zxxmT9tttuS9Z37tyZrKP5GLIZCI6wA0EQdiAIwg4EQdiBIAg7EARhB4LgOvskN3/+/GR9//79yfrw8HCyvmfPnmS9v7+/au2JJ55IztvMf5uTCdfZgeAIOxAEYQeCIOxAEIQdCIKwA0EQdiAIrrMH19nZmaw//fTTyfr06dNzL3vNmjXJ+ubNm5P1oaGhZD0qrrMDwRF2IAjCDgRB2IEgCDsQBGEHgiDsQBBcZ0fSNddck6xv2LAhWb/55vyD/fb29ibra9euTdYHBwdzL/tClvs6u5k9ZWbHzOzgmGkPmdmgmR3IHrcX2SyA4k1kN/7Xkm4dZ/q/untH9vh9sW0BKFrNsLv7XknHm9ALgAaq5wRdj5m9me3mz6z2ITPrNrN+M6v+x8gANFzesP9S0nxJHZKGJK2v9kF373P3Re6+KOeyABQgV9jd/ai7n3X3UUm/krS42LYAFC1X2M1s1pi3nZIOVvssgNZQ8zq7mW2R9H1Jl0k6KukX2fsOSS7psKSfunvNm4u5zj75zJgxI1m/8847q9Zq3StvNu7l4i/t3r07WV+6dGmyPllVu85+0QRm7Bpn8pN1dwSgqfi5LBAEYQeCIOxAEIQdCIKwA0FwiytK88UXXyTrF12Uvlg0MjKSrN9yyy1Vay+99FJy3gsZf0oaCI6wA0EQdiAIwg4EQdiBIAg7EARhB4KoedcbYrv22muT9XvvvTdZv+6666rWal1Hr2VgYCBZ37t3b13fP9mwZQeCIOxAEIQdCIKwA0EQdiAIwg4EQdiBILjOPsktWLAgWe/p6UnW77777mT98ssvP++eJurs2bPJ+tBQ+q+Xj46OFtnOBY8tOxAEYQeCIOxAEIQdCIKwA0EQdiAIwg4EwXX2C0Cta9ldXeMNtFtR6zr63Llz87RUiP7+/mR97dq1yfq2bduKbGfSq7llN7M5ZrbHzAbM7G0zW5VNbzOzF83s3ex5ZuPbBZDXRHbjRyT9o7svlPR3klaa2UJJD0ra5e5XStqVvQfQomqG3d2H3H1/9vqkpHckzZa0TNKm7GObJN3VqCYB1O+8jtnNbK6k70naJ6nd3c/9OPlDSe1V5umW1J2/RQBFmPDZeDP7pqRnJf3M3U+MrXlldMhxB2109z53X+Tui+rqFEBdJhR2M/uGKkH/jbs/l00+amazsvosScca0yKAItTcjTczk/SkpHfcfcOY0jZJyyWty563NqTDSaC9fdwjnC8tXLgwWX/88ceT9auuuuq8eyrKvn37kvVHH320am3r1vQ/GW5RLdZEjtn/XtKPJb1lZgeyaWtUCfnvzGyF
pA8k/bAxLQIoQs2wu/t/Sxp3cHdJNxfbDoBG4eeyQBCEHQiCsANBEHYgCMIOBMEtrhPU1tZWtdbb25uct6OjI1mfN29erp6K8MorryTr69evT9Z37NiRrH/22Wfn3RMagy07EARhB4Ig7EAQhB0IgrADQRB2IAjCDgQR5jr79ddfn6yvXr06WV+8eHHV2uzZs3P1VJRPP/20am3jxo3JeR955JFk/fTp07l6Quthyw4EQdiBIAg7EARhB4Ig7EAQhB0IgrADQYS5zt7Z2VlXvR4DAwPJ+vbt25P1kZGRZD11z/nw8HByXsTBlh0IgrADQRB2IAjCDgRB2IEgCDsQBGEHgjB3T3/AbI6kzZLaJbmkPnf/NzN7SNIDkv6cfXSNu/++xnelFwagbu4+7qjLEwn7LEmz3H2/mU2X9Jqku1QZj/2Uuz820SYIO9B41cI+kfHZhyQNZa9Pmtk7ksr90ywAztt5HbOb2VxJ35O0L5vUY2ZvmtlTZjazyjzdZtZvZv11dQqgLjV347/8oNk3Jb0saa27P2dm7ZI+UuU4/p9V2dW/v8Z3sBsPNFjuY3ZJMrNvSNouaYe7bxinPlfSdne/psb3EHagwaqFveZuvJmZpCclvTM26NmJu3M6JR2st0kAjTORs/FLJP2XpLckjWaT10jqktShym78YUk/zU7mpb6LLTvQYHXtxheFsAONl3s3HsDkQNiBIAg7EARhB4Ig7EAQhB0IgrADQRB2IAjCDgRB2IEgCDsQBGEHgiDsQBCEHQii2UM2fyTpgzHvL8umtaJW7a1V+5LoLa8ie/ubaoWm3s/+tYWb9bv7otIaSGjV3lq1L4ne8mpWb+zGA0EQdiCIssPeV/LyU1q1t1btS6K3vJrSW6nH7ACap+wtO4AmIexAEKWE3cxuNbNDZvaemT1YRg/VmNlhM3vLzA6UPT5dNobeMTM7OGZam5m9aGbvZs/jjrFXUm8Pmdlgtu4OmNntJfU2x8z2mNmAmb1tZquy6aWuu0RfTVlvTT9mN7Mpkv4oaamkI5JeldTl7gNNbaQKMzssaZG7l/4DDDP7B0mnJG0+N7SWmf2LpOPuvi77j3Kmu/+8RXp7SOc5jHeDeqs2zPhPVOK6K3L48zzK2LIvlvSeu7/v7mck/VbSshL6aHnuvlfS8a9MXiZpU/Z6kyr/WJquSm8twd2H3H1/9vqkpHPDjJe67hJ9NUUZYZ8t6U9j3h9Ra4337pJ2mtlrZtZddjPjaB8zzNaHktrLbGYcNYfxbqavDDPeMusuz/Dn9eIE3dctcfe/lXSbpJXZ7mpL8soxWCtdO/2lpPmqjAE4JGl9mc1kw4w/K+ln7n5ibK3MdTdOX01Zb2WEfVDSnDHvv51NawnuPpg9H5P0vCqHHa3k6LkRdLPnYyX38yV3P+ruZ919VNKvVOK6y4YZf1bSb9z9uWxy6etuvL6atd7KCPurkq40s++Y2VRJP5K0rYQ+vsbMpmUnTmRm0yT9QK03FPU2Scuz18slbS2xl7/QKsN4VxtmXCWvu9KHP3f3pj8k3a7KGfn/lfRPZfRQpa95kt7IHm+X3ZukLars1v2fKuc2Vkj6a0m7JL0r6T8ltbVQb/+uytDeb6oSrFkl9bZElV30NyUdyB63l73uEn01Zb3xc1kgCE7QAUEQdiAIwg4EQdiBIAg7EARhB4Ig7EAQ/w8ie3GmjcGk5QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot\n", + "import numpy as np\n", + "\n", + "pyplot.imshow(x_train[0].reshape((28, 28)), cmap=\"gray\")\n", + "print(x_train.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PyTorch uses ``torch.tensor``, rather than numpy arrays, so we need to\n", + "convert our data.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 0., 0.]]) tensor([5, 0, 4, ..., 8, 4, 8])\n", + "torch.Size([50000, 784])\n", + "tensor(0) tensor(9)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":3: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " x_train, y_train, x_valid, y_valid = map(\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "x_train, y_train, x_valid, y_valid = map(\n", + " torch.tensor, (x_train, y_train, x_valid, y_valid)\n", + ")\n", + "n, c = x_train.shape\n", + "x_train, x_train.shape, y_train.min(), y_train.max()\n", + "print(x_train, y_train)\n", + "print(x_train.shape)\n", + "print(y_train.min(), y_train.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Neural net from scratch (no torch.nn)\n", + "---------------------------------------------\n", + "\n", + "Let's first create a model using nothing but PyTorch tensor operations. We're assuming\n", + "you're already familiar with the basics of neural networks. (If you're not, you can\n", + "learn them at `course.fast.ai `_).\n", + "\n", + "PyTorch provides methods to create random or zero-filled tensors, which we will\n", + "use to create our weights and bias for a simple linear model. These are just regular\n", + "tensors, with one very special addition: we tell PyTorch that they require a\n", + "gradient. This causes PyTorch to record all of the operations done on the tensor,\n", + "so that it can calculate the gradient during back-propagation *automatically*!\n", + "\n", + "For the weights, we set ``requires_grad`` **after** the initialization, since we\n", + "don't want that step included in the gradient. (Note that a trailling ``_`` in\n", + "PyTorch signifies that the operation is performed in-place.)\n", + "\n", + "

.. note:: We are initializing the weights here with\n", + "   `Xavier initialisation `_\n", + "   (by multiplying with 1/sqrt(n)).

\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "\n", + "weights = torch.randn(784, 10) / math.sqrt(784)\n", + "weights.requires_grad_()\n", + "bias = torch.zeros(10, requires_grad=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thanks to PyTorch's ability to calculate gradients automatically, we can\n", + "use any standard Python function (or callable object) as a model! So\n", + "let's just write a plain matrix multiplication and broadcasted addition\n", + "to create a simple linear model. We also need an activation function, so\n", + "we'll write `log_softmax` and use it. Remember: although PyTorch\n", + "provides lots of pre-written loss functions, activation functions, and\n", + "so forth, you can easily write your own using plain python. PyTorch will\n", + "even create fast GPU or vectorized CPU code for your function\n", + "automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def log_softmax(x):\n", + " return x - x.exp().sum(-1).log().unsqueeze(-1)\n", + "\n", + "def model(xb):\n", + " return log_softmax(xb @ weights + bias)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above, the ``@`` stands for the dot product operation. We will call\n", + "our function on one batch of data (in this case, 64 images). This is\n", + "one *forward pass*. Note that our predictions won't be any better than\n", + "random at this stage, since we start with random weights.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([-1.9035, -2.3565, -2.5702, -2.6696, -3.1425, -1.9348, -2.5253, -2.9717,\n", + " -1.6661, -2.2775], grad_fn=) torch.Size([64, 10])\n" + ] + } + ], + "source": [ + "bs = 64 # batch size\n", + "\n", + "xb = x_train[0:bs] # a mini-batch from x\n", + "preds = model(xb) # predictions\n", + "preds[0], preds.shape\n", + "print(preds[0], preds.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you see, the ``preds`` tensor contains not only the tensor values, but also a\n", + "gradient function. 
We'll use this later to do backprop.\n", + "\n", + "Let's implement negative log-likelihood to use as the loss function\n", + "(again, we can just use standard Python):\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def nll(input, target):\n", + " return -input[range(target.shape[0]), target].mean()\n", + "\n", + "loss_func = nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check our loss with our random model, so we can see if we improve\n", + "after a backprop pass later.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yb = y_train[0:bs]\n", + "print(loss_func(preds, yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's also implement a function to calculate the accuracy of our model.\n", + "For each prediction, if the index with the largest value matches the\n", + "target value, then the prediction was correct.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def accuracy(out, yb):\n", + " preds = torch.argmax(out, dim=1)\n", + " return (preds == yb).float().mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check the accuracy of our random model, so we can see if our\n", + "accuracy improves as our loss improves.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(accuracy(preds, yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now run a training loop. For each iteration, we will:\n", + "\n", + "- select a mini-batch of data (of size ``bs``)\n", + "- use the model to make predictions\n", + "- calculate the loss\n", + "- ``loss.backward()`` updates the gradients of the model, in this case, ``weights``\n", + " and ``bias``.\n", + "\n", + "We now use these gradients to update the weights and bias. We do this\n", + "within the ``torch.no_grad()`` context manager, because we do not want these\n", + "actions to be recorded for our next calculation of the gradient. You can read\n", + "more about how PyTorch's Autograd records operations\n", + "`here `_.\n", + "\n", + "We then set the\n", + "gradients to zero, so that we are ready for the next loop.\n", + "Otherwise, our gradients would record a running tally of all the operations\n", + "that had happened (i.e. ``loss.backward()`` *adds* the gradients to whatever is\n", + "already stored, rather than replacing them).\n", + "\n", + ".. 
tip:: You can use the standard python debugger to step through PyTorch\n", + " code, allowing you to check the various variable values at each step.\n", + " Uncomment ``set_trace()`` below to try it out.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.debugger import set_trace\n", + "\n", + "lr = 0.5 # learning rate\n", + "epochs = 2 # how many epochs to train for\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " # set_trace()\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " with torch.no_grad():\n", + " weights -= weights.grad * lr\n", + " bias -= bias.grad * lr\n", + " weights.grad.zero_()\n", + " bias.grad.zero_()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it: we've created and trained a minimal neural network (in this case, a\n", + "logistic regression, since we have no hidden layers) entirely from scratch!\n", + "\n", + "Let's check the loss and accuracy and compare those to what we got\n", + "earlier. We expect that the loss will have decreased and accuracy to\n", + "have increased, and they have.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using torch.nn.functional\n", + "------------------------------\n", + "\n", + "We will now refactor our code, so that it does the same thing as before, only\n", + "we'll start taking advantage of PyTorch's ``nn`` classes to make it more concise\n", + "and flexible. At each step from here, we should be making our code one or more\n", + "of: shorter, more understandable, and/or more flexible.\n", + "\n", + "The first and easiest step is to make our code shorter by replacing our\n", + "hand-written activation and loss functions with those from ``torch.nn.functional``\n", + "(which is generally imported into the namespace ``F`` by convention). This module\n", + "contains all the functions in the ``torch.nn`` library (whereas other parts of the\n", + "library contain classes). As well as a wide range of loss and activation\n", + "functions, you'll also find here some convenient functions for creating neural\n", + "nets, such as pooling functions. (There are also functions for doing convolutions,\n", + "linear layers, etc, but as we'll see, these are usually better handled using\n", + "other parts of the library.)\n", + "\n", + "If you're using negative log likelihood loss and log softmax activation,\n", + "then Pytorch provides a single function ``F.cross_entropy`` that combines\n", + "the two. So we can even remove the activation function from our model.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "\n", + "loss_func = F.cross_entropy\n", + "\n", + "def model(xb):\n", + " return xb @ weights + bias" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we no longer call ``log_softmax`` in the ``model`` function. 
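If you want to convince yourself that the refactor really is equivalent, you can\n", + "compare the built-in function against our hand-written versions from above (a quick\n", + "sketch; the two printed values should agree up to floating-point error):\n", + "::\n", + "    print(F.cross_entropy(model(xb), yb))\n", + "    print(nll(log_softmax(model(xb)), yb))\n", + "\n", + "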
Let's\n", + "confirm that our loss and accuracy are the same as before:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using nn.Module\n", + "-----------------------------\n", + "Next up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more\n", + "concise training loop. We subclass ``nn.Module`` (which itself is a class and\n", + "able to keep track of state). In this case, we want to create a class that\n", + "holds our weights, bias, and method for the forward step. ``nn.Module`` has a\n", + "number of attributes and methods (such as ``.parameters()`` and ``.zero_grad()``)\n", + "which we will be using.\n", + "\n", + "

.. note:: ``nn.Module`` (uppercase ``M``) is a PyTorch-specific concept, and is a\n", + "   class we'll be using a lot. ``nn.Module`` is not to be confused with the Python\n", + "   concept of a (lowercase ``m``) `module `_,\n", + "   which is a file of Python code that can be imported.

\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch import nn\n", + "\n", + "class Mnist_Logistic(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))\n", + " self.bias = nn.Parameter(torch.zeros(10))\n", + "\n", + " def forward(self, xb):\n", + " return xb @ self.weights + self.bias" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since we're now using an object instead of just using a function, we\n", + "first have to instantiate our model:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_Logistic()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can calculate the loss in the same way as before. Note that\n", + "``nn.Module`` objects are used as if they are functions (i.e they are\n", + "*callable*), but behind the scenes Pytorch will call our ``forward``\n", + "method automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously for our training loop we had to update the values for each parameter\n", + "by name, and manually zero out the grads for each parameter separately, like this:\n", + "::\n", + " with torch.no_grad():\n", + " weights -= weights.grad * lr\n", + " bias -= bias.grad * lr\n", + " weights.grad.zero_()\n", + " bias.grad.zero_()\n", + "\n", + "\n", + "Now we can take advantage of model.parameters() and model.zero_grad() (which\n", + "are both defined by PyTorch for ``nn.Module``) to make those steps more concise\n", + "and less prone to the error of forgetting some of our parameters, particularly\n", + "if we had a more complicated model:\n", + "::\n", + " with torch.no_grad():\n", + " for p in model.parameters(): p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "\n", + "We'll wrap our little training loop in a ``fit`` function so we can run it\n", + "again later.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def fit():\n", + " for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " with torch.no_grad():\n", + " for p in model.parameters():\n", + " p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "fit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's double-check that our loss has gone down:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using nn.Linear\n", + "-------------------------\n", + "\n", + "We continue to refactor our code. Instead of manually defining and\n", + "initializing ``self.weights`` and ``self.bias``, and calculating ``xb @\n", + "self.weights + self.bias``, we will instead use the Pytorch class\n", + "`nn.Linear `_ for a\n", + "linear layer, which does all that for us. 
Pytorch has many types of\n", + "predefined layers that can greatly simplify our code, and often makes it\n", + "faster too.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Mnist_Logistic(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.lin = nn.Linear(784, 10)\n", + "\n", + " def forward(self, xb):\n", + " return self.lin(xb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We instantiate our model and calculate the loss in the same way as before:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_Logistic()\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are still able to use our same ``fit`` method as before.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using optim\n", + "------------------------------\n", + "\n", + "Pytorch also has a package with various optimization algorithms, ``torch.optim``.\n", + "We can use the ``step`` method from our optimizer to take a forward step, instead\n", + "of manually updating each parameter.\n", + "\n", + "This will let us replace our previous manually coded optimization step:\n", + "::\n", + " with torch.no_grad():\n", + " for p in model.parameters(): p -= p.grad * lr\n", + " model.zero_grad()\n", + "\n", + "and instead use just:\n", + "::\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "(``optim.zero_grad()`` resets the gradient to 0 and we need to call it before\n", + "computing the gradient for the next minibatch.)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch import optim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll define a little function to create our model and optimizer so we\n", + "can reuse it in the future.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " model = Mnist_Logistic()\n", + " return model, optim.SGD(model.parameters(), lr=lr)\n", + "\n", + "model, opt = get_model()\n", + "print(loss_func(model(xb), yb))\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " start_i = i * bs\n", + " end_i = start_i + bs\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using Dataset\n", + "------------------------------\n", + "\n", + "PyTorch has an abstract Dataset class. A Dataset can be anything that has\n", + "a ``__len__`` function (called by Python's standard ``len`` function) and\n", + "a ``__getitem__`` function as a way of indexing into it.\n", + "`This tutorial `_\n", + "walks through a nice example of creating a custom ``FacialLandmarkDataset`` class\n", + "as a subclass of ``Dataset``.\n", + "\n", + "PyTorch's `TensorDataset `_\n", + "is a Dataset wrapping tensors. 
By defining a length and way of indexing,\n", + "this also gives us a way to iterate, index, and slice along the first\n", + "dimension of a tensor. This will make it easier to access both the\n", + "independent and dependent variables in the same line as we train.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import TensorDataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both ``x_train`` and ``y_train`` can be combined in a single ``TensorDataset``,\n", + "which will be easier to iterate over and slice.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_ds = TensorDataset(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously, we had to iterate through minibatches of x and y values separately:\n", + "::\n", + " xb = x_train[start_i:end_i]\n", + " yb = y_train[start_i:end_i]\n", + "\n", + "\n", + "Now, we can do these two steps together:\n", + "::\n", + " xb,yb = train_ds[i*bs : i*bs+bs]\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " for i in range((n - 1) // bs + 1):\n", + " xb, yb = train_ds[i * bs: i * bs + bs]\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refactor using DataLoader\n", + "------------------------------\n", + "\n", + "Pytorch's ``DataLoader`` is responsible for managing batches. You can\n", + "create a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier\n", + "to iterate over batches. Rather than having to use ``train_ds[i*bs : i*bs+bs]``,\n", + "the DataLoader gives us each minibatch automatically.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "\n", + "train_ds = TensorDataset(x_train, y_train)\n", + "train_dl = DataLoader(train_ds, batch_size=bs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Previously, our loop iterated over batches (xb, yb) like this:\n", + "::\n", + " for i in range((n-1)//bs + 1):\n", + " xb,yb = train_ds[i*bs : i*bs+bs]\n", + " pred = model(xb)\n", + "\n", + "Now, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader:\n", + "::\n", + " for xb,yb in train_dl:\n", + " pred = model(xb)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " for xb, yb in train_dl:\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + "print(loss_func(model(xb), yb))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,\n", + "our training loop is now dramatically smaller and easier to understand. 
Let's\n", + "now try to add the basic features necessary to create effecive models in practice.\n", + "\n", + "Add validation\n", + "-----------------------\n", + "\n", + "In section 1, we were just trying to get a reasonable training loop set up for\n", + "use on our training data. In reality, you **always** should also have\n", + "a `validation set `_, in order\n", + "to identify if you are overfitting.\n", + "\n", + "Shuffling the training data is\n", + "`important `_\n", + "to prevent correlation between batches and overfitting. On the other hand, the\n", + "validation loss will be identical whether we shuffle the validation set or not.\n", + "Since shuffling takes extra time, it makes no sense to shuffle the validation data.\n", + "\n", + "We'll use a batch size for the validation set that is twice as large as\n", + "that for the training set. This is because the validation set does not\n", + "need backpropagation and thus takes less memory (it doesn't need to\n", + "store the gradients). We take advantage of this to use a larger batch\n", + "size and compute the loss more quickly.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_ds = TensorDataset(x_train, y_train)\n", + "train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)\n", + "\n", + "valid_ds = TensorDataset(x_valid, y_valid)\n", + "valid_dl = DataLoader(valid_ds, batch_size=bs * 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will calculate and print the validation loss at the end of each epoch.\n", + "\n", + "(Note that we always call ``model.train()`` before training, and ``model.eval()``\n", + "before inference, because these are used by layers such as ``nn.BatchNorm2d``\n", + "and ``nn.Dropout`` to ensure appropriate behaviour for these different phases.)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model, opt = get_model()\n", + "\n", + "for epoch in range(epochs):\n", + " model.train()\n", + " for xb, yb in train_dl:\n", + " pred = model(xb)\n", + " loss = loss_func(pred, yb)\n", + "\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)\n", + "\n", + " print(epoch, valid_loss / len(valid_dl))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create fit() and get_data()\n", + "----------------------------------\n", + "\n", + "We'll now do a little refactoring of our own. Since we go through a similar\n", + "process twice of calculating the loss for both the training set and the\n", + "validation set, let's make that into its own function, ``loss_batch``, which\n", + "computes the loss for one batch.\n", + "\n", + "We pass an optimizer in for the training set, and use it to perform\n", + "backprop. 
For the validation set, we don't pass an optimizer, so the\n", + "method doesn't perform backprop.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def loss_batch(model, loss_func, xb, yb, opt=None):\n", + " loss = loss_func(model(xb), yb)\n", + "\n", + " if opt is not None:\n", + " loss.backward()\n", + " opt.step()\n", + " opt.zero_grad()\n", + "\n", + " return loss.item(), len(xb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``fit`` runs the necessary operations to train our model and compute the\n", + "training and validation losses for each epoch.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def fit(epochs, model, loss_func, opt, train_dl, valid_dl):\n", + " for epoch in range(epochs):\n", + " model.train()\n", + " for xb, yb in train_dl:\n", + " loss_batch(model, loss_func, xb, yb, opt)\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " losses, nums = zip(\n", + " *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]\n", + " )\n", + " val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)\n", + "\n", + " print(epoch, val_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``get_data`` returns dataloaders for the training and validation sets.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_data(train_ds, valid_ds, bs):\n", + " return (\n", + " DataLoader(train_ds, batch_size=bs, shuffle=True),\n", + " DataLoader(valid_ds, batch_size=bs * 2),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, our whole process of obtaining the data loaders and fitting the\n", + "model can be run in 3 lines of code:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "model, opt = get_model()\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use these basic 3 lines of code to train a wide variety of models.\n", + "Let's see if we can use them to train a convolutional neural network (CNN)!\n", + "\n", + "Switch to CNN\n", + "-------------\n", + "\n", + "We are now going to build our neural network with three convolutional layers.\n", + "Because none of the functions in the previous section assume anything about\n", + "the model form, we'll be able to use them to train a CNN without any modification.\n", + "\n", + "We will use Pytorch's predefined\n", + "`Conv2d `_ class\n", + "as our convolutional layer. We define a CNN with 3 convolutional layers.\n", + "Each convolution is followed by a ReLU. At the end, we perform an\n", + "average pooling. 
(Note that ``view`` is PyTorch's version of numpy's\n", + "``reshape``)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Mnist_CNN(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)\n", + " self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)\n", + " self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)\n", + "\n", + " def forward(self, xb):\n", + " xb = xb.view(-1, 1, 28, 28)\n", + " xb = F.relu(self.conv1(xb))\n", + " xb = F.relu(self.conv2(xb))\n", + " xb = F.relu(self.conv3(xb))\n", + " xb = F.avg_pool2d(xb, 4)\n", + " return xb.view(-1, xb.size(1))\n", + "\n", + "lr = 0.1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Momentum `_ is a variation on\n", + "stochastic gradient descent that takes previous updates into account as well\n", + "and generally leads to faster training.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = Mnist_CNN()\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n", + "\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "nn.Sequential\n", + "------------------------\n", + "\n", + "``torch.nn`` has another handy class we can use to simply our code:\n", + "`Sequential `_ .\n", + "A ``Sequential`` object runs each of the modules contained within it, in a\n", + "sequential manner. This is a simpler way of writing our neural network.\n", + "\n", + "To take advantage of this, we need to be able to easily define a\n", + "**custom layer** from a given function. For instance, PyTorch doesn't\n", + "have a `view` layer, and we need to create one for our network. 
``Lambda``\n", + "will create a layer that we can then use when defining a network with\n", + "``Sequential``.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Lambda(nn.Module):\n", + " def __init__(self, func):\n", + " super().__init__()\n", + " self.func = func\n", + "\n", + " def forward(self, x):\n", + " return self.func(x)\n", + "\n", + "\n", + "def preprocess(x):\n", + " return x.view(-1, 1, 28, 28)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model created with ``Sequential`` is simply:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = nn.Sequential(\n", + " Lambda(preprocess),\n", + " nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.AvgPool2d(4),\n", + " Lambda(lambda x: x.view(x.size(0), -1)),\n", + ")\n", + "\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n", + "\n", + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wrapping DataLoader\n", + "-----------------------------\n", + "\n", + "Our CNN is fairly concise, but it only works with MNIST, because:\n", + " - It assumes the input is a 28\\*28 long vector\n", + " - It assumes that the final CNN grid size is 4\\*4 (since that's the average\n", + "pooling kernel size we used)\n", + "\n", + "Let's get rid of these two assumptions, so our model works with any 2d\n", + "single channel image. First, we can remove the initial Lambda layer but\n", + "moving the data preprocessing into a generator:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " return x.view(-1, 1, 28, 28), y\n", + "\n", + "\n", + "class WrappedDataLoader:\n", + " def __init__(self, dl, func):\n", + " self.dl = dl\n", + " self.func = func\n", + "\n", + " def __len__(self):\n", + " return len(self.dl)\n", + "\n", + " def __iter__(self):\n", + " batches = iter(self.dl)\n", + " for b in batches:\n", + " yield (self.func(*b))\n", + "\n", + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "train_dl = WrappedDataLoader(train_dl, preprocess)\n", + "valid_dl = WrappedDataLoader(valid_dl, preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we can replace ``nn.AvgPool2d`` with ``nn.AdaptiveAvgPool2d``, which\n", + "allows us to define the size of the *output* tensor we want, rather than\n", + "the *input* tensor we have. 
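For example, a quick check that the output spatial size is 1x1 no matter what the\n", + "input spatial size is:\n", + "::\n", + "    pool = nn.AdaptiveAvgPool2d(1)\n", + "    print(pool(torch.randn(64, 10, 4, 4)).shape)   # torch.Size([64, 10, 1, 1])\n", + "    print(pool(torch.randn(64, 10, 7, 7)).shape)   # torch.Size([64, 10, 1, 1])\n", + "\n", + "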
As a result, our model will work with any\n", + "size input.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = nn.Sequential(\n", + " nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n", + " nn.ReLU(),\n", + " nn.AdaptiveAvgPool2d(1),\n", + " Lambda(lambda x: x.view(x.size(0), -1)),\n", + ")\n", + "\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try it out:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using your GPU\n", + "---------------\n", + "\n", + "If you're lucky enough to have access to a CUDA-capable GPU (you can\n", + "rent one for about $0.50/hour from most cloud providers) you can\n", + "use it to speed up your code. First check that your GPU is working in\n", + "Pytorch:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(torch.cuda.is_available())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then create a device object for it:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev = torch.device(\n", + " \"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's update ``preprocess`` to move batches to the GPU:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " return x.view(-1, 1, 28, 28).to(dev), y.to(dev)\n", + "\n", + "\n", + "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\n", + "train_dl = WrappedDataLoader(train_dl, preprocess)\n", + "valid_dl = WrappedDataLoader(valid_dl, preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can move our model to the GPU.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.to(dev)\n", + "opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should find it runs faster now:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Closing thoughts\n", + "-----------------\n", + "\n", + "We now have a general data pipeline and training loop which you can use for\n", + "training many types of models using Pytorch. 
To see how simple training a model\n", + "can now be, take a look at the `mnist_sample` sample notebook.\n", + "\n", + "Of course, there are many things you'll want to add, such as data augmentation,\n", + "hyperparameter tuning, monitoring training, transfer learning, and so forth.\n", + "These features are available in the fastai library, which has been developed\n", + "using the same design approach shown in this tutorial, providing a natural\n", + "next step for practitioners looking to take their models further.\n", + "\n", + "We promised at the start of this tutorial we'd explain through example each of\n", + "``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize\n", + "what we've seen:\n", + "\n", + " - **torch.nn**\n", + "\n", + " + ``Module``: creates a callable which behaves like a function, but can also\n", + " contain state(such as neural net layer weights). It knows what ``Parameter`` (s) it\n", + " contains and can zero all their gradients, loop through them for weight updates, etc.\n", + " + ``Parameter``: a wrapper for a tensor that tells a ``Module`` that it has weights\n", + " that need updating during backprop. Only tensors with the `requires_grad` attribute set are updated\n", + " + ``functional``: a module(usually imported into the ``F`` namespace by convention)\n", + " which contains activation functions, loss functions, etc, as well as non-stateful\n", + " versions of layers such as convolutional and linear layers.\n", + " - ``torch.optim``: Contains optimizers such as ``SGD``, which update the weights\n", + " of ``Parameter`` during the backward step\n", + " - ``Dataset``: An abstract interface of objects with a ``__len__`` and a ``__getitem__``,\n", + " including classes provided with Pytorch such as ``TensorDataset``\n", + " - ``DataLoader``: Takes any ``Dataset`` and creates an iterator which returns batches of data.\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/tensorflow-test/.gitignore b/tensorflow/test/.gitignore similarity index 100% rename from tensorflow-test/.gitignore rename to tensorflow/test/.gitignore diff --git a/tensorflow-test/TensorFlowMNISTExample.ipynb b/tensorflow/test/TensorFlowMNISTExample.ipynb similarity index 100% rename from tensorflow-test/TensorFlowMNISTExample.ipynb rename to tensorflow/test/TensorFlowMNISTExample.ipynb diff --git a/tensorflow-test/test.py b/tensorflow/test/test.py similarity index 100% rename from tensorflow-test/test.py rename to tensorflow/test/test.py