From 64580b0e34109f0dee46e610b64dc74390d7ef26 Mon Sep 17 00:00:00 2001
From: Claudio Scheer <claudioscheer@protonmail.com>
Date: Sun, 10 May 2020 04:00:58 -0300
Subject: [PATCH] Add data.csv and dataset loader for PyTorch

---
 stupid-bot/dataset/data.csv            |   7 ++
 stupid-bot/src/notebooks/dataset.ipynb | 128 ++++++++++++++++++++++++-
 2 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 stupid-bot/dataset/data.csv
diff --git a/stupid-bot/dataset/data.csv b/stupid-bot/dataset/data.csv
new file mode 100644
index 0000000..d17ce29
--- /dev/null
+++ b/stupid-bot/dataset/data.csv
@@ -0,0 +1,7 @@
+question,answer
+how are you?,good
+how are you?,sad
+how are you?,upset
+how old are you?,23 years old
+how old are you?,9 years old
+how old are you?,65 years old
\ No newline at end of file
diff --git a/stupid-bot/src/notebooks/dataset.ipynb b/stupid-bot/src/notebooks/dataset.ipynb
index f94f100..ba254cf 100644
--- a/stupid-bot/src/notebooks/dataset.ipynb
+++ b/stupid-bot/src/notebooks/dataset.ipynb
@@ -11,7 +11,133 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This section describes the process to load the dataset used to train and test the model."
+    "This section describes the process to load the dataset used to train and test the model. The dataset I am using in this project is just as stupid as the network. The idea is just to learn more about recurrent neural networks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>question</th>\n",
+       "      <th>answer</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>how are you?</td>\n",
+       "      <td>good</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>how are you?</td>\n",
+       "      <td>sad</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>how are you?</td>\n",
+       "      <td>upset</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>how old are you?</td>\n",
+       "      <td>23 years old</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>how old are you?</td>\n",
+       "      <td>9 years old</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           question        answer\n",
+       "0      how are you?          good\n",
+       "1      how are you?           sad\n",
+       "2      how are you?         upset\n",
+       "3  how old are you?  23 years old\n",
+       "4  how old are you?   9 years old"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset = \"../../dataset/data.csv\"\n",
+    "data = pd.read_csv(dataset, header=0)\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This dataset is not large enough to justify using the PyTorch `Dataset` utility class. However, I will use it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from torch.utils.data.dataset import Dataset\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "class StupidBotDataset(Dataset):\n",
+    "    def __init__(self, csv_path):\n",
+    "        self.data = pd.read_csv(csv_path, header=0)\n",
+    "        self.questions = self.data.iloc[:, 0]\n",
+    "        self.answers = self.data.iloc[:, 1]\n",
+    "        self.data_len = len(self.data.index)\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        x = [self.questions[index]]\n",
+    "        x = torch.Tensor(x).cuda()\n",
+    "        y = [self.price[index]]\n",
+    "        y = torch.Tensor(y).cuda()\n",
+    "        # One-hot encode questions and answers.\n",
+    "        return x, y\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.data_len"
    ]
   }
  ],
-- 
GitLab


	question	answer
0	how are you?	good
1	how are you?	sad
2	how are you?	upset
3	how old are you?	23 years old
4	how old are you?	9 years old