From 64580b0e34109f0dee46e610b64dc74390d7ef26 Mon Sep 17 00:00:00 2001 From: Claudio Scheer Date: Sun, 10 May 2020 04:00:58 -0300 Subject: [PATCH] Add data.csv and dataset loader for PyTorch --- stupid-bot/dataset/data.csv | 7 ++ stupid-bot/src/notebooks/dataset.ipynb | 128 ++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 stupid-bot/dataset/data.csv diff --git a/stupid-bot/dataset/data.csv b/stupid-bot/dataset/data.csv new file mode 100644 index 0000000..d17ce29 --- /dev/null +++ b/stupid-bot/dataset/data.csv @@ -0,0 +1,7 @@ +question,answer +how are you?,good +how are you?,sad +how are you?,upset +how old are you?,23 years old +how old are you?,9 years old +how old are you?,65 years old \ No newline at end of file diff --git a/stupid-bot/src/notebooks/dataset.ipynb b/stupid-bot/src/notebooks/dataset.ipynb index f94f100..ba254cf 100644 --- a/stupid-bot/src/notebooks/dataset.ipynb +++ b/stupid-bot/src/notebooks/dataset.ipynb @@ -11,7 +11,133 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This section describes the process to load the dataset used to train and test the model." + "This section describes the process to load the dataset used to train and test the model. The dataset I am using on this project is just as stupid as the network. The idea is just to learn more about recurrent neural networks." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionanswer
0how are you?good
1how are you?sad
2how are you?upset
3how old are you?23 years old
4how old are you?9 years old
\n", + "
import torch
from torch.utils.data.dataset import Dataset
import pandas as pd


class StupidBotDataset(Dataset):
    """Question/answer pairs read from a CSV file and served as one-hot tensors.

    The CSV must have a header row; the first column holds the questions and
    the second column holds the answers.

    Each text is split on whitespace and every token is one-hot encoded
    against a vocabulary built from all questions and answers in the file.
    Word-level, whitespace-only tokenization is a deliberate simplification
    (punctuation stays attached to its word, e.g. ``"you?"``).

    Args:
        csv_path: Path (or any object accepted by ``pandas.read_csv``) of the
            CSV file to load.
        device: Device the returned tensors are created on. ``None`` (the
            default) selects CUDA when it is available and falls back to the
            CPU otherwise, so the dataset also works on machines without a GPU.

    Attributes:
        data: The full ``DataFrame`` read from ``csv_path``.
        questions: First column of ``data``.
        answers: Second column of ``data``.
        data_len: Number of question/answer pairs.
        device: ``torch.device`` used for the returned tensors.
        vocab: Mapping from token to its column in the one-hot encoding,
            ordered alphabetically so the encoding is deterministic.
    """

    def __init__(self, csv_path, device=None):
        self.data = pd.read_csv(csv_path, header=0)
        self.questions = self.data.iloc[:, 0]
        self.answers = self.data.iloc[:, 1]
        self.data_len = len(self.data.index)

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

        # Questions and answers share one vocabulary; sorting the tokens keeps
        # the token -> column assignment stable between runs.
        tokens = set()
        for column in (self.questions, self.answers):
            for text in column:
                tokens.update(self._tokenize(text))
        self.vocab = {token: position for position, token in enumerate(sorted(tokens))}

    @staticmethod
    def _tokenize(text):
        """Split ``text`` into whitespace-separated tokens."""
        # str() because pandas parses purely numeric cells as numbers.
        return str(text).split()

    def _one_hot(self, text):
        """Encode ``text`` as a float tensor of shape (n_tokens, vocab_size)."""
        tokens = self._tokenize(text)
        encoded = torch.zeros(len(tokens), len(self.vocab), device=self.device)
        for row, token in enumerate(tokens):
            encoded[row, self.vocab[token]] = 1.0
        return encoded

    def __getitem__(self, index):
        """Return the one-hot encoded ``(question, answer)`` pair at ``index``.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        # Positional lookup: raises IndexError past the end, which is what
        # plain ``for item in dataset`` iteration relies on to terminate.
        x = self._one_hot(self.questions.iloc[index])
        y = self._one_hot(self.answers.iloc[index])
        return x, y

    def __len__(self):
        """Return the number of question/answer pairs."""
        return self.data_len