diff --git a/stupid-bot/dataset/data.csv b/stupid-bot/dataset/data.csv new file mode 100644 index 0000000000000000000000000000000000000000..d17ce29511db6ddcab2e82b939ef4b4b774a1eea --- /dev/null +++ b/stupid-bot/dataset/data.csv @@ -0,0 +1,7 @@ +question,answer +how are you?,good +how are you?,sad +how are you?,upset +how old are you?,23 years old +how old are you?,9 years old +how old are you?,65 years old \ No newline at end of file diff --git a/stupid-bot/src/notebooks/dataset.ipynb b/stupid-bot/src/notebooks/dataset.ipynb index f94f1002a63eedcad22f12b8a4ee177c32e18107..ba254cfa4bc2d468edb1acf59d1add2042709f7e 100644 --- a/stupid-bot/src/notebooks/dataset.ipynb +++ b/stupid-bot/src/notebooks/dataset.ipynb @@ -11,7 +11,133 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This section describes the process to load the dataset used to train and test the model." + "This section describes the process to load the dataset used to train and test the model. The dataset I am using on this project is just as stupid as the network. The idea is just to learn more about recurrent neural networks." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionanswer
0how are you?good
1how are you?sad
2how are you?upset
3how old are you?23 years old
4how old are you?9 years old
\n", + "
class StupidBotDataset(Dataset):
    """Question/answer pairs read from a CSV file, served as one-hot tensors.

    The CSV must have a header row; the first column holds the questions and
    the second column holds the answers. Each text is encoded character by
    character against a vocabulary built from every character that appears in
    either column.

    Args:
        csv_path: Path (or file-like object) accepted by ``pandas.read_csv``.
        device: Device the returned tensors are placed on. Defaults to CUDA
            when it is available and to the CPU otherwise.

    Attributes set on the instance:
        data: The raw ``DataFrame`` read from ``csv_path``.
        questions / answers: First and second column, as strings.
        data_len: Number of rows in ``data``.
        device: The resolved ``torch.device``.
        vocab: Mapping from character to its column in the one-hot encoding.
    """

    def __init__(self, csv_path, device=None):
        self.data = pd.read_csv(csv_path, header=0)
        # Force text: pandas parses an all-numeric column as numbers and an
        # empty cell as NaN, neither of which can be walked char by char.
        self.questions = self.data.iloc[:, 0].fillna("").astype(str)
        self.answers = self.data.iloc[:, 1].fillna("").astype(str)
        self.data_len = len(self.data.index)

        if device is None:
            # Do not require a GPU just to read a handful of CSV rows.
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

        # Sorted so that the character -> column mapping is deterministic.
        alphabet = sorted(set("".join(self.questions) + "".join(self.answers)))
        self.vocab = {char: column for column, char in enumerate(alphabet)}

    def _one_hot(self, text):
        """Return a float tensor of shape ``(len(text), len(self.vocab))``."""
        encoded = torch.zeros(len(text), len(self.vocab))
        for row, char in enumerate(text):
            encoded[row, self.vocab[char]] = 1.0
        # Built on the CPU and moved once, so it works for any target device.
        return encoded.to(self.device)

    def __getitem__(self, index):
        """Return the one-hot encoded ``(question, answer)`` pair at ``index``.

        ``index`` is positional. An out-of-range value raises ``IndexError``.
        The two tensors generally differ in length, so batching several items
        needs padding or a custom ``collate_fn``.
        """
        # Strings cannot be wrapped in a tensor directly, so encode them first.
        x = self._one_hot(self.questions.iloc[index])
        y = self._one_hot(self.answers.iloc[index])
        return x, y

    def __len__(self):
        """Return the number of question/answer rows."""
        return self.data_len