"This section describes the process to load the dataset used to train and test the model."
"This section describes the process to load the dataset used to train and test the model. The dataset I am using on this project is just as stupid as the network. The idea is just to learn more about recurrent neural networks."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question</th>\n",
" <th>answer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>how are you?</td>\n",
" <td>good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>how are you?</td>\n",
" <td>sad</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>how are you?</td>\n",
" <td>upset</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>how old are you?</td>\n",
" <td>23 years old</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>how old are you?</td>\n",
" <td>9 years old</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" question answer\n",
"0 how are you? good\n",
"1 how are you? sad\n",
"2 how are you? upset\n",
"3 how old are you? 23 years old\n",
"4 how old are you? 9 years old"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset = \"../../dataset/data.csv\"\n",
"data = pd.read_csv(dataset, header=0)\n",
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This dataset is not large enough to justify using the PyTorch `Dataset` utility class. However, I will use it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from torch.utils.data.dataset import Dataset\n",
"import pandas as pd\n",
"\n",
"\n",
"class StupidBotDataset(Dataset):\n",
" def __init__(self, csv_path):\n",
" self.data = pd.read_csv(csv_path, header=0)\n",
" self.questions = self.data.iloc[:, 0]\n",
" self.answers = self.data.iloc[:, 1]\n",
" self.data_len = len(self.data.index)\n",
"\n",
" def __getitem__(self, index):\n",
" x = [self.questions[index]]\n",
" x = torch.Tensor(x).cuda()\n",
" y = [self.price[index]]\n",
" y = torch.Tensor(y).cuda()\n",
" # One-hot encode questions and answers.\n",
" return x, y\n",
"\n",
" def __len__(self):\n",
" return self.data_len"
]
]
}
}
],
],
...
...
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Dataset
## Dataset
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
This section describes the process to load the dataset used to train and test the model.
This section describes the process to load the dataset used to train and test the model. The dataset I am using in this project is just as stupid as the network; the goal is simply to learn more about recurrent neural networks.
%% Cell type:code id: tags:
``` python
import pandas as pd
```
%% Cell type:code id: tags:
``` python
# Relative path to the CSV that holds the question/answer pairs.
dataset = "../../dataset/data.csv"
# header=0: the first row of the file supplies the column names.
data = pd.read_csv(filepath_or_buffer=dataset, header=0)
# Show the first rows as the cell output.
data.head()
```
%% Output
question answer
0 how are you? good
1 how are you? sad
2 how are you? upset
3 how old are you? 23 years old
4 how old are you? 9 years old
%% Cell type:markdown id: tags:
This dataset is not large enough to justify using the PyTorch `Dataset` utility class. However, I will use it.