From 1dd80c11aaa5b2f94988767d9f647a49840423f4 Mon Sep 17 00:00:00 2001 From: Claudio Scheer Date: Sat, 23 May 2020 20:21:54 -0300 Subject: [PATCH] Download nltk punkt --- notebooks/data_loader.ipynb | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/notebooks/data_loader.ipynb b/notebooks/data_loader.ipynb index 3e6ea42..c1a1880 100644 --- a/notebooks/data_loader.ipynb +++ b/notebooks/data_loader.ipynb @@ -35,19 +35,42 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2020-05-23T21:04:37.477348Z", - "start_time": "2020-05-23T21:04:37.475048Z" + "end_time": "2020-05-23T23:20:55.848368Z", + "start_time": "2020-05-23T23:20:54.138572Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/water/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import os\n", "import pandas as pd\n", "import numpy as np\n", - "from nltk.tokenize import word_tokenize" + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "\n", + "\n", + "nltk.download(\"punkt\")" ] }, { @@ -280,7 +303,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "At least, we go through all the sentences in the dataset and create the token dictionary." + "Finally, we go through all the sentences in the dataset and create the token dictionary." ] }, { -- GitLab