diff --git a/.gitignore b/.gitignore
index 0f1c87869c2bc9beeda5d88743e07fe17684a774..3ee33c46b63bd4ee8f8122d537153051d4d8f6bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,4 +128,5 @@ dmypy.json
 
 # End of https://www.gitignore.io/api/python,jupyternotebooks,visualstudiocode
 !src/lib
-.vscode
\ No newline at end of file
+.vscode
+model.pytorch
\ No newline at end of file
diff --git a/src/lib/dataset.py b/src/lib/dataset.py
index fd4ffa826ae5ab2d5f402563e068bfd382dbf562..6fa7e8720c765a54b920c6a286351f9d879e634f 100644
--- a/src/lib/dataset.py
+++ b/src/lib/dataset.py
@@ -55,17 +55,3 @@ class RandomDatasetLoader(Dataset):
 
     def int2characters(self, characters):
         return [self.int2char[c] for c in characters]
-
-    # def one_hot_encode(self, characters):
-    #     batches = characters.shape[0]
-    #     sequence_size = characters.shape[1]
-    #     encoded = np.zeros(
-    #         [batches, sequence_size, self.unique_characters_length], dtype=int,
-    #     )
-    #     for i in range(batches):
-    #         for j in range(sequence_size):
-    #             encoded[i][j][characters[i][j]] = 1
-    #     return encoded
-
-    # def one_hot_decode(self, characters):
-    #     return [np.argmax(x) for x in characters]
diff --git a/src/main.py b/src/main.py
index 87733e793f5adf9c23d8662bbdc34201ee87e891..2de936cd931f29a1940ab1476ad941a3be901124 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,3 +1,4 @@
+import os
 import numpy as np
 import torch
 import torch.nn as nn
@@ -6,8 +7,8 @@
 from lib.model import LSTMModel
 from train import train_model
 
-torch.autograd.set_detect_anomaly(True)
-dataset = RandomDatasetLoader("../dataset/latex.txt")
+file_path = os.path.dirname(os.path.abspath(__file__))
+dataset = RandomDatasetLoader(os.path.join(file_path, "../dataset/source-code.txt"))
 
 model = LSTMModel(dataset.unique_characters_length, dataset.unique_characters_length)
 model.cuda()
@@ -15,6 +16,6 @@
 print("Starting train process...")
 
 model = train_model(
-    model, dataset, show_loss_plot=True, n_epochs=1000, sequence_size=256
+    model, dataset, show_loss_plot=True, n_epochs=128, sequence_size=256
 )
 torch.save(model, "../model.pytorch")
diff --git a/src/predict.py b/src/predict.py
index a284cdbc2ccbbcaad0f89bc413369ef1fcc06e5a..e894a782f7305b69d6594cbece67a9cbf7a982ac 100644
--- a/src/predict.py
+++ b/src/predict.py
@@ -1,3 +1,4 @@
+import os
 import torch
 import torch.nn as nn
 from torch.autograd import Variable
@@ -6,48 +7,14 @@
 from lib.model import LSTMModel
 import numpy as np
 
-dataset = RandomDatasetLoader("../dataset/latex.txt")
+file_path = os.path.dirname(os.path.abspath(__file__))
+dataset = RandomDatasetLoader(os.path.join(file_path, "../dataset/source-code.txt"))
 
 model = torch.load("../model.pytorch")
 model.cpu()
 model.eval()
 
 
-# def encode_character(character):
-#     character = dataset.characters2int(character)
-#     character = torch.tensor(character)
-#     character.unsqueeze_(axis=0)
-#     character = dataset.one_hot_encode(character)
-#     character = torch.from_numpy(character).float()
-#     return character
-
-
-# def get_predicted_character(output):
-#     probability = nn.functional.softmax(output[-1], dim=0).data
-#     output = torch.max(probability, dim=0)[1].item()
-#     output = dataset.int2char[output]
-#     return output
-
-
-# def predict(model, prediction_length, start_text):
-#     characters = [x for x in start_text]
-#     size_prediction = prediction_length - len(characters)
-
-#     for character in characters:
-#         output, previous_hidden_states = model(
-#             encode_character(character), previous_hidden_states
-#         )
-
-#     for x in range(size_prediction):
-#         output, previous_hidden_states = model(
-#             encode_character(characters), previous_hidden_states
-#         )
-#         output = get_predicted_character(output)
-#         characters.append(output)
-
-#     return characters
-
-
 def evaluate(model, start_text, prediction_length, temperature=0.8):
     previous_hidden_states = model.init_hidden_states(1, False)
     prime_input = Variable(torch.tensor(dataset.characters2int(start_text)))
@@ -76,5 +43,5 @@
 
 
 with torch.no_grad():
-    prediction = evaluate(model, "\document", 5000)
+    prediction = evaluate(model, "static int", 5000)
     print("".join(prediction))
diff --git a/src/train.py b/src/train.py
index 9b2700c0df04191c6d66eb09d50ab75c6386361d..954bc8d130c1e9d309af7b892c8fc2c21a0197a7 100644
--- a/src/train.py
+++ b/src/train.py
@@ -3,9 +3,9 @@
 import torch.nn as nn
 import torch.optim as optim
 
-def train_model(model, dataset, show_loss_plot=False, n_epochs=1000, sequence_size=16):
+def train_model(model, dataset, show_loss_plot=False, n_epochs=16, sequence_size=16):
     criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
+    optimizer = optim.Adam(model.parameters(), lr=0.0001)
     train_loss_over_epochs = []
     validation_loss_over_epochs = []
 
@@ -22,7 +22,7 @@ def train_model(model, dataset, show_loss_plot=False, n_epochs=1000, sequence_si
         train_loss += criterion(output, y[i].unsqueeze(0))
 
         train_loss.backward()
-        # nn.utils.clip_grad_norm_(model.parameters(), 0.5)
+        nn.utils.clip_grad_norm_(model.parameters(), 0.5)
         optimizer.step()
 
         # x_validation, y_validation = dataset.get_batch(batch_size=1, sequence_size=1)