From 86a5125e7586eb5d4e86a7fdb4f1f2f8cf41b6a0 Mon Sep 17 00:00:00 2001
From: Claudio Scheer
Date: Wed, 24 Jun 2020 22:17:09 -0300
Subject: [PATCH] Log loss and learning rate

---
 results/README.md                |  4 +++-
 src/seq2seq/lib/global_args.py   |  1 -
 src/seq2seq/lib/seq2seq_model.py | 13 +++++++------
 src/seq2seq/main.py              |  9 ++++++---
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/results/README.md b/results/README.md
index 9d88fef..4cb4c26 100644
--- a/results/README.md
+++ b/results/README.md
@@ -10,4 +10,6 @@ These replies were generated using the model available in the [v0.0.6-alpha](htt
 | John, please check with Louise as how to get the attached to her. | Lynn, I have left you a voice mail re this. Did you get the information you need to fill out the form? Thanks, Randy. |
 | I have an 8:30 a.m. doctor's appointment. | Ava, I have plans on coming to your office. Thanks, Ricki |
 | Nidia has changed her mind there is no credit on this deal. They will need a deemed ISDA though. Can you follow up with Nidia to see who is drafting the confirm so we can send them the appropriate deemed ISDA. Thanks | The status of the A / R has been changed. I highlighted in blue those deals listed below. would you like to get a copy of this and let me know if you need anything else. Thanks. |
-| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
\ No newline at end of file
+| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
+| Hi. Can you send me the invitation to the meeting? | Hi Liz, I will attend this. Lizzette to send you a copy of the original invitation for you. Thank you. |
+| Shall we have the meeting tomorrow? | meeting is postponed because the 3 : 00 p. m. meeting today. meeting is canceled for today. |
\ No newline at end of file
diff --git a/src/seq2seq/lib/global_args.py b/src/seq2seq/lib/global_args.py
index b69488e..af17997 100644
--- a/src/seq2seq/lib/global_args.py
+++ b/src/seq2seq/lib/global_args.py
@@ -43,7 +43,6 @@ global_args = {
     "save_steps": 2000,
     "save_optimizer_and_scheduler": True,
     "silent": False,
-    "tensorboard_dir": None,
     "train_batch_size": 8,
     "use_cached_eval_features": False,
     "use_early_stopping": False,
diff --git a/src/seq2seq/lib/seq2seq_model.py b/src/seq2seq/lib/seq2seq_model.py
index 88e1464..b28a372 100644
--- a/src/seq2seq/lib/seq2seq_model.py
+++ b/src/seq2seq/lib/seq2seq_model.py
@@ -6,6 +6,7 @@ import random
 import warnings
 from multiprocessing import cpu_count
 from pathlib import Path
+import csv
 
 import numpy as np
 from tqdm.auto import tqdm, trange
@@ -14,7 +15,6 @@ import pandas as pd
 import torch
 from .global_args import global_args
 from .seq2seq_utils import Seq2SeqDataset, SimpleSummarizationDataset
-from tensorboardX import SummaryWriter
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
@@ -277,7 +277,10 @@ class Seq2SeqModel:
         model = self.model
         args = self.args
 
-        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
+        csv_file = open("outputs/log.csv", "w")
+        csv_file_writer = csv.writer(csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL)
+        csv_file_writer.writerow(["step", "lr", "loss"])
+
         train_sampler = RandomSampler(train_dataset)
         train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])
 
@@ -408,8 +411,7 @@
 
                 if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
                     # Log metrics
-                    tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
-                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
+                    csv_file_writer.writerow([global_step, scheduler.get_lr()[0], (tr_loss - logging_loss) / args["logging_steps"]])
                     logging_loss = tr_loss
                     if args["wandb_project"]:
                         wandb.log(
@@ -425,8 +427,6 @@
 
                 if args["evaluate_during_training"] and (args["evaluate_during_training_steps"] > 0 and global_step % args["evaluate_during_training_steps"] == 0):
                     # Only evaluate when single GPU otherwise metrics may not average well
                     results = self.eval_model(eval_data, verbose=verbose and args["evaluate_during_training_verbose"], silent=args["evaluate_during_training_silent"], **kwargs,)
-                    for key, value in results.items():
-                        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
 
                     output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
@@ -560,6 +560,7 @@
                     train_iterator.close()
                 return global_step, tr_loss / global_step
 
+        csv_file.close()
         return global_step, tr_loss / global_step
 
     def eval_model(self, eval_data, output_dir=None, verbose=True, silent=False, **kwargs):
diff --git a/src/seq2seq/main.py b/src/seq2seq/main.py
index b1ca563..2605bcc 100644
--- a/src/seq2seq/main.py
+++ b/src/seq2seq/main.py
@@ -9,7 +9,7 @@
 transformers_logger = logging.getLogger("transformers")
 transformers_logger.setLevel(logging.WARNING)
 
 file_path = os.path.dirname(os.path.abspath(__file__))
-train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256-dev.csv"))
+train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256.csv"))
 train_df.drop(train_df.columns[[0]], axis=1, inplace=True)
 train_df.dropna(subset=["input_text", "target_text"], inplace=True)
@@ -17,10 +17,11 @@ model_args = {
     "fp16": False,
     "overwrite_output_dir": True,
     "max_seq_length": 256,
+    "eval_batch_size": 1,
     "train_batch_size": 8,
-    "num_train_epochs": 128,
+    "num_train_epochs": 32,
     "max_length": 256,
-    "learning_rate": 3e-5,
+    "learning_rate": 1e-4,
     "save_eval_checkpoints": False,
     "save_model_every_epoch": False,
     "save_best_model": False,
@@ -28,6 +29,8 @@
     "num_beams": 3,
     "gradient_accumulation_steps": 1,
     "use_multiprocessing": False,
+    "logging_steps": 50,
+    "adam_epsilon": 1e-4,
 }
 
 model = Seq2SeqModel(encoder_type="bert", encoder_name="bert-base-cased", decoder_name="bert-base-cased", args=model_args)
-- 
GitLab
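
Note: with this patch the per-step learning rate and training loss are written to outputs/log.csv (columns "step", "lr", "loss") instead of TensorBoard. A minimal sketch of one way to inspect that file after a run; it is illustrative only, not part of the commit, and assumes matplotlib is available (pandas is already used by the project, matplotlib is not added by this patch):

    import pandas as pd
    import matplotlib.pyplot as plt

    # outputs/log.csv is the path hardcoded in Seq2SeqModel above;
    # its header row is ["step", "lr", "loss"].
    log = pd.read_csv("outputs/log.csv")

    # Plot training loss and learning rate against the global step.
    fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
    ax_loss.plot(log["step"], log["loss"])
    ax_loss.set_ylabel("loss")
    ax_lr.plot(log["step"], log["lr"])
    ax_lr.set_ylabel("learning rate")
    ax_lr.set_xlabel("global step")
    fig.tight_layout()
    plt.show()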