diff --git a/results/README.md b/results/README.md
index 9d88feffb1263ad8df93dbc82f43b052a17cab20..4cb4c26ed53f437a36dc28a7b771e0c750043a6a 100644
--- a/results/README.md
+++ b/results/README.md
@@ -10,4 +10,6 @@ These replies were generated using the model available in the [v0.0.6-alpha](htt
 | John, please check with Louise as how to get the attached to her. | Lynn, I have left you a voice mail re this. Did you get the information you need to fill out the form? Thanks, Randy. |
 | I have an 8:30 a.m. doctor's appointment. | Ava, I have plans on coming to your office. Thanks, Ricki |
 | Nidia has changed her mind there is no credit on this deal. They will need a deemed ISDA though. Can you follow up with Nidia to see who is drafting the confirm so we can send them the appropriate deemed ISDA. Thanks | The status of the A / R has been changed. I highlighted in blue those deals listed below. would you like to get a copy of this and let me know if you need anything else. Thanks. |
-| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
\ No newline at end of file
+| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
+| Hi. Can you send me the invitation to the meeting? | Hi Liz, I will attend this. Lizzette to send you a copy of the original invitation for you. Thank you. |
+| Shall we have the meeting tomorrow? | meeting is postponed because the 3 : 00 p. m. meeting today. meeting is canceled for today. |
\ No newline at end of file
diff --git a/src/seq2seq/lib/global_args.py b/src/seq2seq/lib/global_args.py
index b69488e53af078c2d2cc80d35020f3bd91079025..af179977336132c6a924b208802659221086f182 100644
--- a/src/seq2seq/lib/global_args.py
+++ b/src/seq2seq/lib/global_args.py
@@ -43,7 +43,6 @@ global_args = {
     "save_steps": 2000,
     "save_optimizer_and_scheduler": True,
     "silent": False,
-    "tensorboard_dir": None,
     "train_batch_size": 8,
     "use_cached_eval_features": False,
     "use_early_stopping": False,
diff --git a/src/seq2seq/lib/seq2seq_model.py b/src/seq2seq/lib/seq2seq_model.py
index 88e1464ddb96b4f44c666b7ea9bcfb2e8d319515..b28a372b58d8cbe2d5547514101be06fa0e91a14 100644
--- a/src/seq2seq/lib/seq2seq_model.py
+++ b/src/seq2seq/lib/seq2seq_model.py
@@ -6,6 +6,7 @@ import random
 import warnings
 from multiprocessing import cpu_count
 from pathlib import Path
+import csv
 
 import numpy as np
 from tqdm.auto import tqdm, trange
@@ -14,7 +15,6 @@ import pandas as pd
 import torch
 from .global_args import global_args
 from .seq2seq_utils import Seq2SeqDataset, SimpleSummarizationDataset
-from tensorboardX import SummaryWriter
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
@@ -277,7 +277,10 @@ class Seq2SeqModel:
         model = self.model
         args = self.args
 
-        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
+        csv_file = open("outputs/log.csv", "w")
+        csv_file_writer = csv.writer(csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL)
+        csv_file_writer.writerow(["step", "lr", "loss"])
+
         train_sampler = RandomSampler(train_dataset)
         train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])
 
@@ -408,8 +411,7 @@
 
                     if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
                         # Log metrics
-                        tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
-                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
+                        csv_file_writer.writerow([global_step, scheduler.get_lr()[0], (tr_loss - logging_loss) / args["logging_steps"]])
                         logging_loss = tr_loss
                         if args["wandb_project"]:
                             wandb.log(
@@ -425,8 +427,6 @@
                     if args["evaluate_during_training"] and (args["evaluate_during_training_steps"] > 0 and global_step % args["evaluate_during_training_steps"] == 0):
                         # Only evaluate when single GPU otherwise metrics may not average well
                         results = self.eval_model(eval_data, verbose=verbose and args["evaluate_during_training_verbose"], silent=args["evaluate_during_training_silent"], **kwargs,)
-                        for key, value in results.items():
-                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)
 
                         output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
 
@@ -560,6 +560,7 @@ class Seq2SeqModel:
                             train_iterator.close()
                             return global_step, tr_loss / global_step
 
+        csv_file.close()
         return global_step, tr_loss / global_step
 
     def eval_model(self, eval_data, output_dir=None, verbose=True, silent=False, **kwargs):
diff --git a/src/seq2seq/main.py b/src/seq2seq/main.py
index b1ca5632b039ee6832547f98e17d1825778f167c..2605bccdc0fcd3c76f7edc4766a61c6ef1a2b166 100644
--- a/src/seq2seq/main.py
+++ b/src/seq2seq/main.py
@@ -9,7 +9,7 @@
 transformers_logger = logging.getLogger("transformers")
 transformers_logger.setLevel(logging.WARNING)
 
 file_path = os.path.dirname(os.path.abspath(__file__))
-train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256-dev.csv"))
+train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256.csv"))
 train_df.drop(train_df.columns[[0]], axis=1, inplace=True)
 train_df.dropna(subset=["input_text", "target_text"], inplace=True)
@@ -17,10 +17,11 @@
     "fp16": False,
     "overwrite_output_dir": True,
     "max_seq_length": 256,
+    "eval_batch_size": 1,
     "train_batch_size": 8,
-    "num_train_epochs": 128,
+    "num_train_epochs": 32,
     "max_length": 256,
-    "learning_rate": 3e-5,
+    "learning_rate": 1e-4,
     "save_eval_checkpoints": False,
     "save_model_every_epoch": False,
     "save_best_model": False,
@@ -28,6 +29,8 @@
     "num_beams": 3,
     "gradient_accumulation_steps": 1,
     "use_multiprocessing": False,
+    "logging_steps": 50,
+    "adam_epsilon": 1e-4,
 }
 
 model = Seq2SeqModel(encoder_type="bert", encoder_name="bert-base-cased", decoder_name="bert-base-cased", args=model_args)