From 86a5125e7586eb5d4e86a7fdb4f1f2f8cf41b6a0 Mon Sep 17 00:00:00 2001
From: Claudio Scheer
Date: Wed, 24 Jun 2020 22:17:09 -0300
Subject: [PATCH] Log loss and learning rate

---
 results/README.md                |  4 +++-
 src/seq2seq/lib/global_args.py   |  1 -
 src/seq2seq/lib/seq2seq_model.py | 13 +++++++------
 src/seq2seq/main.py              |  9 ++++++---
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/results/README.md b/results/README.md
index 9d88fef..4cb4c26 100644
--- a/results/README.md
+++ b/results/README.md
@@ -10,4 +10,6 @@ These replies were generated using the model available in the [v0.0.6-alpha](htt
 | John, please check with Louise as how to get the attached to her. | Lynn, I have left you a voice mail re this. Did you get the information you need to fill out the form? Thanks, Randy. |
 | I have an 8:30 a.m. doctor's appointment. | Ava, I have plans on coming to your office. Thanks, Ricki |
 | Nidia has changed her mind there is no credit on this deal. They will need a deemed ISDA though. Can you follow up with Nidia to see who is drafting the confirm so we can send them the appropriate deemed ISDA. Thanks | The status of the A / R has been changed. I highlighted in blue those deals listed below. would you like to get a copy of this and let me know if you need anything else. Thanks. |
-| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
\ No newline at end of file
+| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
+| Hi. Can you send me the invitation to the meeting? | Hi Liz, I will attend this. Lizzette to send you a copy of the original invitation for you. Thank you. |
+| Shall we have the meeting tomorrow? | meeting is postponed because the 3 : 00 p. m. meeting today. meeting is canceled for today. |
\ No newline at end of file
diff --git a/src/seq2seq/lib/global_args.py b/src/seq2seq/lib/global_args.py
index b69488e..af17997 100644
--- a/src/seq2seq/lib/global_args.py
+++ b/src/seq2seq/lib/global_args.py
@@ -43,7 +43,6 @@ global_args = {
     "save_steps": 2000,
     "save_optimizer_and_scheduler": True,
     "silent": False,
-    "tensorboard_dir": None,
     "train_batch_size": 8,
     "use_cached_eval_features": False,
     "use_early_stopping": False,
diff --git a/src/seq2seq/lib/seq2seq_model.py b/src/seq2seq/lib/seq2seq_model.py
index 88e1464..b28a372 100644
--- a/src/seq2seq/lib/seq2seq_model.py
+++ b/src/seq2seq/lib/seq2seq_model.py
@@ -6,6 +6,7 @@ import random
 import warnings
 from multiprocessing import cpu_count
 from pathlib import Path
+import csv
 
 import numpy as np
 from tqdm.auto import tqdm, trange
@@ -14,7 +15,6 @@ import pandas as pd
 import torch
 from .global_args import global_args
 from .seq2seq_utils import Seq2SeqDataset, SimpleSummarizationDataset
-from tensorboardX import SummaryWriter
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
@@ -277,7 +277,10 @@ class Seq2SeqModel:
         model = self.model
         args = self.args
 
-        tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
+        csv_file = open("outputs/log.csv", "w")
+        csv_file_writer = csv.writer(csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL)
+        csv_file_writer.writerow(["step", "lr", "loss"])
+
         train_sampler = RandomSampler(train_dataset)
         train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])
 
@@ -408,8 +411,7 @@
 
                 if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
                     # Log metrics
-                    tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
-                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
+                    csv_file_writer.writerow([global_step, scheduler.get_lr()[0], (tr_loss - logging_loss) / args["logging_steps"]])
                     logging_loss = tr_loss
                     if args["wandb_project"]:
                         wandb.log(
@@ -425,8 +427,6 @@
 
                 if args["evaluate_during_training"] and (args["evaluate_during_training_steps"] > 0 and global_step % args["evaluate_during_training_steps"] == 0):
                     # Only evaluate when single GPU otherwise metrics may not average well
                     results = self.eval_model(eval_data, verbose=verbose and args["evaluate_during_training_verbose"], silent=args["evaluate_during_training_silent"], **kwargs,)
-                    for key, value in results.items():
-                        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
 
                     output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
@@ -560,6 +560,7 @@
                     train_iterator.close()
                 return global_step, tr_loss / global_step
 
+        csv_file.close()
         return global_step, tr_loss / global_step
 
     def eval_model(self, eval_data, output_dir=None, verbose=True, silent=False, **kwargs):
diff --git a/src/seq2seq/main.py b/src/seq2seq/main.py
index b1ca563..2605bcc 100644
--- a/src/seq2seq/main.py
+++ b/src/seq2seq/main.py
@@ -9,7 +9,7 @@
 transformers_logger = logging.getLogger("transformers")
 transformers_logger.setLevel(logging.WARNING)
 
 file_path = os.path.dirname(os.path.abspath(__file__))
-train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256-dev.csv"))
+train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256.csv"))
 train_df.drop(train_df.columns[[0]], axis=1, inplace=True)
 train_df.dropna(subset=["input_text", "target_text"], inplace=True)
@@ -17,10 +17,11 @@ model_args = {
     "fp16": False,
     "overwrite_output_dir": True,
     "max_seq_length": 256,
+    "eval_batch_size": 1,
     "train_batch_size": 8,
-    "num_train_epochs": 128,
+    "num_train_epochs": 32,
     "max_length": 256,
-    "learning_rate": 3e-5,
+    "learning_rate": 1e-4,
     "save_eval_checkpoints": False,
     "save_model_every_epoch": False,
     "save_best_model": False,
@@ -28,6 +29,8 @@
     "num_beams": 3,
     "gradient_accumulation_steps": 1,
     "use_multiprocessing": False,
+    "logging_steps": 50,
+    "adam_epsilon": 1e-4,
 }
 
 model = Seq2SeqModel(encoder_type="bert", encoder_name="bert-base-cased", decoder_name="bert-base-cased", args=model_args)
-- 
GitLab
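
Note: with this patch the per-step learning rate and training loss are written to outputs/log.csv (columns "step", "lr", "loss") instead of TensorBoard. A minimal sketch of one way to inspect that file after a run; it is illustrative only, not part of the commit, and assumes matplotlib is available (pandas is already used by the project, matplotlib is not added by this patch):

    import pandas as pd
    import matplotlib.pyplot as plt

    # outputs/log.csv is the path hardcoded in Seq2SeqModel above;
    # its header row is ["step", "lr", "loss"].
    log = pd.read_csv("outputs/log.csv")

    # Plot training loss and learning rate against the global step.
    fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
    ax_loss.plot(log["step"], log["loss"])
    ax_loss.set_ylabel("loss")
    ax_lr.plot(log["step"], log["lr"])
    ax_lr.set_ylabel("learning rate")
    ax_lr.set_xlabel("global step")
    fig.tight_layout()
    plt.show()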