Verified commit 86a5125e authored by Claudio Scheer

Log loss and learning rate

parent d42746fa
@@ -10,4 +10,6 @@ These replies were generated using the model available in the [v0.0.6-alpha](htt
 | John, please check with Louise as how to get the attached to her. | Lynn, I have left you a voice mail re this. Did you get the information you need to fill out the form? Thanks, Randy. |
 | I have an 8:30 a.m. doctor's appointment. | Ava, I have plans on coming to your office. Thanks, Ricki |
 | Nidia has changed her mind there is no credit on this deal. They will need a deemed ISDA though. Can you follow up with Nidia to see who is drafting the confirm so we can send them the appropriate deemed ISDA. Thanks | The status of the A / R has been changed. I highlighted in blue those deals listed below. would you like to get a copy of this and let me know if you need anything else. Thanks. |
-| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
\ No newline at end of file
+| Agustin Perez is wanting to send out a confirmation for YD9296.1 (currently booked to the old name Petrotemex, S.A. de C.V.) by this afternoon, if at all possible. Do we have enough ""lead time"" to get this out today? Thanks, Gordon | I heard from Kevin at 4 : 00 pm. Would you like to change it? The deal is 1190921. Thanks, JMF |
+| Hi. Can you send me the invitation to the meeting? | Hi Liz, I will attend this. Lizzette to send you a copy of the original invitation for you. Thank you. |
+| Shall we have the meeting tomorrow? | meeting is postponed because the 3 : 00 p. m. meeting today. meeting is canceled for today. |
\ No newline at end of file
@@ -43,7 +43,6 @@ global_args = {
 "save_steps": 2000,
 "save_optimizer_and_scheduler": True,
 "silent": False,
-"tensorboard_dir": None,
 "train_batch_size": 8,
 "use_cached_eval_features": False,
 "use_early_stopping": False,
@@ -6,6 +6,7 @@ import random
 import warnings
 from multiprocessing import cpu_count
 from pathlib import Path
+import csv
 import numpy as np
 from tqdm.auto import tqdm, trange
@@ -14,7 +15,6 @@ import pandas as pd
 import torch
 from .global_args import global_args
 from .seq2seq_utils import Seq2SeqDataset, SimpleSummarizationDataset
-from tensorboardX import SummaryWriter
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
@@ -277,7 +277,10 @@ class Seq2SeqModel:
 model = self.model
 args = self.args
-tb_writer = SummaryWriter(logdir=args["tensorboard_dir"])
+csv_file = open("outputs/log.csv", "w")
+csv_file_writer = csv.writer(csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL)
+csv_file_writer.writerow(["step", "lr", "loss"])
 train_sampler = RandomSampler(train_dataset)
 train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args["train_batch_size"])
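Note: the three added lines replace the TensorBoard writer with a plain CSV log. A minimal standalone sketch of the same csv.QUOTE_ALL setup, using a hypothetical file name, shows the quoting it produces:

# Sketch only: "example-log.csv" is a made-up name, not part of this commit.
import csv

with open("example-log.csv", "w", newline="") as f:
    writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writerow(["step", "lr", "loss"])
    writer.writerow([50, 9.9e-05, 4.8123])

# QUOTE_ALL wraps every field in double quotes, so the file contains:
# "step","lr","loss"
# "50","9.9e-05","4.8123"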
@@ -408,8 +411,7 @@ class Seq2SeqModel:
 if args["logging_steps"] > 0 and global_step % args["logging_steps"] == 0:
 # Log metrics
-tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
-tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args["logging_steps"], global_step)
+csv_file_writer.writerow([global_step, scheduler.get_lr()[0], (tr_loss - logging_loss) / args["logging_steps"]])
 logging_loss = tr_loss
 if args["wandb_project"]:
 wandb.log(
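Note: the value written in each row is the mean training loss over the most recent logging window, not the running total, since logging_loss holds the value of tr_loss at the previous logging point. A small illustrative sketch of that bookkeeping, with made-up loss values:

# Illustrative only; loss values are invented, logging_steps mirrors the value set below (50).
logging_steps = 50
tr_loss, logging_loss = 0.0, 0.0
for step, loss in enumerate([3.0] * 50 + [2.0] * 50, start=1):
    tr_loss += loss                                   # cumulative loss over all steps
    if step % logging_steps == 0:
        window_mean = (tr_loss - logging_loss) / logging_steps
        print(step, window_mean)                      # prints "50 3.0", then "100 2.0"
        logging_loss = tr_loss                        # remember the total at this logging point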
@@ -425,8 +427,6 @@ class Seq2SeqModel:
 if args["evaluate_during_training"] and (args["evaluate_during_training_steps"] > 0 and global_step % args["evaluate_during_training_steps"] == 0):
 # Only evaluate when single GPU otherwise metrics may not average well
 results = self.eval_model(eval_data, verbose=verbose and args["evaluate_during_training_verbose"], silent=args["evaluate_during_training_silent"], **kwargs,)
-for key, value in results.items():
-tb_writer.add_scalar("eval_{}".format(key), value, global_step)
 output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
@@ -560,6 +560,7 @@ class Seq2SeqModel:
 train_iterator.close()
 return global_step, tr_loss / global_step
+csv_file.close()
 return global_step, tr_loss / global_step
 def eval_model(self, eval_data, output_dir=None, verbose=True, silent=False, **kwargs):
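Note: once training ends, outputs/log.csv takes the place of the removed TensorBoard event files. A hedged sketch of how the log could be inspected afterwards; it assumes pandas and matplotlib are installed and is not part of this commit:

# Sketch only: reads the log written above and plots loss and learning rate per step.
import pandas as pd
import matplotlib.pyplot as plt

log = pd.read_csv("outputs/log.csv")        # columns: step, lr, loss

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(log["step"], log["loss"])
ax_loss.set_ylabel("loss")
ax_lr.plot(log["step"], log["lr"])
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
plt.tight_layout()
plt.show()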
@@ -9,7 +9,7 @@ transformers_logger = logging.getLogger("transformers")
 transformers_logger.setLevel(logging.WARNING)
 file_path = os.path.dirname(os.path.abspath(__file__))
-train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256-dev.csv"))
+train_df = pd.read_csv(os.path.join(file_path, "../../dataset/input-target-256.csv"))
 train_df.drop(train_df.columns[[0]], axis=1, inplace=True)
 train_df.dropna(subset=["input_text", "target_text"], inplace=True)
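Note: a hedged sketch of the dataset layout the three lines above imply: an unnamed leading index column that gets dropped, followed by input_text / target_text pairs. The column layout is inferred from the drop/dropna calls; the row content below is made up.

# Sketch only: an in-memory stand-in for dataset/input-target-256.csv.
import io
import pandas as pd

sample = io.StringIO(
    ",input_text,target_text\n"
    "0,Shall we have the meeting tomorrow?,The meeting is postponed.\n"
)
df = pd.read_csv(sample)
df.drop(df.columns[[0]], axis=1, inplace=True)             # drop the unnamed index column
df.dropna(subset=["input_text", "target_text"], inplace=True)
print(df.columns.tolist())                                  # ['input_text', 'target_text']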
@@ -17,10 +17,11 @@ model_args = {
 "fp16": False,
 "overwrite_output_dir": True,
 "max_seq_length": 256,
+"eval_batch_size": 1,
 "train_batch_size": 8,
-"num_train_epochs": 128,
+"num_train_epochs": 32,
 "max_length": 256,
-"learning_rate": 3e-5,
+"learning_rate": 1e-4,
 "save_eval_checkpoints": False,
 "save_model_every_epoch": False,
 "save_best_model": False,
@@ -28,6 +29,8 @@ model_args = {
 "num_beams": 3,
 "gradient_accumulation_steps": 1,
 "use_multiprocessing": False,
+"logging_steps": 50,
+"adam_epsilon": 1e-4,
 }
 model = Seq2SeqModel(encoder_type="bert", encoder_name="bert-base-cased", decoder_name="bert-base-cased", args=model_args)
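Note: the diff does not show the training call itself; with this simpletransformers-style API the script presumably follows the constructor with something like:

# Assumption: train_model is the entry point, as in simpletransformers; the actual call is not shown in this diff.
model.train_model(train_df)     # with logging_steps=50, the run appends a row to outputs/log.csv every 50 steps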