Skip to content

Commit

Permalink
adjust FTS dependency adjustment warning to reference a more informative object name
Browse files Browse the repository at this point in the history
adjust FTS dependency adjustment warning to reference a more informative object name; update HF Datasets `trust_remote_code` to `True` in examples, as will be required with HF Datasets >= 3.x
  • Loading branch information
speediedan committed Jun 1, 2024
1 parent b96c157 commit 2c7fec3
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
3 changes: 1 addition & 2 deletions src/finetuning_scheduler/fts_supporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1876,8 +1876,7 @@ def _add_fts_callback(trainer: "pl.Trainer", fts_cls: FTSCallbackDepType, cfg: D
"""
if cfg.get("monitor", None) is None:
cfg["monitor"] = "val_loss"
rank_zero_warn(f"No monitor metric specified for {fts_cls.__class__.__name__},"
" using 'val_loss' as default.")
rank_zero_warn(f"No monitor metric specified for {fts_cls.__name__}, using 'val_loss' as default.")
trainer.callbacks.append(fts_cls(**cfg))

def _callback_dep_setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: str) -> None:
Expand Down
7 changes: 5 additions & 2 deletions src/fts_examples/stable/fts_superglue.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def __init__(
"dataloader_kwargs": dataloader_kwargs,
"tokenizers_parallelism": tokenizers_parallelism,
}
# starting with HF Datasets v3.x, trust_remote_code must be `True` https://bit.ly/hf_datasets_trust_remote_req
self.trust_remote_code = True
self.save_hyperparameters(self.init_hparams)
self.dataloader_kwargs = {
"num_workers": dataloader_kwargs.get("num_workers", 0),
Expand All @@ -139,11 +141,12 @@ def prepare_data(self):
"""Load the SuperGLUE dataset."""
# N.B. PL calls prepare_data from a single process (rank 0) so do not use it to assign
# state (e.g. self.x=y)
datasets.load_dataset("super_glue", self.hparams.task_name)
datasets.load_dataset("super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code)

def setup(self, stage):
"""Setup our dataset splits for training/validation."""
self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name)
self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name,
trust_remote_code=self.trust_remote_code)
for split in self.dataset.keys():
self.dataset[split] = self.dataset[split].map(
self._convert_to_features, batched=True, remove_columns=["label"]
Expand Down
7 changes: 5 additions & 2 deletions src/fts_examples/stable/ipynb_src/fts_superglue_nb.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ def __init__(
"dataloader_kwargs": dataloader_kwargs,
"tokenizers_parallelism": tokenizers_parallelism,
}
# starting with HF Datasets v3.x, trust_remote_code must be `True` https://bit.ly/hf_datasets_trust_remote_req
self.trust_remote_code = True
self.save_hyperparameters(self.init_hparams)
self.dataloader_kwargs = {
"num_workers": dataloader_kwargs.get("num_workers", 0),
Expand All @@ -273,11 +275,12 @@ def prepare_data(self):
"""Load the SuperGLUE dataset."""
# N.B. PL calls prepare_data from a single process (rank 0) so do not use it to assign
# state (e.g. self.x=y)
datasets.load_dataset("super_glue", self.hparams.task_name)
datasets.load_dataset("super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code)

def setup(self, stage):
"""Setup our dataset splits for training/validation."""
self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name)
self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name,
trust_remote_code=self.trust_remote_code)
for split in self.dataset.keys():
self.dataset[split] = self.dataset[split].map(
self._convert_to_features, batched=True, remove_columns=["label"]
Expand Down

0 comments on commit 2c7fec3

Please sign in to comment.