diff --git a/metaseq/dataclass/configs.py b/metaseq/dataclass/configs.py index 469fb4d63891091bd548f63c95749c85a042ce7b..024d465961d7bb3351add3978687799a95ada56c 100644 --- a/metaseq/dataclass/configs.py +++ b/metaseq/dataclass/configs.py @@ -363,9 +363,6 @@ class DatasetConfig(MetaseqDataclass): default=False, metadata={"help": "do not raise error if valid subsets are ignored"}, ) - validate_interval: int = field( - default=1, metadata={"help": "validate every N epochs"} - ) validate_interval_updates: int = field( default=0, metadata={"help": "validate every N updates"} ) @@ -515,7 +512,7 @@ class CheckpointConfig(MetaseqDataclass): default=False, metadata={"help": "don't save models or checkpoints"} ) no_epoch_checkpoints: bool = field( - default=False, metadata={"help": "only store last and best checkpoints"} + default=False, metadata={"help": "don't store checkpoints at epoch boundaries"} ) no_last_checkpoints: bool = field( default=False, metadata={"help": "don't store last checkpoints"} diff --git a/metaseq_cli/train.py b/metaseq_cli/train.py index 956922628903b926d186075ad831a03c2214b183..759127519c0b7d911cd19b6f23aa68fe9c8051f4 100644 --- a/metaseq_cli/train.py +++ b/metaseq_cli/train.py @@ -402,7 +402,11 @@ def validate_and_save( ) do_save = ( - (end_of_epoch and epoch_itr.epoch % cfg.checkpoint.save_interval == 0) + ( + end_of_epoch + and epoch_itr.epoch % cfg.checkpoint.save_interval == 0 + and not cfg.checkpoint.no_epoch_checkpoints + ) or should_stop or ( cfg.checkpoint.save_interval_updates > 0 @@ -416,7 +420,6 @@ def validate_and_save( ( not end_of_epoch and do_save and not cfg.checkpoint.no_best_checkpoints ) # validate during mid-epoch saves - or (end_of_epoch and epoch_itr.epoch % cfg.dataset.validate_interval == 0) or should_stop or ( cfg.dataset.validate_interval_updates > 0