From 89f020b8407c5bddd435c5aff03612b86fd1724a Mon Sep 17 00:00:00 2001 From: Victoria Lin <victorialin@fb.com> Date: Mon, 18 Jul 2022 23:11:38 -0700 Subject: [PATCH] fix streaming language modeling multicorpus_sampling_maximum hyperparameter --- metaseq/tasks/streaming_language_modeling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metaseq/tasks/streaming_language_modeling.py b/metaseq/tasks/streaming_language_modeling.py index 614e33a..21342e3 100644 --- a/metaseq/tasks/streaming_language_modeling.py +++ b/metaseq/tasks/streaming_language_modeling.py @@ -186,9 +186,9 @@ class StreamingLanguageModelingTask(LegacyTask): smoothed_prob = prob**self.args.multicorpus_sampling_alpha smoothed_prob = smoothed_prob / smoothed_prob.sum() else: - dataset_lens = [ - min(l, self.args.multicorpus_sampling_maximum) for l in dataset_lens - ] + dataset_lens = np.array( + [min(l, self.args.multicorpus_sampling_maximum) for l in dataset_lens] + ) smoothed_prob = dataset_lens / sum(dataset_lens) return smoothed_prob -- GitLab