From 89f020b8407c5bddd435c5aff03612b86fd1724a Mon Sep 17 00:00:00 2001
From: Victoria Lin <victorialin@fb.com>
Date: Mon, 18 Jul 2022 23:11:38 -0700
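Subject: [PATCH] fix streaming language modeling multicorpus_sampling_maximum
 hyperparameter

In the multicorpus_sampling_maximum branch, the clamped dataset lengths
were built as a plain Python list, so the following normalization
`dataset_lens / sum(dataset_lens)` raised a TypeError (a list cannot be
divided by a number). Wrap the clamped lengths in np.array so the division
is applied elementwise and yields the sampling probabilities.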
---
 metaseq/tasks/streaming_language_modeling.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/metaseq/tasks/streaming_language_modeling.py b/metaseq/tasks/streaming_language_modeling.py
index 614e33a..21342e3 100644
--- a/metaseq/tasks/streaming_language_modeling.py
+++ b/metaseq/tasks/streaming_language_modeling.py
@@ -186,9 +186,9 @@ class StreamingLanguageModelingTask(LegacyTask):
             smoothed_prob = prob**self.args.multicorpus_sampling_alpha
             smoothed_prob = smoothed_prob / smoothed_prob.sum()
         else:
-            dataset_lens = [
-                min(l, self.args.multicorpus_sampling_maximum) for l in dataset_lens
-            ]
+            dataset_lens = np.array(
+                [min(l, self.args.multicorpus_sampling_maximum) for l in dataset_lens]
+            )
             smoothed_prob = dataset_lens / sum(dataset_lens)
         return smoothed_prob
 
-- 
GitLab