diff --git a/.gitignore b/.gitignore index dd84837dd..24d1db4c6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # Compiled python modules. *.pyc +# Byte-compiled +__pycache__/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index ca9418488..1cbd27f2b 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -87,15 +87,15 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: algorithmic.multiplication_generator(10, 40, 100000), lambda: algorithmic.multiplication_generator(10, 400, 10000)), "algorithmic_reverse_nlplike_decimal8K": ( - lambda: algorithmic.reverse_generator_nlplike(8000, 40, 100000, - 10, 1.250), - lambda: algorithmic.reverse_generator_nlplike(8000, 400, 10000, - 10, 1.250)), + lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, + 10, 1.300), + lambda: algorithmic.reverse_generator_nlplike(8000, 700, 10000, + 10, 1.300)), "algorithmic_reverse_nlplike_decimal32K": ( - lambda: algorithmic.reverse_generator_nlplike(32000, 40, 100000, - 10, 1.005), - lambda: algorithmic.reverse_generator_nlplike(32000, 400, 10000, - 10, 1.005)), + lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, + 10, 1.050), + lambda: algorithmic.reverse_generator_nlplike(32000, 700, 10000, + 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index d7013469b..9bbb4bc4b 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -123,7 +123,12 @@ def zipf_random_sample(distr_map, sample_len): """ u = np.random.random(sample_len) - return [t+1 for t in np.searchsorted(distr_map, u)] # 0 pad and 1 EOS + # Random produces values in range [0.0,1.0); even if it is almost 
+ # improbable (but possible) that it generates exactly 0.0, + # we add a sanity check to handle that case. In every other case, + # t+1 is enough to keep us from generating PAD(0) and EOS(1), which are + # reserved symbols. + return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)] def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \