PixArtAlphaPipeline.from_pretrained Error: Non-consecutive added token '<extra_id_99>' found #83
-
Hi, I've trained the model and then converted the weights using the script. Unfortunately, when I attempt to execute the code, I encounter the following error:
Do you have any ideas about what might be causing this? Thanks! Here's the full error for reference: ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [3], line 2
1 #transformer = Transformer2DModel.from_pretrained("/workspace/PixArt-alpha/output/diffusers_trained/transformer", torch_dtype=torch.float16)
----> 2 pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-512x512", torch_dtype=torch.float16, transformer=transformer)
3 pipe = pipe.to("cuda")
File /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
115 if check_use_auth_token:
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)
File /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:1286, in DiffusionPipeline.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1283 loaded_sub_model = passed_class_obj[name]
1284 else:
1285 # load sub model
-> 1286 loaded_sub_model = load_sub_model(
1287 library_name=library_name,
1288 class_name=class_name,
1289 importable_classes=importable_classes,
1290 pipelines=pipelines,
1291 is_pipeline_module=is_pipeline_module,
1292 pipeline_class=pipeline_class,
1293 torch_dtype=torch_dtype,
1294 provider=provider,
1295 sess_options=sess_options,
1296 device_map=device_map,
1297 max_memory=max_memory,
1298 offload_folder=offload_folder,
1299 offload_state_dict=offload_state_dict,
1300 model_variants=model_variants,
1301 name=name,
1302 from_flax=from_flax,
1303 variant=variant,
1304 low_cpu_mem_usage=low_cpu_mem_usage,
1305 cached_folder=cached_folder,
1306 revision=revision,
1307 )
1308 logger.info(
1309 f"Loaded {name} as {class_name} from `{name}` subfolder of {pretrained_model_name_or_path}."
1310 )
1312 init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...)
File /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:531, in load_sub_model(library_name, class_name, importable_classes, pipelines, is_pipeline_module, pipeline_class, torch_dtype, provider, sess_options, device_map, max_memory, offload_folder, offload_state_dict, model_variants, name, from_flax, variant, low_cpu_mem_usage, cached_folder, revision)
529 # check if the module is in a subdirectory
530 if os.path.isdir(os.path.join(cached_folder, name)):
--> 531 loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs)
532 else:
533 # else load from the root directory
534 loaded_sub_model = load_method(cached_folder, **loading_kwargs)
File /usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:1854, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
1851 else:
1852 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1854 return cls._from_pretrained(
1855 resolved_vocab_files,
1856 pretrained_model_name_or_path,
1857 init_configuration,
1858 *init_inputs,
1859 token=token,
1860 cache_dir=cache_dir,
1861 local_files_only=local_files_only,
1862 _commit_hash=commit_hash,
1863 _is_local=is_local,
1864 **kwargs,
1865 )
File /usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:2073, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2066 raise ValueError(
2067 f"Wrong index found for {token}: should be {tokenizer.convert_tokens_to_ids(token)} but found "
2068 f"{index}."
2069 )
2070 elif not has_tokenizer_file and index != current_index:
2071 # Tokenizer slow: added token cannot already be in the vocabulary so its index needs to be the
2072 # current length of the tokenizer.
-> 2073 raise ValueError(
2074 f"Non-consecutive added token '{token}' found. "
2075 f"Should have index {current_index} but has index {index} in saved vocabulary."
2076 )
2078 is_special = bool(token in special_tokens)
2079 if is_last_special is None or is_last_special == is_special:
ValueError: Non-consecutive added token '<extra_id_99>' found. Should have index 32100 but has index 32000 in saved vocabulary.
Beta Was this translation helpful? Give feedback.
Answered by
lawrence-cj
Jan 31, 2024
Replies: 1 comment 1 reply
-
Upgrading the `transformers` package will solve it.
Beta Was this translation helpful? Give feedback.
1 reply
Answer selected by
lawrence-cj
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Upgrading the `transformers` package will solve it.