PixArtAlphaPipeline.from_pretrained Error: Non-consecutive added token '<extra_id_99>' found #83
-
Hi, I've trained the model and then converted the weights using the script. Unfortunately, when I attempt to execute the code, I encounter the following error:
Do you have any ideas about what might be causing this? Thanks! Here's the full error for reference: ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [3], line 2
1 #transformer = Transformer2DModel.from_pretrained("/workspace/PixArt-alpha/output/diffusers_trained/transformer", torch_dtype=torch.float16)
----> 2 pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-512x512", torch_dtype=torch.float16, transformer=transformer)
3 pipe = pipe.to("cuda")
File /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
115 if check_use_auth_token:
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)
File /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:1286, in DiffusionPipeline.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1283 loaded_sub_model = passed_class_obj[name]
1284 else:
1285 # load sub model
-> 1286 loaded_sub_model = load_sub_model(
1287 library_name=library_name,
1288 class_name=class_name,
1289 importable_classes=importable_classes,
1290 pipelines=pipelines,
1291 is_pipeline_module=is_pipeline_module,
1292 pipeline_class=pipeline_class,
1293 torch_dtype=torch_dtype,
1294 provider=provider,
1295 sess_options=sess_options,
1296 device_map=device_map,
1297 max_memory=max_memory,
1298 offload_folder=offload_folder,
1299 offload_state_dict=offload_state_dict,
1300 model_variants=model_variants,
1301 name=name,
1302 from_flax=from_flax,
1303 variant=variant,
1304 low_cpu_mem_usage=low_cpu_mem_usage,
1305 cached_folder=cached_folder,
1306 revision=revision,
1307 )
1308 logger.info(
1309 f"Loaded {name} as {class_name} from `{name}` subfolder of {pretrained_model_name_or_path}."
1310 )
1312 init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...)
File /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:531, in load_sub_model(library_name, class_name, importable_classes, pipelines, is_pipeline_module, pipeline_class, torch_dtype, provider, sess_options, device_map, max_memory, offload_folder, offload_state_dict, model_variants, name, from_flax, variant, low_cpu_mem_usage, cached_folder, revision)
529 # check if the module is in a subdirectory
530 if os.path.isdir(os.path.join(cached_folder, name)):
--> 531 loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs)
532 else:
533 # else load from the root directory
534 loaded_sub_model = load_method(cached_folder, **loading_kwargs)
File /usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:1854, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
1851 else:
1852 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1854 return cls._from_pretrained(
1855 resolved_vocab_files,
1856 pretrained_model_name_or_path,
1857 init_configuration,
1858 *init_inputs,
1859 token=token,
1860 cache_dir=cache_dir,
1861 local_files_only=local_files_only,
1862 _commit_hash=commit_hash,
1863 _is_local=is_local,
1864 **kwargs,
1865 )
File /usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:2073, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2066 raise ValueError(
2067 f"Wrong index found for {token}: should be {tokenizer.convert_tokens_to_ids(token)} but found "
2068 f"{index}."
2069 )
2070 elif not has_tokenizer_file and index != current_index:
2071 # Tokenizer slow: added token cannot already be in the vocabulary so its index needs to be the
2072 # current length of the tokenizer.
-> 2073 raise ValueError(
2074 f"Non-consecutive added token '{token}' found. "
2075 f"Should have index {current_index} but has index {index} in saved vocabulary."
2076 )
2078 is_special = bool(token in special_tokens)
2079 if is_last_special is None or is_last_special == is_special:
ValueError: Non-consecutive added token '<extra_id_99>' found. Should have index 32100 but has index 32000 in saved vocabulary.
Beta Was this translation helpful? Give feedback.
Answered by
lawrence-cj
Jan 31, 2024
Replies: 1 comment 1 reply
-
Upgrading the `transformers` package will solve it.
Beta Was this translation helpful? Give feedback.
1 reply
Answer selected by
lawrence-cj
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Upgrading the `transformers` package will solve it.