flake8 cleanup

pull/380/head
evelynmitchell 1 year ago
parent 0f09b76645
commit 512c1460cc

@ -58,11 +58,7 @@ class SentencePieceTokenizer:
def _maybe_add_prefix_space(self, tokens, decoded):
    """Maybe prepend a space to ``decoded`` for incremental decoding.

    SentencePiece drops the implicit leading space when a token
    sequence is detokenized piecewise; restore it when the first
    token is one that carries a prefix space.

    Args:
        tokens: token ids that produced ``decoded``.
        decoded: detokenized text for ``tokens``.

    Returns:
        ``" " + decoded`` when ``tokens`` is non-empty, ``decoded``
        does not already start with a space, and ``tokens[0]`` is in
        ``self.prefix_space_tokens``; otherwise ``decoded`` unchanged.
    """
    # Sequence truthiness replaces `len(tokens)` (PEP 8 / flake8 idiom);
    # identical behavior for any list of ids.
    if (tokens and not decoded.startswith(" ")
            and tokens[0] in self.prefix_space_tokens):
        return " " + decoded
    return decoded
@ -151,10 +147,7 @@ class HuggingFaceTokenizer:
backend_tokenizer_file = osp.join(model_dir, "tokenizer.json") backend_tokenizer_file = osp.join(model_dir, "tokenizer.json")
model_file_exists = osp.exists(model_file) model_file_exists = osp.exists(model_file)
self.logger = get_logger("lmdeploy") self.logger = get_logger("lmdeploy")
if ( if ( not osp.exists(backend_tokenizer_file) and model_file_exists ):
not osp.exists(backend_tokenizer_file)
and model_file_exists
):
self.logger.warning( self.logger.warning(
"Can not find tokenizer.json. " "Can not find tokenizer.json. "
"It may take long time to initialize the tokenizer." "It may take long time to initialize the tokenizer."
@ -164,15 +157,10 @@ class HuggingFaceTokenizer:
) )
self._prefix_space_tokens = None self._prefix_space_tokens = None
# save tokenizer.json to reuse # save tokenizer.json to reuse
if ( if (not osp.exists(backend_tokenizer_file) and model_file_exists):
not osp.exists(backend_tokenizer_file)
and model_file_exists
):
if hasattr(self.model, "backend_tokenizer"): if hasattr(self.model, "backend_tokenizer"):
if os.access(model_dir, os.W_OK): if os.access(model_dir, os.W_OK):
self.model.backend_tokenizer.save( self.model.backend_tokenizer.save(backend_tokenizer_file)
backend_tokenizer_file
)
if self.model.eos_token_id is None: if self.model.eos_token_id is None:
generation_config_file = osp.join( generation_config_file = osp.join(
@ -227,11 +215,7 @@ class HuggingFaceTokenizer:
self, tokens: List[int], decoded: str self, tokens: List[int], decoded: str
): ):
"""maybe add prefix space for incremental decoding.""" """maybe add prefix space for incremental decoding."""
if ( if (len(tokens) and not decoded.startswith(" ") and tokens[0] in self.prefix_space_tokens):
len(tokens)
and not decoded.startswith(" ")
and tokens[0] in self.prefix_space_tokens
):
return " " + decoded return " " + decoded
else: else:
return decoded return decoded
@ -241,9 +225,7 @@ class HuggingFaceTokenizer:
"""Check if self.model.convert_ids_to_tokens return not a str value.""" """Check if self.model.convert_ids_to_tokens return not a str value."""
if self._maybe_decode_bytes is None: if self._maybe_decode_bytes is None:
self._maybe_decode_bytes = False self._maybe_decode_bytes = False
vocab = self.model.convert_ids_to_tokens( vocab = self.model.convert_ids_to_tokens(list(range(self.vocab_size)))
list(range(self.vocab_size))
)
for tok in vocab: for tok in vocab:
if not isinstance(tok, str): if not isinstance(tok, str):
self._maybe_decode_bytes = True self._maybe_decode_bytes = True

Loading…
Cancel
Save