Omni-DNA-DNA2Function / tokenization_olmo_fast.py
zehui127's picture
Upload folder using huggingface_hub
502d727 verified
raw
history blame contribute delete
645 Bytes
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from hf_olmo.configuration_olmo import OLMoConfig
class OLMoTokenizerFast(PreTrainedTokenizerFast):
# Note: OLMo's tokenizer is already a wrapper around huggingface. This is potentially unnecessary.
pass
# def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
# # This is required to make the implementation complete.
# pass
# Register the tokenizer class so that it is available for transformer pipelines, auto-loading etc.
AutoTokenizer.register(OLMoConfig, fast_tokenizer_class=OLMoTokenizerFast)