convert-to-safetensors.py

'''
Converts a transformers model to safetensors format and shards it.
This makes it faster to load (because of safetensors) and lowers its RAM usage
while loading (because of sharding).

Based on the original script by 81300:
https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
'''
import argparse
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
args = parser.parse_args()

if __name__ == '__main__':
    if args.MODEL is None:
        parser.error("Please specify the path to the input model.")

    path = Path(args.MODEL)
    model_name = path.name

    print(f"Loading {model_name}...")
    # Load in float16 with low_cpu_mem_usage to keep peak RAM down during conversion.
    model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(path)

    out_folder = args.output or Path(f"models/{model_name}_safetensors")
    print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
    # safe_serialization=True writes sharded .safetensors files instead of pickle-based .bin files.
    model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
    tokenizer.save_pretrained(out_folder)
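# Example invocation (the model path and shard size below are placeholders; adjust to your setup):
#   python convert-to-safetensors.py models/llama-7b --output models/llama-7b_safetensors --max-shard-size 500MB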