Procházet zdrojové kódy

Code reuse + indication

Now prints a message to the console when the model weights are unloaded. Also, reload_model() now calls unload_model() first to free memory, so that repeated reloads won't fill it up.
Φφ před 2 roky
rodič
revize
483d173d23
1 změnil soubory, kde provedl 2 přidání a 3 odebrání
  1. 2 3
      server.py

+ 2 - 3
server.py

@@ -64,9 +64,7 @@ def load_model_wrapper(selected_model):
     return selected_model
     return selected_model
 
 
 def reload_model():
 def reload_model():
-    if not shared.args.cpu:
-        gc.collect()
-        torch.cuda.empty_cache()
+    unload_model()
     shared.model, shared.tokenizer = load_model(shared.model_name)
     shared.model, shared.tokenizer = load_model(shared.model_name)
 
 
 def unload_model():
 def unload_model():
@@ -74,6 +72,7 @@ def unload_model():
     if not shared.args.cpu:
     if not shared.args.cpu:
         gc.collect()
         gc.collect()
         torch.cuda.empty_cache()
         torch.cuda.empty_cache()
+        print("Model weights unloaded.")
 
 
 def load_lora_wrapper(selected_lora):
 def load_lora_wrapper(selected_lora):
     shared.lora_name = selected_lora
     shared.lora_name = selected_lora