il y a 2 ans · 2c4699a7e9
--- a/modules/quantized_LLaMA.py
+++ b/modules/quantized_LLaMA.py
@@ -42,7 +42,7 @@ def load_quantized_LLaMA(model_name):
 
				 
			
 
				     model = load_quant(path_to_model, str(pt_path), bits)
			
 
				 
			
 
				-    # Multi-GPU setup
			
 
				+    # Multiple GPUs or GPU+CPU
			
 
				     if shared.args.gpu_memory:
			
 
				         max_memory = {}
			
 
				         for i in range(len(shared.args.gpu_memory)):