Minor changes

oobabooga · 3 years ago
parent
commit b801e0d50d

2 changed files with 4 additions and 5 deletions:

  1. README.md (+2, -2)
  2. server.py (+2, -3)

README.md (+2, -2)

@@ -68,9 +68,9 @@ You also need to put GPT-J-6B's config.json file in the same folder: [config.jso
 
 The script `convert-to-torch.py` allows you to convert models to .pt format, which is about 10x faster to load:
 
-    python convert-to-torch.py models/model-name/
+    python convert-to-torch.py models/model-name
 
-The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name/`. 
+The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name`. 
 
 ## Starting the webui
 

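The README hunk above describes the lookup order for converted models: the webui prefers the pre-converted `torch-dumps/model-name.pt` file and only falls back to the regular `models/model-name` directory if the dump is missing. A minimal sketch of that behaviour, assuming standard `torch` and `transformers` APIs and the folder names mentioned in the README (an illustration, not the repository's actual `load_model` code):

```python
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM

def load_model_sketch(model_name: str):
    # Prefer the pre-converted dump, which the README says loads about 10x faster.
    pt_path = Path("torch-dumps") / f"{model_name}.pt"
    if pt_path.exists():
        return torch.load(pt_path)
    # Otherwise fall back to the regular (slower) load from models/model-name.
    return AutoModelForCausalLM.from_pretrained(Path("models") / model_name)
```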
server.py (+2, -3)

@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
     if selected_model != model_name:
         model_name = selected_model
         model = None
-        tokenier = None
+        tokenizer = None
         torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
         loaded_preset = inference_settings
 
     torch.cuda.empty_cache()
-    input_text = question
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
 
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)
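For context on the hunk above: `preset` is a string of `generate()` keyword arguments that gets spliced into the call via `eval()`. A hedged sketch of the equivalent explicit call, with an illustrative model name and sampling arguments standing in for the preset contents (these values are assumptions, not taken from the repository):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_reply_sketch(question: str, temperature: float, max_length: int) -> str:
    # Illustrative model; the webui uses whichever model the user has selected.
    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B").cuda()
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

    input_ids = tokenizer.encode(str(question), return_tensors="pt").cuda()
    # Explicit equivalent of eval(f"model.generate(input_ids, {preset}).cuda()"),
    # assuming the preset expands to the keyword arguments below.
    output = model.generate(
        input_ids,
        do_sample=True,
        temperature=temperature,
        max_length=max_length,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
```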