@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
     if selected_model != model_name:
         model_name = selected_model
         model = None
-        tokenier = None
+        tokenizer = None
         torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
         loaded_preset = inference_settings
 
     torch.cuda.empty_cache()
-    input_text = question
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)