@@ -118,11 +118,9 @@ def generate_reply(question, generate_params, eos_token=None, stopping_strings=[
     # These models are not part of Hugging Face, so we handle them
     # separately and terminate the function call earlier
     if any((shared.is_RWKV, shared.is_llamacpp)):
-
         for k in ['temperature', 'top_p', 'top_k', 'repetition_penalty']:
             updated_params[k] = generate_params[k]
         updated_params["token_count"] = generate_params["max_new_tokens"]
-
         try:
             if shared.args.no_stream:
                 reply = shared.model.generate(context=question, **updated_params)
@@ -166,11 +164,10 @@ def generate_reply(question, generate_params, eos_token=None, stopping_strings=[
 
     updated_params["max_new_tokens"] = generate_params['max_new_tokens']
     if not shared.args.flexgen:
-        updated_params["eos_token_id"] = eos_token_ids
-        updated_params["stopping_criteria"] = stopping_criteria_list
         for k in ["do_sample", "temperature", "top_p", "typical_p", "repetition_penalty", "encoder_repetition_penalty", "top_k", "min_length", "no_repeat_ngram_size", "num_beams", "penalty_alpha", "length_penalty", "early_stopping"]:
             updated_params[k] = generate_params[k]
-
+        updated_params["eos_token_id"] = eos_token_ids
+        updated_params["stopping_criteria"] = stopping_criteria_list
         if shared.args.no_stream:
             updated_params["min_length"] = 0
     else:
@@ -179,7 +176,6 @@ def generate_reply(question, generate_params, eos_token=None, stopping_strings=[
         updated_params["stop"] = generate_params["eos_token_ids"][-1]
         if not shared.args.no_stream:
             updated_params["max_new_tokens"] = 8
-    print(updated_params)
 
     if shared.args.no_cache:
         updated_params.update({"use_cache": False})
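
For reference, a minimal standalone sketch (not the repository's code) of how the non-FlexGen Hugging Face branch assembles `updated_params` after this change: the sampling keys are copied from `generate_params` first, then `eos_token_id` and `stopping_criteria` are attached. The literal values for `generate_params`, `eos_token_ids`, and `stopping_criteria_list` below are placeholders; in `generate_reply` they are built earlier in the function.

# Hedged sketch: placeholder inputs standing in for what generate_reply builds upstream.
generate_params = {
    "do_sample": True, "temperature": 0.7, "top_p": 0.9, "typical_p": 1.0,
    "repetition_penalty": 1.1, "encoder_repetition_penalty": 1.0, "top_k": 40,
    "min_length": 0, "no_repeat_ngram_size": 0, "num_beams": 1,
    "penalty_alpha": 0.0, "length_penalty": 1.0, "early_stopping": False,
    "max_new_tokens": 200,
}
eos_token_ids = [2]            # placeholder EOS token id(s)
stopping_criteria_list = []    # would normally be a transformers.StoppingCriteriaList

updated_params = {"max_new_tokens": generate_params["max_new_tokens"]}
# Copy the sampling parameters first, then attach the stop conditions (the order used in this patch).
for k in ["do_sample", "temperature", "top_p", "typical_p", "repetition_penalty",
          "encoder_repetition_penalty", "top_k", "min_length", "no_repeat_ngram_size",
          "num_beams", "penalty_alpha", "length_penalty", "early_stopping"]:
    updated_params[k] = generate_params[k]
updated_params["eos_token_id"] = eos_token_ids
updated_params["stopping_criteria"] = stopping_criteria_list
# updated_params would then be passed as **kwargs to the model's generate call.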