Add top_k to RWKV

oobabooga, 2 years ago
Parent commit: 8660227e1b
3 changed files with 5 additions and 4 deletions
  1. modules/RWKV.py (+2 -1)
  2. modules/text_generation.py (+2 -2)
  3. requirements.txt (+1 -1)

modules/RWKV.py (+2 -1)

@@ -33,10 +33,11 @@ class RWKVModel:
         result.pipeline = pipeline
         return result
 
-    def generate(self, context, token_count=20, temperature=1, top_p=1, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
+    def generate(self, context, token_count=20, temperature=1, top_p=1, top_k=50, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
         args = PIPELINE_ARGS(
             temperature = temperature,
             top_p = top_p,
+            top_k = top_k,
             alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3)
             alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3)
             token_ban = token_ban, # ban the generation of some tokens

modules/text_generation.py (+2 -2)

@@ -92,7 +92,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
     # separately and terminate the function call earlier
     if shared.is_RWKV:
         if shared.args.no_stream:
-            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p)
+            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k)
             t1 = time.time()
             print(f"Output generated in {(t1-t0):.2f} seconds.")
             yield formatted_outputs(reply, shared.model_name)
@@ -100,7 +100,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
             yield formatted_outputs(question, shared.model_name)
             for i in tqdm(range(max_new_tokens//8+1)):
                 clear_torch_cache()
-                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p)
+                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p, top_k=top_k)
                 yield formatted_outputs(reply, shared.model_name)
                 question = reply
         return
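
A minimal usage sketch of the updated call, mirroring the no_stream branch above; the prompt and sampling values are made-up examples, and shared.model is assumed to hold a loaded RWKVModel as elsewhere in this repo.

    # Hypothetical values for illustration; top_k is the parameter this commit threads through.
    reply = shared.model.generate(
        "Once upon a time,",
        token_count=200,
        temperature=0.7,
        top_p=0.9,
        top_k=50,  # consider only the 50 most likely tokens at each step
    )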

requirements.txt (+1 -1)

@@ -3,7 +3,7 @@ bitsandbytes==0.37.0
 flexgen==0.1.7
 gradio==3.18.0
 numpy
-rwkv==0.0.8
+rwkv==0.1.0
 safetensors==0.2.8
 sentencepiece
 git+https://github.com/oobabooga/transformers@llama_push
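
Note: the rwkv pin moves from 0.0.8 to 0.1.0, presumably because top_k support in PIPELINE_ARGS first shipped in the newer release (an assumption; the rwkv changelog would confirm).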