Add top_k to RWKV

oobabooga, 2 years ago
Parent commit: 8660227e1b
3 changed files with 5 additions and 4 deletions
  1. modules/RWKV.py (+2 -1)
  2. modules/text_generation.py (+2 -2)
  3. requirements.txt (+1 -1)

modules/RWKV.py (+2 -1)

@@ -33,10 +33,11 @@ class RWKVModel:
         result.pipeline = pipeline
         return result
 
-    def generate(self, context, token_count=20, temperature=1, top_p=1, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
+    def generate(self, context, token_count=20, temperature=1, top_p=1, top_k=50, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
         args = PIPELINE_ARGS(
             temperature = temperature,
             top_p = top_p,
+            top_k = top_k,
             alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3)
             alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3)
             token_ban = token_ban, # ban the generation of some tokens

modules/text_generation.py (+2 -2)

@@ -92,7 +92,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
     # separately and terminate the function call earlier
     if shared.is_RWKV:
         if shared.args.no_stream:
-            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p)
+            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k)
             t1 = time.time()
             print(f"Output generated in {(t1-t0):.2f} seconds.")
             yield formatted_outputs(reply, shared.model_name)
@@ -100,7 +100,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
             yield formatted_outputs(question, shared.model_name)
             for i in tqdm(range(max_new_tokens//8+1)):
                 clear_torch_cache()
-                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p)
+                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p, top_k=top_k)
                 yield formatted_outputs(reply, shared.model_name)
                 question = reply
         return
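
A minimal usage sketch of the updated call, mirroring the no_stream branch above; the prompt and sampling values are made-up examples, and shared.model is assumed to hold a loaded RWKVModel as elsewhere in this repo.

    # Hypothetical values for illustration; top_k is the parameter this commit threads through.
    reply = shared.model.generate(
        "Once upon a time,",
        token_count=200,
        temperature=0.7,
        top_p=0.9,
        top_k=50,  # consider only the 50 most likely tokens at each step
    )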

requirements.txt (+1 -1)

@@ -3,7 +3,7 @@ bitsandbytes==0.37.0
 flexgen==0.1.7
 gradio==3.18.0
 numpy
-rwkv==0.0.8
+rwkv==0.1.0
 safetensors==0.2.8
 sentencepiece
 git+https://github.com/oobabooga/transformers@llama_push
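
Note: the rwkv pin moves from 0.0.8 to 0.1.0, presumably because top_k support in PIPELINE_ARGS first shipped in the newer release (an assumption; the rwkv changelog would confirm).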