浏览代码

Show it/s in the same units with or without streaming

Closes #49
oobabooga 3 年之前
父节点
当前提交
a28f0d8bd7
共有 1 个文件被更改,包括 1 次插入 和 1 次删除
  1. server.py (+1 -1)

+ 1 - 1
server.py

@@ -242,7 +242,7 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
             output = eval(f"model.generate(input_ids, {','.join(generate_params)}, {preset}){cuda}")
         reply = decode(output[0])
         t1 = time.time()
-        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output[0])-len(input_ids[0]))/(t1-t0):.2f} it/s)")
+        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output[0])-len(input_ids[0]))/(t1-t0)/8:.2f} it/s, {len(output[0])-len(input_ids[0])} tokens)")
         if not (args.chat or args.cai_chat):
             reply = original_question + apply_extensions(reply[len(question):], "output")
         yield formatted_outputs(reply, model_name)