Explorar o código

Fix memory leak in new streaming (second attempt)

oobabooga hai 2 anos
pai
achega
37f0166b2d
Modificáronse 2 ficheiros con 4 adicións e 2 borrados
  1. 4 1
      modules/callbacks.py
  2. 0 1
      modules/text_generation.py

+ 4 - 1
modules/callbacks.py

@@ -49,7 +49,7 @@ class Iteratorize:
     def __init__(self, func, kwargs={}, callback=None):
         self.mfunc=func
         self.c_callback=callback
-        self.q = Queue(maxsize=1)
+        self.q = Queue()
         self.sentinel = object()
         self.kwargs = kwargs
 
@@ -73,3 +73,6 @@ class Iteratorize:
             raise StopIteration
         else:
             return obj
+
+    def __del__(self):
+        pass

+ 0 - 1
modules/text_generation.py

@@ -187,7 +187,6 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
 
         yield formatted_outputs(original_question, shared.model_name)
         for output in eval(f"generate_with_streaming({', '.join(generate_params)})"):
-            print(print('Used vram in gib:', torch.cuda.memory_allocated() / 1024**3))
             if shared.soft_prompt:
                 output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))
             reply = decode(output)