| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- import gc
- import traceback
- from queue import Queue
- from threading import Thread
- import torch
- import transformers
- import modules.shared as shared
- # Copied from https://github.com/PygmalionAI/gradio-ui/
class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    """Stopping criterion that fires when a sentinel token sequence shows up.

    Only the part of each sample from ``starting_idx`` onwards is searched,
    so sentinel sequences located before that index are never matched.
    """

    def __init__(self, sentinel_token_ids: list, starting_idx: int):
        """Store the sentinel sequences and the index where searching starts.

        Args:
            sentinel_token_ids: Sequences to look for. Each entry is indexed
                with ``[0]`` and measured with ``.shape[-1]``, so they are
                presumably tensors of shape (1, k) -- TODO confirm with caller.
            starting_idx: Offset into each sample; tokens before it are ignored.
        """
        transformers.StoppingCriteria.__init__(self)
        self.starting_idx = starting_idx
        self.sentinel_token_ids = sentinel_token_ids

    def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor) -> bool:
        """Return True as soon as any sample contains any sentinel sequence.

        ``_scores`` is accepted for interface compatibility and is not used.
        """
        for sequence in input_ids:
            generated = sequence[self.starting_idx:]
            available = generated.shape[-1]

            for sentinel in self.sentinel_token_ids:
                width = sentinel.shape[-1]
                # Can't unfold, output is still too tiny. Skip.
                if available < width:
                    continue

                # Slide a window of the sentinel's width over the generated
                # tokens and compare each window element-wise.
                windows = generated.unfold(0, width, 1)
                if any(torch.all(torch.eq(sentinel[0], window)) for window in windows):
                    return True

        return False
class Stream(transformers.StoppingCriteria):
    """Stopping criterion that only reports progress and never stops.

    On every call it hands the first sequence of ``input_ids`` to the
    optional callback and then returns False.
    """

    def __init__(self, callback_func=None):
        """Remember the callable to notify; ``None`` disables notification."""
        self.callback_func = callback_func

    def __call__(self, input_ids, scores) -> bool:
        """Forward ``input_ids[0]`` to the callback (if any); always False."""
        notify = self.callback_func
        if notify is None:
            return False

        notify(input_ids[0])
        return False
class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    ``func`` is started immediately on a background thread. Every value it
    passes to its ``callback`` keyword argument is queued and later returned
    by ``__next__``. Once ``func`` finishes, fails or is aborted, a private
    sentinel is queued, which ends the iteration.

    The object is also a context manager: leaving the ``with`` block sets
    ``stop_now``, which makes the next callback invocation raise
    ``ValueError`` inside ``func`` in order to abort it.
    """

    def __init__(self, func, kwargs=None, callback=None):
        """Start ``func(callback=..., **kwargs)`` on a worker thread.

        Args:
            func: Callable accepting a ``callback`` keyword argument.
            kwargs: Extra keyword arguments for ``func``. ``None`` (the
                default) means no extra arguments.
            callback: Optional callable invoked with the return value of
                ``func`` after the sentinel has been queued. It receives
                ``None`` when ``func`` was aborted or raised.
        """
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        self.sentinel = object()
        # Build a fresh dict per instance rather than sharing one mutable
        # default object between every Iteratorize ever created.
        self.kwargs = {} if kwargs is None else kwargs
        self.stop_now = False

        def _callback(val):
            # Raising from inside the callback is how the running function
            # gets interrupted; gentask() swallows this ValueError below.
            if self.stop_now or shared.stop_everything:
                raise ValueError
            self.q.put(val)

        def gentask():
            # Pre-bind the result so the completion callback below does not
            # hit an unbound local when func was aborted or raised.
            ret = None
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
            except ValueError:
                # Expected path when _callback aborts the run.
                pass
            except Exception:
                traceback.print_exc()
            finally:
                # Always queue the sentinel, even if cleanup itself fails,
                # otherwise a consumer blocked in __next__ would wait forever.
                try:
                    clear_torch_cache()
                finally:
                    self.q.put(self.sentinel)

            if self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        """Block until the next value is available; stop on the sentinel."""
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        return obj

    def __del__(self):
        clear_torch_cache()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Ask the worker to stop at its next callback invocation.
        self.stop_now = True
        clear_torch_cache()
def clear_torch_cache():
    """Run the garbage collector and, unless in CPU mode, empty the CUDA cache.

    CPU mode is read from ``shared.args.cpu``.
    """
    gc.collect()
    if shared.args.cpu:
        return
    torch.cuda.empty_cache()
|