--no-cache and --gpu-memory in MiB for fine VRAM control

2023-03-19 19:21:41 -03:00
parent 4bafe45a51
commit ddb62470e9
4 changed files with 13 additions and 7 deletions
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -136,7 +136,9 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
        t = encode(stopping_string, 0, add_special_tokens=False)
        stopping_criteria_list.append(_SentinelTokenStoppingCriteria(sentinel_token_ids=t, starting_idx=len(input_ids[0])))

-    generate_params = {}
+    generate_params = {
+        'use_cache': not shared.args.no_cache,
+    }
    if not shared.args.flexgen:
        generate_params.update({
            "max_new_tokens": max_new_tokens,