From 9932ff49c282c61e78245f7766888702506b0f7f Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Wed, 3 Jan 2024 12:17:33 -0800 Subject: [PATCH] update readme and test --- tests/test_inference.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_inference.py b/tests/test_inference.py index e3445d8..e94000b 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -75,5 +75,4 @@ def test_llama2_70b(): num_tokens_to_generate=512, ) - assert within_range(summary_dict["total_decode_latency"], 180.06, - TOLERANCE) + assert within_range(summary_dict["total_decode_latency"], 18.06, TOLERANCE)