add pdf maximum page limit

lm-sys · Jan 4, 2025 · 32c6724 · 32c6724
1 parent 0daef32
commit 32c6724
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 42 deletions.
diff --git a/fastchat/constants.py b/fastchat/constants.py
@@ -40,6 +40,9 @@
 )
 # Maximum conversation turns
 CONVERSATION_TURN_LIMIT = 50
+# Maximum PDF Page Limit
+PDF_PAGE_LIMIT = 50
+PDF_LIMIT_MSG = f"YOU HAVE REACHED THE MAXIMUM PDF PAGE LIMIT ({PDF_PAGE_LIMIT} PAGES). PLEASE UPLOAD A SMALLER DOCUMENT."
 # Session expiration time
 SESSION_EXPIRATION_TIME = 3600
 # The output dir of log files

diff --git a/fastchat/serve/gradio_block_arena_vision.py b/fastchat/serve/gradio_block_arena_vision.py
@@ -260,46 +260,6 @@ def wrap_pdfchat_query(query, document):
 }
 
 
-# TODO: P1: Integrate this.
-def pdf_moderator(images):
-    import base64
-    from openai import OpenAI
-    from io import BytesIO
-
-    base64_urls = []
-    for image in images:
-        buffer = BytesIO()
-        image.save(buffer, format="JPEG")
-
-        image_bytes = buffer.getvalue()
-        image_b64 = base64.b64encode(image_bytes).decode("utf-8")
-
-        # convert to openai format
-        base64_urls.append(
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": f"data:image/jpeg;base64,{image_b64}",
-                },
-            }
-        )
-
-    # OpenAI's maximum number of images is 1 at the moment.
-    client = OpenAI()
-    moderations = []
-    for url in base64_urls:
-        try:
-            response = client.moderations.create(
-                model="omni-moderation-latest",
-                input=url,
-            )
-            moderations.append(response[0].results.flagged)
-        except Exception as e:
-            print(e)
-
-    return all(moderations)
-
-
 def detect_language_from_doc(pdf_file_path):
     from pdf2image import convert_from_path
     from polyglot.detect import Detector

diff --git a/fastchat/serve/gradio_block_arena_vision_anony.py b/fastchat/serve/gradio_block_arena_vision_anony.py
@@ -14,6 +14,8 @@
     TEXT_MODERATION_MSG,
     IMAGE_MODERATION_MSG,
     PDF_MODERATION_MSG,
+    PDF_LIMIT_MSG,
+    PDF_PAGE_LIMIT,
     MODERATION_MSG,
     CONVERSATION_LIMIT_MSG,
     SLOW_MODEL_MSG,
@@ -78,6 +80,7 @@
     build_logger,
     moderation_filter,
     image_moderation_filter,
+    get_pdf_num_page,
     upload_pdf_file_to_gcs,
     hash_pdf,
 )
@@ -339,6 +342,25 @@ def add_text(
             + [""]
         )
 
+    if len(pdfs) > 0 and get_pdf_num_page(pdfs[0]) > PDF_PAGE_LIMIT:
+        logger.info(f"pdf page limit exceeded. ip: {ip}. text: {text}")
+        for i in range(num_sides):
+            states[i].skip_next = True
+        return (
+            states
+            + [x.to_gradio_chatbot() for x in states]
+            + [
+                {
+                    "text": PDF_LIMIT_MSG
+                    + " PLEASE CLICK 🎲 NEW ROUND TO START A NEW CONVERSATION."
+                },
+                "",
+                no_change_btn,
+            ]
+            + [no_change_btn] * 7
+            + [""]
+        )
+
     model_list = [states[i].model_name for i in range(num_sides)]
 
     images = convert_images_to_conversation_format(images)
@@ -356,11 +378,12 @@ def add_text(
         return (
             states
             + [x.to_gradio_chatbot() for x in states]
-            + [{"text": CONVERSATION_LIMIT_MSG}, "", no_change_btn]
             + [
+                {"text": CONVERSATION_LIMIT_MSG},
+                "",
                 no_change_btn,
             ]
-            * 7
+            + [no_change_btn] * 7
             + [""]
         )
 

diff --git a/fastchat/utils.py b/fastchat/utils.py
@@ -474,6 +474,14 @@ def hash_pdf(file_path):
     return hashlib.md5(file_content).hexdigest()
 
 
+def get_pdf_num_page(file_path):
+    """Return the number of pages in a PDF, as counted by pypdf.
+
+    Args:
+        file_path: Handed unchanged to ``pypdf.PdfReader``.
+            NOTE(review): presumably a local filesystem path to the uploaded
+            PDF -- confirm against the caller.
+
+    Returns:
+        ``len(reader.pages)`` for the opened document.
+
+    No exceptions are caught here; anything raised by pypdf while opening or
+    reading the document propagates to the caller.
+    """
+    # Function-local import: pypdf is not imported when this module is loaded,
+    # only when this helper is actually called.
+    from pypdf import PdfReader
+
+    reader = PdfReader(file_path)
+
+    return len(reader.pages)
+
+
 def image_moderation_request(image_bytes, endpoint, api_key):
     headers = {"Content-Type": "image/jpeg", "Ocp-Apim-Subscription-Key": api_key}