Skip to content

Commit

Permalink
add pdf maximum page limit
Browse files Browse the repository at this point in the history
  • Loading branch information
CodingWithTim committed Jan 4, 2025
1 parent 0daef32 commit 32c6724
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 42 deletions.
3 changes: 3 additions & 0 deletions fastchat/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
)
# Upper bound on the number of turns allowed in one conversation
CONVERSATION_TURN_LIMIT = 50
# Upper bound on the number of pages accepted in an uploaded PDF
PDF_PAGE_LIMIT = 50
# Message shown to the user when an uploaded PDF exceeds PDF_PAGE_LIMIT
PDF_LIMIT_MSG = (
    f"YOU HAVE REACHED THE MAXIMUM PDF PAGE LIMIT ({PDF_PAGE_LIMIT} PAGES). "
    "PLEASE UPLOAD A SMALLER DOCUMENT."
)
# Session expiration time (presumably in seconds -- confirm against the reader)
SESSION_EXPIRATION_TIME = 3600
# The output dir of log files
Expand Down
40 changes: 0 additions & 40 deletions fastchat/serve/gradio_block_arena_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,46 +260,6 @@ def wrap_pdfchat_query(query, document):
}


# TODO: P1: Integrate this.
def pdf_moderator(images):
    """Return True if the moderation endpoint flags any of the given images.

    Each image is JPEG-encoded, wrapped in a base64 ``data:`` URL and sent to
    OpenAI's ``omni-moderation-latest`` model, one request per image.

    Args:
        images: Iterable of image objects exposing ``save(fp, format=...)``
            (presumably PIL images of rendered PDF pages -- confirm with the
            caller once this is integrated).

    Returns:
        bool: True as soon as one image is reported as flagged. False when no
        image is flagged, including when ``images`` is empty. A request that
        raises is printed and skipped, i.e. that image is treated as not
        flagged (best-effort, as in the original implementation).
    """
    import base64
    from io import BytesIO

    images = list(images)
    if not images:
        # Nothing to moderate: an empty document must not be reported as
        # flagged, and no client needs to be created.
        return False

    from openai import OpenAI

    moderation_inputs = []
    for image in images:
        buffer = BytesIO()
        image.save(buffer, format="JPEG")
        image_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

        # convert to openai format
        moderation_inputs.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{image_b64}",
                },
            }
        )

    # One image per request: per the original note, the endpoint accepts at
    # most one image per call at the moment.
    client = OpenAI()
    for moderation_input in moderation_inputs:
        try:
            response = client.moderations.create(
                model="omni-moderation-latest",
                # Multimodal input is an array of typed objects, not a bare dict.
                input=[moderation_input],
            )
        except Exception as e:
            # Best-effort: report the failure and move on to the next image.
            print(e)
            continue

        # The per-input verdicts live in ``response.results``; the response
        # object itself is not indexable.
        if any(result.flagged for result in response.results):
            return True

    return False


def detect_language_from_doc(pdf_file_path):
from pdf2image import convert_from_path
from polyglot.detect import Detector
Expand Down
27 changes: 25 additions & 2 deletions fastchat/serve/gradio_block_arena_vision_anony.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
TEXT_MODERATION_MSG,
IMAGE_MODERATION_MSG,
PDF_MODERATION_MSG,
PDF_LIMIT_MSG,
PDF_PAGE_LIMIT,
MODERATION_MSG,
CONVERSATION_LIMIT_MSG,
SLOW_MODEL_MSG,
Expand Down Expand Up @@ -78,6 +80,7 @@
build_logger,
moderation_filter,
image_moderation_filter,
get_pdf_num_page,
upload_pdf_file_to_gcs,
hash_pdf,
)
Expand Down Expand Up @@ -339,6 +342,25 @@ def add_text(
+ [""]
)

if len(pdfs) > 0 and get_pdf_num_page(pdfs[0]) > PDF_PAGE_LIMIT:
logger.info(f"pdf page limit exceeded. ip: {ip}. text: {text}")
for i in range(num_sides):
states[i].skip_next = True
return (
states
+ [x.to_gradio_chatbot() for x in states]
+ [
{
"text": PDF_LIMIT_MSG
+ " PLEASE CLICK 🎲 NEW ROUND TO START A NEW CONVERSATION."
},
"",
no_change_btn,
]
+ [no_change_btn] * 7
+ [""]
)

model_list = [states[i].model_name for i in range(num_sides)]

images = convert_images_to_conversation_format(images)
Expand All @@ -356,11 +378,12 @@ def add_text(
return (
states
+ [x.to_gradio_chatbot() for x in states]
+ [{"text": CONVERSATION_LIMIT_MSG}, "", no_change_btn]
+ [
{"text": CONVERSATION_LIMIT_MSG},
"",
no_change_btn,
]
* 7
+ [no_change_btn] * 7
+ [""]
)

Expand Down
8 changes: 8 additions & 0 deletions fastchat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,14 @@ def hash_pdf(file_path):
return hashlib.md5(file_content).hexdigest()


def get_pdf_num_page(file_path):
    """Return the number of pages in the PDF located at ``file_path``."""
    # Imported inside the function, so pypdf is only loaded when this is called.
    from pypdf import PdfReader

    page_count = len(PdfReader(file_path).pages)
    return page_count


def image_moderation_request(image_bytes, endpoint, api_key):
headers = {"Content-Type": "image/jpeg", "Ocp-Apim-Subscription-Key": api_key}

Expand Down

0 comments on commit 32c6724

Please sign in to comment.