Add additional filters to page list endpoints (#1622)
Fixes #1617 

Filters added:

- reviewed: filter for pages that have an approval value or at least one
note (true), or neither (false)
- approved: filter by approval value (accepts a comma-separated list of
strings, each of which is coerced into True, False, or None; invalid
values are ignored)
- hasNotes: filter for pages that have at least one note (true) or none
(false)

Tests have also been added to ensure that results are as expected; example
requests exercising the new filters are sketched below.
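
A minimal sketch of requests exercising the new filters, mirroring the
endpoints used in the tests in this commit; the API prefix, IDs, and token
are placeholders, not values from this change:

import requests

# Placeholder deployment values; substitute your own
API_PREFIX = "https://app.example.com/api"
headers = {"Authorization": "Bearer <jwt-token>"}
org_id = "<org-id>"
crawl_id = "<crawl-id>"

base = f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/pages"

# Pages that have been reviewed (approved with any value, or carrying a note)
r = requests.get(f"{base}?reviewed=True", headers=headers)
assert r.status_code == 200

# Pages explicitly approved as true or false; unreviewed (None) pages excluded
r = requests.get(f"{base}?approved=true,false", headers=headers)

# Pages with at least one note
r = requests.get(f"{base}?hasNotes=True", headers=headers)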
tw4l authored Mar 22, 2024
1 parent b3b1e0d commit e9895e7
Showing 3 changed files with 245 additions and 3 deletions.
45 changes: 43 additions & 2 deletions backend/btrixcloud/pages.py
@@ -24,7 +24,7 @@
     PageNoteDelete,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
-from .utils import from_k8s_date
+from .utils import from_k8s_date, str_list_to_bools

 if TYPE_CHECKING:
     from .crawls import CrawlOps
@@ -360,13 +360,16 @@ async def list_pages(
         qa_gt: Optional[float] = None,
         qa_lte: Optional[float] = None,
         qa_lt: Optional[float] = None,
+        reviewed: Optional[bool] = None,
+        approved: Optional[List[Union[bool, None]]] = None,
+        has_notes: Optional[bool] = None,
         page_size: int = DEFAULT_PAGE_SIZE,
         page: int = 1,
         sort_by: Optional[str] = None,
         sort_direction: Optional[int] = -1,
     ) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]:
         """List all pages in crawl"""
-        # pylint: disable=duplicate-code, too-many-locals, too-many-branches
+        # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
         # Zero-index page for query
         page = page - 1
         skip = page_size * page
@@ -377,6 +380,24 @@
         if org:
             query["oid"] = org.id

+        if reviewed:
+            query["$or"] = [
+                {"approved": {"$ne": None}},
+                {"notes.0": {"$exists": True}},
+            ]
+
+        if reviewed is False:
+            query["$and"] = [
+                {"approved": {"$eq": None}},
+                {"notes.0": {"$exists": False}},
+            ]
+
+        if approved:
+            query["approved"] = {"$in": approved}
+
+        if has_notes is not None:
+            query["notes.0"] = {"$exists": has_notes}
+
         if qa_run_id:
             query[f"qa.{qa_run_id}"] = {"$exists": True}

@@ -576,15 +597,25 @@ async def delete_page_notes(
     async def get_pages_list(
         crawl_id: str,
         org: Organization = Depends(org_crawl_dep),
+        reviewed: Optional[bool] = None,
+        approved: Optional[str] = None,
+        hasNotes: Optional[bool] = None,
         pageSize: int = DEFAULT_PAGE_SIZE,
         page: int = 1,
         sortBy: Optional[str] = None,
         sortDirection: Optional[int] = -1,
     ):
         """Retrieve paginated list of pages"""
+        formatted_approved: Optional[List[Union[bool, None]]] = None
+        if approved:
+            formatted_approved = str_list_to_bools(approved.split(","))
+
         pages, total = await ops.list_pages(
             crawl_id=crawl_id,
             org=org,
+            reviewed=reviewed,
+            approved=formatted_approved,
+            has_notes=hasNotes,
             page_size=pageSize,
             page=page,
             sort_by=sortBy,
@@ -605,13 +636,20 @@ async def get_pages_list_with_qa(
         gt: Optional[float] = None,
         lte: Optional[float] = None,
         lt: Optional[float] = None,
+        reviewed: Optional[bool] = None,
+        approved: Optional[str] = None,
+        hasNotes: Optional[bool] = None,
         org: Organization = Depends(org_crawl_dep),
         pageSize: int = DEFAULT_PAGE_SIZE,
         page: int = 1,
         sortBy: Optional[str] = None,
         sortDirection: Optional[int] = -1,
     ):
         """Retrieve paginated list of pages"""
+        formatted_approved: Optional[List[Union[bool, None]]] = None
+        if approved:
+            formatted_approved = str_list_to_bools(approved.split(","))
+
         pages, total = await ops.list_pages(
             crawl_id=crawl_id,
             org=org,
@@ -621,6 +659,9 @@
             qa_gt=gt,
             qa_lte=lte,
             qa_lt=lt,
+            reviewed=reviewed,
+            approved=formatted_approved,
+            has_notes=hasNotes,
             page_size=pageSize,
             page=page,
             sort_by=sortBy,
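For reference, a short sketch (not part of this commit) of the MongoDB
query fragments the new filters produce, following the list_pages logic
above:

# reviewed=True: approval set to any value, or at least one note present
reviewed_true = {"$or": [{"approved": {"$ne": None}}, {"notes.0": {"$exists": True}}]}

# reviewed=False: no approval and no notes
reviewed_false = {"$and": [{"approved": {"$eq": None}}, {"notes.0": {"$exists": False}}]}

# approved=[True, None], i.e. ?approved=true,none after coercion
approved_in = {"approved": {"$in": [True, None]}}

# hasNotes=True: "notes.0" exists exactly when the notes array is non-empty
has_notes_filter = {"notes.0": {"$exists": True}}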
22 changes: 21 additions & 1 deletion backend/btrixcloud/utils.py
@@ -94,10 +94,30 @@ def parse_jsonl_error_messages(errors):
 def is_bool(stri: Optional[str]) -> bool:
     """Check if the string parameter is stringly true"""
     if stri:
-        return stri.lower() in ("true", "1", "yes")
+        return stri.lower() in ("true", "1", "yes", "on")
     return False


+def is_falsy_bool(stri: Optional[str]) -> bool:
+    """Check if the string parameter is stringly false"""
+    if stri:
+        return stri.lower() in ("false", "0", "no", "off")
+    return False
+
+
+def str_list_to_bools(str_list: List[str], allow_none=True) -> List[Union[bool, None]]:
+    """Return version of input string list cast to bool or None, ignoring other values"""
+    output: List[Union[bool, None]] = []
+    for val in str_list:
+        if is_bool(val):
+            output.append(True)
+        if is_falsy_bool(val):
+            output.append(False)
+        if val.lower() in ("none", "null") and allow_none:
+            output.append(None)
+    return output
+
+
 def slug_from_name(name: str) -> str:
     """Generate slug from name"""
     return slugify(name.replace("'", ""))
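A quick usage sketch for str_list_to_bools (illustrative values, not from
the commit):

from btrixcloud.utils import str_list_to_bools

# Truthy and falsy strings are coerced; unrecognized values are dropped
assert str_list_to_bools(["true", "0", "none"]) == [True, False, None]

# With allow_none=False, "none"/"null" entries are dropped as well
assert str_list_to_bools(["yes", "banana", "null"], allow_none=False) == [True]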
181 changes: 181 additions & 0 deletions backend/test/test_run_crawl.py
@@ -459,6 +459,21 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page.get("modified") is None
     assert page.get("approved") is None

+    # Test reviewed filter (page has no notes or approval, so it should show up under reviewed=False)
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
     # Update page with approval
     r = requests.patch(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
@@ -470,6 +485,57 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert r.status_code == 200
     assert r.json()["updated"]

+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["approved"]
+
+    # Test approved filter
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    # Test reviewed filter (page is now approved, so it should show up under reviewed=True)
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
         headers=crawler_auth_headers,
@@ -490,6 +556,44 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["modified"]
     assert page["approved"]

+    # Set approved to False and test filter again
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
+        headers=crawler_auth_headers,
+        json={
+            "approved": False,
+        },
+    )
+    assert r.status_code == 200
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+

 def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     # Re-add pages and verify they were correctly added
@@ -565,6 +669,83 @@ def test_crawl_page_notes(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert first_note["userName"]
     assert first_note["text"] == note_text

+    # Make sure page approval is set to None and re-test filters
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
+        headers=crawler_auth_headers,
+        json={
+            "approved": None,
+        },
+    )
+    assert r.status_code == 200
+    assert r.json()["updated"]
+
+    # Test approved filter
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=true,false,none",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    # Test reviewed filter (page now has notes, so it should show up under reviewed=True)
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
+    # Test notes filter
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?hasNotes=False",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 0
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?hasNotes=True",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["total"] == 1
+
     # Add second note to test selective updates/deletes
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}/notes",
