Skip to content

Commit

Permalink
Add API endpoint to recalculate org storage (#1943)
Browse files Browse the repository at this point in the history
Fixes #1942 

This process might be a bit slow for large orgs; we may consider moving it to a background job in #1898.
  • Loading branch information
tw4l authored Jul 20, 2024
1 parent 6ccaad2 commit 2237120
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 1 deletion.
30 changes: 29 additions & 1 deletion backend/btrixcloud/basecrawls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
from datetime import timedelta
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
from uuid import UUID
import urllib.parse

Expand Down Expand Up @@ -797,6 +797,34 @@ async def get_all_crawl_search_values(
"firstSeeds": list(first_seeds),
}

async def calculate_org_crawl_file_storage(
    self, oid: UUID, type_: Optional[str] = None
) -> Tuple[int, int, int]:
    """Calculate and return total size of crawl files in org.

    Iterates every document in ``self.crawls`` whose ``oid`` matches and sums
    the ``size`` of each entry in its ``files`` list.

    ``type_`` is accepted for backward compatibility with existing callers;
    it currently has no effect on the result.

    Returns tuple of (total, crawls only, uploads only). ``total`` includes
    items of every type, so it can exceed the sum of the other two values.
    """
    # pylint: disable=unused-argument
    total_size = 0
    crawls_size = 0
    uploads_size = 0

    cursor = self.crawls.find({"oid": oid})
    async for crawl_dict in cursor:
        # "files" may be missing or explicitly null; treat both as no files
        # so a single incomplete item does not abort the whole recalculation.
        files = crawl_dict.get("files") or []
        # Kept separate from the ``type_`` parameter so the argument is not
        # silently overwritten on each iteration.
        item_type = crawl_dict.get("type")

        # A missing or null "size" counts as zero bytes.
        item_size = sum(file_.get("size") or 0 for file_ in files)

        total_size += item_size
        if item_type == "crawl":
            crawls_size += item_size
        elif item_type == "upload":
            uploads_size += item_size

    return total_size, crawls_size, uploads_size


# ============================================================================
def init_base_crawls_api(app, user_dep, *args):
Expand Down
40 changes: 40 additions & 0 deletions backend/btrixcloud/orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
UpdatedResponse,
AddedResponse,
AddedResponseId,
SuccessResponse,
OrgInviteResponse,
OrgAcceptInviteResponse,
OrgDeleteInviteResponse,
Expand Down Expand Up @@ -1319,6 +1320,39 @@ async def delete_org_and_data(
# Delete org
await self.orgs.delete_one({"_id": org.id})

async def recalculate_storage(self, org: Organization) -> dict[str, bool]:
    """Recalculate org storage use

    Recomputes the org's file sizes via
    ``base_crawl_ops.calculate_org_crawl_file_storage`` and
    ``profile_ops.calculate_org_profile_file_storage``, then overwrites the
    ``bytesStored``, ``bytesStoredCrawls``, ``bytesStoredUploads`` and
    ``bytesStoredProfiles`` fields on the org document.

    Returns ``{"success": True}`` once the update has been issued.

    Raises HTTPException (status 400) if any step fails; the original
    exception is chained as the cause so its traceback is preserved.
    """
    try:
        total_crawl_size, crawl_size, upload_size = (
            await self.base_crawl_ops.calculate_org_crawl_file_storage(
                org.id,
            )
        )
        profile_size = await self.profile_ops.calculate_org_profile_file_storage(
            org.id
        )

        # Overall usage is every crawl-collection item plus profile files.
        org_size = total_crawl_size + profile_size

        await self.orgs.find_one_and_update(
            {"_id": org.id},
            {
                "$set": {
                    "bytesStored": org_size,
                    "bytesStoredCrawls": crawl_size,
                    "bytesStoredUploads": upload_size,
                    "bytesStoredProfiles": profile_size,
                }
            },
        )
    # pylint: disable=broad-exception-caught
    except Exception as err:
        raise HTTPException(
            status_code=400, detail=f"Error calculating size: {err}"
        ) from err

    return {"success": True}


# ============================================================================
# pylint: disable=too-many-statements, too-many-arguments
Expand Down Expand Up @@ -1534,6 +1568,12 @@ async def set_role(

return {"updated": True}

@router.post(
    "/recalculate-storage",
    tags=["organizations"],
    response_model=SuccessResponse,
)
async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
    # Delegates to ops.recalculate_storage for the org resolved by the
    # org_owner_dep dependency and returns its result unchanged.
    outcome = await ops.recalculate_storage(org)
    return outcome

@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
async def invite_user_to_org(
invite: InviteToOrgRequest,
Expand Down
12 changes: 12 additions & 0 deletions backend/btrixcloud/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,18 @@ async def add_profile_file_replica(
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
)

async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
    """Return the combined size of all profile files belonging to an org.

    Walks every document in ``self.profiles`` whose ``oid`` matches and adds
    up the ``size`` of its ``resource`` entry. Profiles without a resource
    are skipped, and a resource without a ``size`` contributes zero.
    """
    byte_count = 0

    async for doc in self.profiles.find({"oid": oid}):
        resource = doc.get("resource")
        if not resource:
            # No stored file for this profile; nothing to add.
            continue
        byte_count += resource.get("size", 0)

    return byte_count


# ============================================================================
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
Expand Down
23 changes: 23 additions & 0 deletions backend/test/test_z_delete_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
from .conftest import API_PREFIX


def test_recalculate_org_storage(admin_auth_headers, default_org_id):
    """Recalculating storage succeeds and leaves every counter positive.

    Runs before the org is deleted, once resources of all types have been
    created, so each storage counter is expected to be greater than zero.
    """
    org_url = f"{API_PREFIX}/orgs/{default_org_id}"

    # Trigger the recalculation.
    recalc_resp = requests.post(
        f"{org_url}/recalculate-storage",
        headers=admin_auth_headers,
    )
    assert recalc_resp.status_code == 200
    assert recalc_resp.json()["success"]

    # Fetch the org and check the stored-byte counters it now reports.
    org_resp = requests.get(org_url, headers=admin_auth_headers)
    assert org_resp.status_code == 200
    org_data = org_resp.json()

    for counter in (
        "bytesStored",
        "bytesStoredCrawls",
        "bytesStoredUploads",
        "bytesStoredProfiles",
    ):
        assert org_data[counter] > 0


def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
# Assert that non-superadmin can't delete org
r = requests.delete(
Expand Down

0 comments on commit 2237120

Please sign in to comment.