Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API endpoint to recalculate org storage #1943

Merged
merged 7 commits into from
Jul 20, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion backend/btrixcloud/basecrawls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
from datetime import timedelta
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
from uuid import UUID
import urllib.parse

Expand Down Expand Up @@ -797,6 +797,34 @@ async def get_all_crawl_search_values(
"firstSeeds": list(first_seeds),
}

async def calculate_org_crawl_file_storage(
    self, oid: UUID, type_: Optional[str] = None
) -> Tuple[int, int, int]:
    """Calculate and return total size of crawl files in org.

    Sums the "size" of every entry in the "files" list of each matching
    item in the crawls collection.

    :param oid: id of the org whose items are scanned
    :param type_: optional item type to restrict the scan to (the code
        distinguishes "crawl" and "upload"); when omitted, all of the
        org's items are scanned
    :returns: tuple of (total, crawls only, uploads only)
    """
    query: Dict[str, Any] = {"oid": oid}
    if type_ is not None:
        query["type"] = type_

    total_size = 0
    crawls_size = 0
    uploads_size = 0

    async for crawl_dict in self.crawls.find(query):
        # Use a separate name for the item's own type so the type_ filter
        # argument is not clobbered while iterating.
        item_type = crawl_dict.get("type")

        # "files" and individual "size" values may be missing or null;
        # treat both as contributing nothing instead of raising.
        item_size = sum(
            file_.get("size") or 0 for file_ in crawl_dict.get("files") or []
        )

        total_size += item_size
        if item_type == "crawl":
            crawls_size += item_size
        elif item_type == "upload":
            uploads_size += item_size

    return total_size, crawls_size, uploads_size


# ============================================================================
def init_base_crawls_api(app, user_dep, *args):
Expand Down
40 changes: 40 additions & 0 deletions backend/btrixcloud/orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
UpdatedResponse,
AddedResponse,
AddedResponseId,
SuccessResponse,
OrgInviteResponse,
OrgAcceptInviteResponse,
OrgDeleteInviteResponse,
Expand Down Expand Up @@ -1313,6 +1314,39 @@ async def delete_org_and_data(
# Delete org
await self.orgs.delete_one({"_id": org.id})

async def recalculate_storage(self, org: Organization):
    """Recalculate org storage use

    Re-sums the org's crawl/upload file sizes and profile file sizes, then
    overwrites the org's stored byte counters ("bytesStored",
    "bytesStoredCrawls", "bytesStoredUploads", "bytesStoredProfiles").

    :param org: org whose counters are recomputed
    :returns: {"success": True} when no error was raised
    :raises HTTPException: 400 if computing the sizes or writing the
        counters fails; the original error is attached as the cause
    """
    try:
        total_crawl_size, crawl_size, upload_size = (
            await self.base_crawl_ops.calculate_org_crawl_file_storage(
                org.id,
            )
        )
        profile_size = await self.profile_ops.calculate_org_profile_file_storage(
            org.id
        )

        # Overall usage is all crawl-collection files plus profile files
        org_size = total_crawl_size + profile_size

        await self.orgs.find_one_and_update(
            {"_id": org.id},
            {
                "$set": {
                    "bytesStored": org_size,
                    "bytesStoredCrawls": crawl_size,
                    "bytesStoredUploads": upload_size,
                    "bytesStoredProfiles": profile_size,
                }
            },
        )
    # pylint: disable=broad-exception-caught
    except Exception as err:
        # Chain the underlying error so it stays visible in tracebacks
        raise HTTPException(
            status_code=400, detail=f"Error calculating size: {err}"
        ) from err

    return {"success": True}


# ============================================================================
# pylint: disable=too-many-statements, too-many-arguments
Expand Down Expand Up @@ -1528,6 +1562,12 @@ async def set_role(

return {"updated": True}

@router.post(
    "/recalculate-storage",
    tags=["organizations"],
    response_model=SuccessResponse,
)
async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
    # Org is resolved through the org_owner_dep dependency; the recalculation
    # itself is delegated to ops and its result is returned unchanged.
    result = await ops.recalculate_storage(org)
    return result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first I thought maybe it should return the recalculated storage, but that would make it harder to convert to a background job, so this makes sense.


@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
async def invite_user_to_org(
invite: InviteToOrgRequest,
Expand Down
12 changes: 12 additions & 0 deletions backend/btrixcloud/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,18 @@ async def add_profile_file_replica(
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
)

async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
    """Return the summed size of all profile files in an org

    Profiles with no (or an empty) "resource" add nothing; a resource
    without a "size" key counts as 0.
    """
    resource_sizes = [
        (profile.get("resource") or {}).get("size", 0)
        async for profile in self.profiles.find({"oid": oid})
    ]
    return sum(resource_sizes)


# ============================================================================
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
Expand Down
23 changes: 23 additions & 0 deletions backend/test/test_z_delete_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
from .conftest import API_PREFIX


def test_recalculate_org_storage(admin_auth_headers, default_org_id):
    # Runs before the org is deleted: by this point resources of every type
    # exist, so each recalculated counter is expected to be non-zero.
    org_url = f"{API_PREFIX}/orgs/{default_org_id}"

    resp = requests.post(
        f"{org_url}/recalculate-storage",
        headers=admin_auth_headers,
    )
    assert resp.status_code == 200
    assert resp.json()["success"]

    resp = requests.get(org_url, headers=admin_auth_headers)
    assert resp.status_code == 200
    org_data = resp.json()

    for counter in (
        "bytesStored",
        "bytesStoredCrawls",
        "bytesStoredUploads",
        "bytesStoredProfiles",
    ):
        assert org_data[counter] > 0


def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
# Assert that non-superadmin can't delete org
r = requests.delete(
Expand Down
Loading