Skip to content

Commit

Permalink
Fix S3FileStore / GoogleCloudFileStore directory list & deletion (All…
Browse files Browse the repository at this point in the history
…-Hands-AI#6449)

Co-authored-by: openhands <[email protected]>
  • Loading branch information
tofarr and openhands-agent authored Jan 27, 2025
1 parent 23348af commit c997495
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 28 deletions.
18 changes: 16 additions & 2 deletions openhands/storage/google_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,19 @@ def list(self, path: str) -> List[str]:
return list(blobs)

def delete(self, path: str) -> None:
blob = self.bucket.blob(path)
blob.delete()
# Sanitize path
if not path or path == '/':
path = ''
if path.endswith('/'):
path = path[:-1]

# Try to delete any child resources (Assume the path is a directory)
for blob in self.bucket.list_blobs(prefix=f'{path}/'):
blob.delete()

# Next try to delete item as a file
try:
blob = self.bucket.blob(path)
blob.delete()
except NotFound:
pass
67 changes: 42 additions & 25 deletions openhands/storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,36 +66,53 @@ def read(self, path: str) -> str:
)

def list(self, path: str) -> list[str]:
if path and path != '/' and not path.endswith('/'):
if not path or path == '/':
path = ''
elif not path.endswith('/'):
path += '/'
try:
response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
# Check if 'Contents' exists in the response
if 'Contents' in response:
objects = [obj['Key'] for obj in response['Contents']]
return objects
else:
return list()
except botocore.exceptions.ClientError as e:
# Catch all S3-related errors
if e.response['Error']['Code'] == 'NoSuchBucket':
raise FileNotFoundError(
f"Error: The bucket '{self.bucket}' does not exist."
)
elif e.response['Error']['Code'] == 'AccessDenied':
raise FileNotFoundError(
f"Error: Access denied to bucket '{self.bucket}'."
)
else:
raise FileNotFoundError(f"Error: {e.response['Error']['Message']}")
except Exception as e:
raise FileNotFoundError(
f"Error: Failed to read from bucket '{self.bucket}' at path {path}: {e}"
)
# The delimiter logic screens out directories, so we can't use it. :(
# For example, given a structure:
# foo/bar/zap.txt
# foo/bar/bang.txt
# ping.txt
# prefix=None, delimiter="/" yields ["ping.txt"] # :(
# prefix="foo", delimiter="/" yields [] # :(
results = set()
prefix_len = len(path)
response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
contents = response.get('Contents')
if not contents:
return []
paths = [obj['Key'] for obj in response['Contents']]
for sub_path in paths:
if sub_path == path:
continue
try:
index = sub_path.index('/', prefix_len + 1)
if index != prefix_len:
results.add(sub_path[: index + 1])
except ValueError:
results.add(sub_path)
return list(results)

def delete(self, path: str) -> None:
try:
# Sanitize path
if not path or path == '/':
path = ''
if path.endswith('/'):
path = path[:-1]

# Try to delete any child resources (Assume the path is a directory)
response = self.client.list_objects_v2(
Bucket=self.bucket, Prefix=f'{path}/'
)
for content in response.get('Contents') or []:
self.client.delete_object(Bucket=self.bucket, Key=content['Key'])

# Next try to delete item as a file
self.client.delete_object(Bucket=self.bucket, Key=path)

except botocore.exceptions.ClientError as e:
if e.response['Error']['Code'] == 'NoSuchBucket':
raise FileNotFoundError(
Expand Down
117 changes: 116 additions & 1 deletion tests/unit/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
import shutil
from abc import ABC
from dataclasses import dataclass, field
from io import StringIO
from io import BytesIO, StringIO
from typing import Dict, List, Optional
from unittest import TestCase
from unittest.mock import patch

import botocore.exceptions
from google.api_core.exceptions import NotFound

from openhands.storage.files import FileStore
from openhands.storage.google_cloud import GoogleCloudFileStore
from openhands.storage.local import LocalFileStore
from openhands.storage.memory import InMemoryFileStore
from openhands.storage.s3 import S3FileStore


class _StorageTest(ABC):
Expand Down Expand Up @@ -73,6 +77,35 @@ def test_deep_list(self):
store.delete('foo/bar/qux.txt')
store.delete('foo/bar/quux.txt')

def test_directory_deletion(self):
store = self.get_store()
# Create a directory structure
store.write('foo/bar/baz.txt', 'Hello, world!')
store.write('foo/bar/qux.txt', 'Hello, world!')
store.write('foo/other.txt', 'Hello, world!')
store.write('foo/bar/subdir/file.txt', 'Hello, world!')

# Verify initial structure
self.assertEqual(store.list(''), ['foo/'])
self.assertEqual(sorted(store.list('foo')), ['foo/bar/', 'foo/other.txt'])
self.assertEqual(
sorted(store.list('foo/bar')),
['foo/bar/baz.txt', 'foo/bar/qux.txt', 'foo/bar/subdir/'],
)

# Delete a directory
store.delete('foo/bar')

# Verify directory and its contents are gone, but other files remain
self.assertEqual(store.list(''), ['foo/'])
self.assertEqual(store.list('foo'), ['foo/other.txt'])

# Delete root directory
store.delete('foo')

# Verify everything is gone
self.assertEqual(store.list(''), [])


class TestLocalFileStore(TestCase, _StorageTest):
def setUp(self):
Expand All @@ -94,6 +127,12 @@ def setUp(self):
self.store = GoogleCloudFileStore('dear-liza')


class TestS3FileStore(TestCase, _StorageTest):
def setUp(self):
with patch('boto3.client', lambda service, **kwargs: _MockS3Client()):
self.store = S3FileStore('dear-liza')


# I would have liked to use cloud-storage-mocker here but the python versions were incompatible :(
# If we write tests for the S3 storage class I would definitely recommend we use moto.
class _MockGoogleCloudClient:
Expand Down Expand Up @@ -131,6 +170,8 @@ def open(self, op: str):
return _MockGoogleCloudBlobWriter(self)

def delete(self):
if self.name not in self.bucket.blobs_by_path:
raise NotFound('Blob not found')
del self.bucket.blobs_by_path[self.name]


Expand All @@ -152,3 +193,77 @@ def __exit__(self, exc_type, exc_val, exc_tb):
blob = self.blob
blob.content = self.content
blob.bucket.blobs_by_path[blob.name] = blob


class _MockS3Client:
def __init__(self):
self.objects_by_bucket: Dict[str, Dict[str, _MockS3Object]] = {}

def put_object(self, Bucket: str, Key: str, Body: str | bytes) -> None:
if Bucket not in self.objects_by_bucket:
self.objects_by_bucket[Bucket] = {}
self.objects_by_bucket[Bucket][Key] = _MockS3Object(Key, Body)

def get_object(self, Bucket: str, Key: str) -> Dict:
if Bucket not in self.objects_by_bucket:
raise botocore.exceptions.ClientError(
{
'Error': {
'Code': 'NoSuchBucket',
'Message': f"The bucket '{Bucket}' does not exist",
}
},
'GetObject',
)
if Key not in self.objects_by_bucket[Bucket]:
raise botocore.exceptions.ClientError(
{
'Error': {
'Code': 'NoSuchKey',
'Message': f"The specified key '{Key}' does not exist",
}
},
'GetObject',
)
content = self.objects_by_bucket[Bucket][Key].content
if isinstance(content, bytes):
return {'Body': BytesIO(content)}
return {'Body': StringIO(content)}

def list_objects_v2(self, Bucket: str, Prefix: str = '') -> Dict:
if Bucket not in self.objects_by_bucket:
raise botocore.exceptions.ClientError(
{
'Error': {
'Code': 'NoSuchBucket',
'Message': f"The bucket '{Bucket}' does not exist",
}
},
'ListObjectsV2',
)
objects = self.objects_by_bucket[Bucket]
contents = [
{'Key': key}
for key in objects.keys()
if not Prefix or key.startswith(Prefix)
]
return {'Contents': contents} if contents else {}

def delete_object(self, Bucket: str, Key: str) -> None:
if Bucket not in self.objects_by_bucket:
raise botocore.exceptions.ClientError(
{
'Error': {
'Code': 'NoSuchBucket',
'Message': f"The bucket '{Bucket}' does not exist",
}
},
'DeleteObject',
)
self.objects_by_bucket[Bucket].pop(Key, None)


@dataclass
class _MockS3Object:
key: str
content: str | bytes

0 comments on commit c997495

Please sign in to comment.