Add bucket quota; Versioned objects now count towards the object storage and size count usage

This commit is contained in:
2025-12-03 11:48:08 +08:00
parent 453ac6ea30
commit 9368715b16
8 changed files with 719 additions and 22 deletions

View File

@@ -75,6 +75,15 @@ class StorageError(RuntimeError):
"""Raised when the storage layer encounters an unrecoverable problem."""
class QuotaExceededError(StorageError):
"""Raised when an operation would exceed bucket quota limits."""
def __init__(self, message: str, quota: Dict[str, Any], usage: Dict[str, int]):
super().__init__(message)
self.quota = quota
self.usage = usage
@dataclass
class ObjectMeta:
key: str
@@ -169,16 +178,38 @@ class ObjectStorage:
object_count = 0
total_bytes = 0
version_count = 0
version_bytes = 0
# Count current objects in the bucket folder
for path in bucket_path.rglob("*"):
if path.is_file():
rel = path.relative_to(bucket_path)
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
if not rel.parts:
continue
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
top_folder = rel.parts[0]
if top_folder not in self.INTERNAL_FOLDERS:
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
stats = {"objects": object_count, "bytes": total_bytes}
# Count archived versions in the system folder
versions_root = self._bucket_versions_root(bucket_name)
if versions_root.exists():
for path in versions_root.rglob("*.bin"):
if path.is_file():
stat = path.stat()
version_count += 1
version_bytes += stat.st_size
stats = {
"objects": object_count,
"bytes": total_bytes,
"version_count": version_count,
"version_bytes": version_bytes,
"total_objects": object_count + version_count, # All objects including versions
"total_bytes": total_bytes + version_bytes, # All storage including versions
}
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
@@ -243,6 +274,7 @@ class ObjectStorage:
stream: BinaryIO,
*,
metadata: Optional[Dict[str, str]] = None,
enforce_quota: bool = True,
) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
@@ -253,12 +285,52 @@ class ObjectStorage:
destination = bucket_path / safe_key
destination.parent.mkdir(parents=True, exist_ok=True)
if self._is_versioning_enabled(bucket_path) and destination.exists():
# Check if this is an overwrite (won't add to object count)
is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
# Write to temp file first to get actual size
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try:
checksum = hashlib.md5()
with tmp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
new_size = tmp_path.stat().st_size
# Check quota before finalizing
if enforce_quota:
# Calculate net change (new size minus size being replaced)
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
# Move to final destination
shutil.move(str(tmp_path), str(destination))
finally:
# Clean up temp file if it still exists
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
stat = destination.stat()
if metadata:
@@ -424,6 +496,124 @@ class ObjectStorage:
bucket_path = self._require_bucket_path(bucket_name)
self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)
def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
"""Get quota configuration for bucket.
Returns:
Dict with 'max_bytes' and 'max_objects' (None if unlimited).
"""
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
quota = config.get("quota")
if isinstance(quota, dict):
return {
"max_bytes": quota.get("max_bytes"),
"max_objects": quota.get("max_objects"),
}
return {"max_bytes": None, "max_objects": None}
def set_bucket_quota(
self,
bucket_name: str,
*,
max_bytes: Optional[int] = None,
max_objects: Optional[int] = None,
) -> None:
"""Set quota limits for a bucket.
Args:
bucket_name: Name of the bucket
max_bytes: Maximum total size in bytes (None to remove limit)
max_objects: Maximum number of objects (None to remove limit)
"""
bucket_path = self._require_bucket_path(bucket_name)
if max_bytes is None and max_objects is None:
# Remove quota entirely
self._set_bucket_config_entry(bucket_path.name, "quota", None)
return
quota: Dict[str, Any] = {}
if max_bytes is not None:
if max_bytes < 0:
raise StorageError("max_bytes must be non-negative")
quota["max_bytes"] = max_bytes
if max_objects is not None:
if max_objects < 0:
raise StorageError("max_objects must be non-negative")
quota["max_objects"] = max_objects
self._set_bucket_config_entry(bucket_path.name, "quota", quota)
def check_quota(
self,
bucket_name: str,
additional_bytes: int = 0,
additional_objects: int = 0,
) -> Dict[str, Any]:
"""Check if an operation would exceed bucket quota.
Args:
bucket_name: Name of the bucket
additional_bytes: Bytes that would be added
additional_objects: Objects that would be added
Returns:
Dict with 'allowed' (bool), 'quota' (current limits),
'usage' (current usage), and 'message' (if not allowed).
"""
quota = self.get_bucket_quota(bucket_name)
if not quota:
return {
"allowed": True,
"quota": None,
"usage": None,
"message": None,
}
# Get current stats (uses cache when available)
stats = self.bucket_stats(bucket_name)
# Use totals which include versions for quota enforcement
current_bytes = stats.get("total_bytes", stats.get("bytes", 0))
current_objects = stats.get("total_objects", stats.get("objects", 0))
result = {
"allowed": True,
"quota": quota,
"usage": {
"bytes": current_bytes,
"objects": current_objects,
"version_count": stats.get("version_count", 0),
"version_bytes": stats.get("version_bytes", 0),
},
"message": None,
}
max_bytes_limit = quota.get("max_bytes")
max_objects = quota.get("max_objects")
if max_bytes_limit is not None:
projected_bytes = current_bytes + additional_bytes
if projected_bytes > max_bytes_limit:
result["allowed"] = False
result["message"] = (
f"Quota exceeded: adding {additional_bytes} bytes would result in "
f"{projected_bytes} bytes, exceeding limit of {max_bytes_limit} bytes"
)
return result
if max_objects is not None:
projected_objects = current_objects + additional_objects
if projected_objects > max_objects:
result["allowed"] = False
result["message"] = (
f"Quota exceeded: adding {additional_objects} objects would result in "
f"{projected_objects} objects, exceeding limit of {max_objects} objects"
)
return result
return result
def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]:
"""Get tags for an object."""
bucket_path = self._bucket_path(bucket_name)
@@ -540,6 +730,7 @@ class ObjectStorage:
else:
self._delete_metadata(bucket_id, safe_key)
stat = destination.stat()
self._invalidate_bucket_stats_cache(bucket_id)
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
@@ -688,6 +879,7 @@ class ObjectStorage:
bucket_name: str,
upload_id: str,
ordered_parts: List[Dict[str, Any]],
enforce_quota: bool = True,
) -> ObjectMeta:
if not ordered_parts:
raise StorageError("parts list required")
@@ -698,6 +890,7 @@ class ObjectStorage:
if not parts_map:
raise StorageError("No uploaded parts found")
validated: List[tuple[int, Dict[str, Any]]] = []
total_size = 0
for part in ordered_parts:
raw_number = part.get("part_number")
if raw_number is None:
@@ -717,10 +910,33 @@ class ObjectStorage:
if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
raise StorageError(f"ETag mismatch for part {number}")
validated.append((number, record))
total_size += record.get("size", 0)
validated.sort(key=lambda entry: entry[0])
safe_key = self._sanitize_object_key(manifest["object_key"])
destination = bucket_path / safe_key
# Check if this is an overwrite
is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0
# Check quota before writing
if enforce_quota:
size_delta = total_size - existing_size
object_delta = 0 if is_overwrite else 1
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
destination.parent.mkdir(parents=True, exist_ok=True)
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"