Add bucket quota; Versioned objects now count towards the object storage and size count usage

This commit is contained in:
2025-12-03 11:48:08 +08:00
parent 453ac6ea30
commit 9368715b16
8 changed files with 719 additions and 22 deletions

View File

@@ -266,5 +266,11 @@ class EncryptedObjectStorage:
def list_multipart_parts(self, bucket_name: str, upload_id: str):
return self.storage.list_multipart_parts(bucket_name, upload_id)
def get_bucket_quota(self, bucket_name: str):
return self.storage.get_bucket_quota(bucket_name)
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
def _compute_etag(self, path: Path) -> str:
return self.storage._compute_etag(path)

View File

@@ -129,6 +129,25 @@ class EntityTooLargeError(AppError):
status_code: int = 413
@dataclass
class QuotaExceededAppError(AppError):
"""Bucket quota exceeded."""
code: str = "QuotaExceeded"
message: str = "The bucket quota has been exceeded"
status_code: int = 403
quota: Optional[Dict[str, Any]] = None
usage: Optional[Dict[str, int]] = None
def __post_init__(self):
if self.quota or self.usage:
self.details = {}
if self.quota:
self.details["quota"] = self.quota
if self.usage:
self.details["usage"] = self.usage
super().__post_init__()
def handle_app_error(error: AppError) -> Response:
"""Handle application errors with appropriate response format."""
log_extra = {"error_code": error.code}
@@ -163,5 +182,6 @@ def register_error_handlers(app):
ObjectNotFoundError, InvalidObjectKeyError,
AccessDeniedError, InvalidCredentialsError,
MalformedRequestError, InvalidArgumentError, EntityTooLargeError,
QuotaExceededAppError,
]:
app.register_error_handler(error_class, handle_app_error)

View File

@@ -18,7 +18,7 @@ from .bucket_policies import BucketPolicyStore
from .extensions import limiter
from .iam import IamError, Principal
from .replication import ReplicationManager
from .storage import ObjectStorage, StorageError
from .storage import ObjectStorage, StorageError, QuotaExceededError
s3_api_bp = Blueprint("s3_api", __name__)
@@ -803,6 +803,7 @@ def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
"acl": _bucket_acl_handler,
"versions": _bucket_list_versions_handler,
"lifecycle": _bucket_lifecycle_handler,
"quota": _bucket_quota_handler,
}
requested = [key for key in handlers if key in request.args]
if not requested:
@@ -1400,6 +1401,87 @@ def _parse_lifecycle_config(payload: bytes) -> list:
return rules
def _bucket_quota_handler(bucket_name: str) -> Response:
"""Handle bucket quota configuration (GET/PUT/DELETE /<bucket>?quota)."""
if request.method not in {"GET", "PUT", "DELETE"}:
return _method_not_allowed(["GET", "PUT", "DELETE"])
principal, error = _require_principal()
if error:
return error
try:
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
if not storage.bucket_exists(bucket_name):
return _error_response("NoSuchBucket", "Bucket does not exist", 404)
if request.method == "GET":
quota = storage.get_bucket_quota(bucket_name)
if not quota:
return _error_response("NoSuchQuotaConfiguration", "No quota configuration found", 404)
# Return as JSON for simplicity (not a standard S3 API)
stats = storage.bucket_stats(bucket_name)
return jsonify({
"quota": quota,
"usage": {
"bytes": stats.get("bytes", 0),
"objects": stats.get("objects", 0),
}
})
if request.method == "DELETE":
try:
storage.set_bucket_quota(bucket_name, max_size_bytes=None, max_objects=None)
except StorageError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
current_app.logger.info("Bucket quota deleted", extra={"bucket": bucket_name})
return Response(status=204)
# PUT
payload = request.get_json(silent=True)
if not payload:
return _error_response("MalformedRequest", "Request body must be JSON with quota limits", 400)
max_size_bytes = payload.get("max_size_bytes")
max_objects = payload.get("max_objects")
if max_size_bytes is None and max_objects is None:
return _error_response("InvalidArgument", "At least one of max_size_bytes or max_objects is required", 400)
# Validate types
if max_size_bytes is not None:
try:
max_size_bytes = int(max_size_bytes)
if max_size_bytes < 0:
raise ValueError("must be non-negative")
except (TypeError, ValueError) as exc:
return _error_response("InvalidArgument", f"max_size_bytes {exc}", 400)
if max_objects is not None:
try:
max_objects = int(max_objects)
if max_objects < 0:
raise ValueError("must be non-negative")
except (TypeError, ValueError) as exc:
return _error_response("InvalidArgument", f"max_objects {exc}", 400)
try:
storage.set_bucket_quota(bucket_name, max_size_bytes=max_size_bytes, max_objects=max_objects)
except StorageError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
current_app.logger.info(
"Bucket quota updated",
extra={"bucket": bucket_name, "max_size_bytes": max_size_bytes, "max_objects": max_objects}
)
return Response(status=204)
def _bulk_delete_handler(bucket_name: str) -> Response:
principal, error = _require_principal()
if error:
@@ -1749,6 +1831,8 @@ def object_handler(bucket_name: str, object_key: str):
stream,
metadata=metadata or None,
)
except QuotaExceededError as exc:
return _error_response("QuotaExceeded", str(exc), 403)
except StorageError as exc:
message = str(exc)
if "Bucket" in message:
@@ -2256,6 +2340,8 @@ def _complete_multipart_upload(bucket_name: str, object_key: str) -> Response:
try:
meta = _storage().complete_multipart_upload(bucket_name, upload_id, parts)
except QuotaExceededError as exc:
return _error_response("QuotaExceeded", str(exc), 403)
except StorageError as exc:
if "NoSuchBucket" in str(exc):
return _error_response("NoSuchBucket", str(exc), 404)

View File

@@ -75,6 +75,15 @@ class StorageError(RuntimeError):
"""Raised when the storage layer encounters an unrecoverable problem."""
class QuotaExceededError(StorageError):
"""Raised when an operation would exceed bucket quota limits."""
def __init__(self, message: str, quota: Dict[str, Any], usage: Dict[str, int]):
super().__init__(message)
self.quota = quota
self.usage = usage
@dataclass
class ObjectMeta:
key: str
@@ -169,16 +178,38 @@ class ObjectStorage:
object_count = 0
total_bytes = 0
version_count = 0
version_bytes = 0
# Count current objects in the bucket folder
for path in bucket_path.rglob("*"):
if path.is_file():
rel = path.relative_to(bucket_path)
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
if not rel.parts:
continue
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
top_folder = rel.parts[0]
if top_folder not in self.INTERNAL_FOLDERS:
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
stats = {"objects": object_count, "bytes": total_bytes}
# Count archived versions in the system folder
versions_root = self._bucket_versions_root(bucket_name)
if versions_root.exists():
for path in versions_root.rglob("*.bin"):
if path.is_file():
stat = path.stat()
version_count += 1
version_bytes += stat.st_size
stats = {
"objects": object_count,
"bytes": total_bytes,
"version_count": version_count,
"version_bytes": version_bytes,
"total_objects": object_count + version_count, # All objects including versions
"total_bytes": total_bytes + version_bytes, # All storage including versions
}
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
@@ -243,6 +274,7 @@ class ObjectStorage:
stream: BinaryIO,
*,
metadata: Optional[Dict[str, str]] = None,
enforce_quota: bool = True,
) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
@@ -253,12 +285,52 @@ class ObjectStorage:
destination = bucket_path / safe_key
destination.parent.mkdir(parents=True, exist_ok=True)
if self._is_versioning_enabled(bucket_path) and destination.exists():
# Check if this is an overwrite (won't add to object count)
is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
# Write to temp file first to get actual size
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try:
checksum = hashlib.md5()
with tmp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
new_size = tmp_path.stat().st_size
# Check quota before finalizing
if enforce_quota:
# Calculate net change (new size minus size being replaced)
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
# Move to final destination
shutil.move(str(tmp_path), str(destination))
finally:
# Clean up temp file if it still exists
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
stat = destination.stat()
if metadata:
@@ -424,6 +496,124 @@ class ObjectStorage:
bucket_path = self._require_bucket_path(bucket_name)
self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)
def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
"""Get quota configuration for bucket.
Returns:
Dict with 'max_bytes' and 'max_objects' (None if unlimited).
"""
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
quota = config.get("quota")
if isinstance(quota, dict):
return {
"max_bytes": quota.get("max_bytes"),
"max_objects": quota.get("max_objects"),
}
return {"max_bytes": None, "max_objects": None}
def set_bucket_quota(
self,
bucket_name: str,
*,
max_bytes: Optional[int] = None,
max_objects: Optional[int] = None,
) -> None:
"""Set quota limits for a bucket.
Args:
bucket_name: Name of the bucket
max_bytes: Maximum total size in bytes (None to remove limit)
max_objects: Maximum number of objects (None to remove limit)
"""
bucket_path = self._require_bucket_path(bucket_name)
if max_bytes is None and max_objects is None:
# Remove quota entirely
self._set_bucket_config_entry(bucket_path.name, "quota", None)
return
quota: Dict[str, Any] = {}
if max_bytes is not None:
if max_bytes < 0:
raise StorageError("max_bytes must be non-negative")
quota["max_bytes"] = max_bytes
if max_objects is not None:
if max_objects < 0:
raise StorageError("max_objects must be non-negative")
quota["max_objects"] = max_objects
self._set_bucket_config_entry(bucket_path.name, "quota", quota)
def check_quota(
self,
bucket_name: str,
additional_bytes: int = 0,
additional_objects: int = 0,
) -> Dict[str, Any]:
"""Check if an operation would exceed bucket quota.
Args:
bucket_name: Name of the bucket
additional_bytes: Bytes that would be added
additional_objects: Objects that would be added
Returns:
Dict with 'allowed' (bool), 'quota' (current limits),
'usage' (current usage), and 'message' (if not allowed).
"""
quota = self.get_bucket_quota(bucket_name)
if not quota:
return {
"allowed": True,
"quota": None,
"usage": None,
"message": None,
}
# Get current stats (uses cache when available)
stats = self.bucket_stats(bucket_name)
# Use totals which include versions for quota enforcement
current_bytes = stats.get("total_bytes", stats.get("bytes", 0))
current_objects = stats.get("total_objects", stats.get("objects", 0))
result = {
"allowed": True,
"quota": quota,
"usage": {
"bytes": current_bytes,
"objects": current_objects,
"version_count": stats.get("version_count", 0),
"version_bytes": stats.get("version_bytes", 0),
},
"message": None,
}
max_bytes_limit = quota.get("max_bytes")
max_objects = quota.get("max_objects")
if max_bytes_limit is not None:
projected_bytes = current_bytes + additional_bytes
if projected_bytes > max_bytes_limit:
result["allowed"] = False
result["message"] = (
f"Quota exceeded: adding {additional_bytes} bytes would result in "
f"{projected_bytes} bytes, exceeding limit of {max_bytes_limit} bytes"
)
return result
if max_objects is not None:
projected_objects = current_objects + additional_objects
if projected_objects > max_objects:
result["allowed"] = False
result["message"] = (
f"Quota exceeded: adding {additional_objects} objects would result in "
f"{projected_objects} objects, exceeding limit of {max_objects} objects"
)
return result
return result
def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]:
"""Get tags for an object."""
bucket_path = self._bucket_path(bucket_name)
@@ -540,6 +730,7 @@ class ObjectStorage:
else:
self._delete_metadata(bucket_id, safe_key)
stat = destination.stat()
self._invalidate_bucket_stats_cache(bucket_id)
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
@@ -688,6 +879,7 @@ class ObjectStorage:
bucket_name: str,
upload_id: str,
ordered_parts: List[Dict[str, Any]],
enforce_quota: bool = True,
) -> ObjectMeta:
if not ordered_parts:
raise StorageError("parts list required")
@@ -698,6 +890,7 @@ class ObjectStorage:
if not parts_map:
raise StorageError("No uploaded parts found")
validated: List[tuple[int, Dict[str, Any]]] = []
total_size = 0
for part in ordered_parts:
raw_number = part.get("part_number")
if raw_number is None:
@@ -717,10 +910,33 @@ class ObjectStorage:
if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
raise StorageError(f"ETag mismatch for part {number}")
validated.append((number, record))
total_size += record.get("size", 0)
validated.sort(key=lambda entry: entry[0])
safe_key = self._sanitize_object_key(manifest["object_key"])
destination = bucket_path / safe_key
# Check if this is an overwrite
is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0
# Check quota before writing
if enforce_quota:
size_delta = total_size - existing_size
object_delta = 0 if is_overwrite else 1
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
destination.parent.mkdir(parents=True, exist_ok=True)
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"

View File

@@ -260,9 +260,9 @@ def buckets_overview():
visible_buckets.append({
"meta": bucket,
"summary": {
"objects": stats["objects"],
"total_bytes": stats["bytes"],
"human_size": _format_bytes(stats["bytes"]),
"objects": stats["total_objects"],
"total_bytes": stats["total_bytes"],
"human_size": _format_bytes(stats["total_bytes"]),
},
"access_label": access_label,
"access_badge": access_badge,
@@ -372,6 +372,16 @@ def bucket_detail(bucket_name: str):
encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
can_manage_encryption = can_manage_versioning # Same as other bucket properties
# Quota settings (admin only)
bucket_quota = storage.get_bucket_quota(bucket_name)
bucket_stats = storage.bucket_stats(bucket_name)
can_manage_quota = False
try:
_iam().authorize(principal, None, "iam:list_users")
can_manage_quota = True
except IamError:
pass
return render_template(
"bucket_detail.html",
bucket_name=bucket_name,
@@ -392,6 +402,9 @@ def bucket_detail(bucket_name: str):
kms_keys=kms_keys,
kms_enabled=kms_enabled,
encryption_enabled=encryption_enabled,
bucket_quota=bucket_quota,
bucket_stats=bucket_stats,
can_manage_quota=can_manage_quota,
)
@@ -925,6 +938,71 @@ def update_bucket_versioning(bucket_name: str):
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
@ui_bp.post("/buckets/<bucket_name>/quota")
def update_bucket_quota(bucket_name: str):
"""Update bucket quota configuration (admin only)."""
principal = _current_principal()
# Quota management is admin-only
is_admin = False
try:
_iam().authorize(principal, None, "iam:list_users")
is_admin = True
except IamError:
pass
if not is_admin:
flash("Only administrators can manage bucket quotas", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
action = request.form.get("action", "set")
if action == "remove":
try:
_storage().set_bucket_quota(bucket_name, max_bytes=None, max_objects=None)
flash("Bucket quota removed", "info")
except StorageError as exc:
flash(_friendly_error_message(exc), "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
# Parse quota values
max_mb_str = request.form.get("max_mb", "").strip()
max_objects_str = request.form.get("max_objects", "").strip()
max_bytes = None
max_objects = None
if max_mb_str:
try:
max_mb = int(max_mb_str)
if max_mb < 1:
raise ValueError("Size must be at least 1 MB")
max_bytes = max_mb * 1024 * 1024 # Convert MB to bytes
except ValueError as exc:
flash(f"Invalid size value: {exc}", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
if max_objects_str:
try:
max_objects = int(max_objects_str)
if max_objects < 0:
raise ValueError("Object count must be non-negative")
except ValueError as exc:
flash(f"Invalid object count: {exc}", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
try:
_storage().set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
if max_bytes is None and max_objects is None:
flash("Bucket quota removed", "info")
else:
flash("Bucket quota updated", "success")
except StorageError as exc:
flash(_friendly_error_message(exc), "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
@ui_bp.post("/buckets/<bucket_name>/encryption")
def update_bucket_encryption(bucket_name: str):
"""Update bucket default encryption configuration."""
@@ -1438,13 +1516,16 @@ def metrics_dashboard():
total_objects = 0
total_bytes_used = 0
total_versions = 0
# Note: Uses cached stats from storage layer to improve performance
cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
for bucket in buckets:
stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl)
total_objects += stats["objects"]
total_bytes_used += stats["bytes"]
# Use totals which include archived versions
total_objects += stats.get("total_objects", stats.get("objects", 0))
total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
total_versions += stats.get("version_count", 0)
return render_template(
"metrics.html",
@@ -1465,6 +1546,7 @@ def metrics_dashboard():
app={
"buckets": total_buckets,
"objects": total_objects,
"versions": total_versions,
"storage_used": _format_bytes(total_bytes_used),
"storage_raw": total_bytes_used,
}