Add self-heal missing ETags and harden ETag index persistence

This commit is contained in:
2026-03-31 21:10:47 +08:00
parent 3838aed954
commit c498fe7aee
2 changed files with 41 additions and 10 deletions

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import hashlib
import json
import logging
import os
import re
import shutil
@@ -33,7 +34,8 @@ except ImportError:
_rc = None
_HAS_RUST = False
# Platform-specific file locking
logger = logging.getLogger(__name__)
if os.name == "nt":
import msvcrt
@@ -1082,6 +1084,30 @@ class ObjectStorage:
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
return self._read_metadata(bucket_path.name, safe_key) or {}
def heal_missing_etag(self, bucket_name: str, object_key: str, etag: str) -> None:
"""Persist a computed ETag back to metadata (self-heal on read)."""
try:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
return
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
existing = self._read_metadata(bucket_id, safe_key) or {}
if existing.get("__etag__"):
return
existing["__etag__"] = etag
self._write_metadata(bucket_id, safe_key, existing)
with self._obj_cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
obj = cached[0].get(safe_key.as_posix())
if obj and not obj.etag:
obj.etag = etag
self._etag_index_dirty.add(bucket_id)
self._schedule_etag_index_flush()
except Exception:
logger.warning("Failed to heal missing ETag for %s/%s", bucket_name, object_key)
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
"""Remove empty parent directories in a background thread.
@@ -2366,12 +2392,10 @@ class ObjectStorage:
index = {k: v.etag for k, v in objects.items() if v.etag}
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
try:
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
with open(etag_index_path, 'w', encoding='utf-8') as f:
json.dump(index, f)
self._atomic_write_json(etag_index_path, index, sync=False)
self._etag_index_mem[bucket_id] = (index, etag_index_path.stat().st_mtime)
except OSError:
pass
logger.warning("Failed to flush etag index for bucket %s", bucket_id)
def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
"""Pre-warm the object cache for specified buckets or all buckets.