Add self-heal missing ETags and harden ETag index persistence
This commit is contained in:
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -33,7 +34,8 @@ except ImportError:
|
||||
_rc = None
|
||||
_HAS_RUST = False
|
||||
|
||||
# Platform-specific file locking
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if os.name == "nt":
|
||||
import msvcrt
|
||||
|
||||
@@ -1082,6 +1084,30 @@ class ObjectStorage:
|
||||
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
|
||||
return self._read_metadata(bucket_path.name, safe_key) or {}
|
||||
|
||||
def heal_missing_etag(self, bucket_name: str, object_key: str, etag: str) -> None:
|
||||
"""Persist a computed ETag back to metadata (self-heal on read)."""
|
||||
try:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
return
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
|
||||
existing = self._read_metadata(bucket_id, safe_key) or {}
|
||||
if existing.get("__etag__"):
|
||||
return
|
||||
existing["__etag__"] = etag
|
||||
self._write_metadata(bucket_id, safe_key, existing)
|
||||
with self._obj_cache_lock:
|
||||
cached = self._object_cache.get(bucket_id)
|
||||
if cached:
|
||||
obj = cached[0].get(safe_key.as_posix())
|
||||
if obj and not obj.etag:
|
||||
obj.etag = etag
|
||||
self._etag_index_dirty.add(bucket_id)
|
||||
self._schedule_etag_index_flush()
|
||||
except Exception:
|
||||
logger.warning("Failed to heal missing ETag for %s/%s", bucket_name, object_key)
|
||||
|
||||
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
|
||||
"""Remove empty parent directories in a background thread.
|
||||
|
||||
@@ -2366,12 +2392,10 @@ class ObjectStorage:
|
||||
index = {k: v.etag for k, v in objects.items() if v.etag}
|
||||
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
|
||||
try:
|
||||
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(etag_index_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(index, f)
|
||||
self._atomic_write_json(etag_index_path, index, sync=False)
|
||||
self._etag_index_mem[bucket_id] = (index, etag_index_path.stat().st_mtime)
|
||||
except OSError:
|
||||
pass
|
||||
logger.warning("Failed to flush etag index for bucket %s", bucket_id)
|
||||
|
||||
def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
|
||||
"""Pre-warm the object cache for specified buckets or all buckets.
|
||||
|
||||
Reference in New Issue
Block a user