Reduce CPU usage on heavy uploads: in SigV4 verification, treat requests lacking an X-Amz-Content-Sha256 header as UNSIGNED-PAYLOAD instead of SHA256-hashing the body (note: the body is then not covered by the signature); compute the MD5 ETag once after the file is written — via Rust md5_file when available, with a chunked hashlib.md5 fallback — instead of hashing per-chunk through _HashingReader

This commit is contained in:
2026-02-27 21:57:13 +08:00
parent dfc0058d0d
commit 3827235232
3 changed files with 30 additions and 12 deletions

View File

@@ -293,9 +293,7 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
raise IamError("Required headers not signed")
canonical_uri = _get_canonical_uri(req)
payload_hash = req.headers.get("X-Amz-Content-Sha256")
if not payload_hash:
payload_hash = hashlib.sha256(req.get_data()).hexdigest()
payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD"
if _HAS_RUST:
query_params = list(req.args.items(multi=True))

View File

@@ -844,10 +844,9 @@ class ObjectStorage:
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try:
checksum = hashlib.md5()
with tmp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
shutil.copyfileobj(stream, target)
new_size = tmp_path.stat().st_size
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
@@ -865,8 +864,20 @@ class ObjectStorage:
quota_check["usage"],
)
if _HAS_RUST:
etag = _rc.md5_file(str(tmp_path))
else:
checksum = hashlib.md5()
with tmp_path.open("rb") as f:
while True:
chunk = f.read(1048576)
if not chunk:
break
checksum.update(chunk)
etag = checksum.hexdigest()
shutil.move(str(tmp_path), str(destination))
finally:
try:
tmp_path.unlink(missing_ok=True)
@@ -874,7 +885,6 @@ class ObjectStorage:
pass
stat = destination.stat()
etag = checksum.hexdigest()
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
combined_meta = {**internal_meta, **(metadata or {})}
@@ -1465,14 +1475,24 @@ class ObjectStorage:
if not upload_root.exists():
raise StorageError("Multipart upload not found")
checksum = hashlib.md5()
part_filename = f"part-{part_number:05d}.part"
part_path = upload_root / part_filename
temp_path = upload_root / f".{part_filename}.tmp"
try:
with temp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
shutil.copyfileobj(stream, target)
if _HAS_RUST:
part_etag = _rc.md5_file(str(temp_path))
else:
checksum = hashlib.md5()
with temp_path.open("rb") as f:
while True:
chunk = f.read(1048576)
if not chunk:
break
checksum.update(chunk)
part_etag = checksum.hexdigest()
temp_path.replace(part_path)
except OSError:
try:
@@ -1482,7 +1502,7 @@ class ObjectStorage:
raise
record = {
"etag": checksum.hexdigest(),
"etag": part_etag,
"size": part_path.stat().st_size,
"filename": part_filename,
}

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
APP_VERSION = "0.3.3"
APP_VERSION = "0.3.4"
def get_version() -> str: