Debug replication corruption issue

2025-11-22 12:11:41 +08:00
parent 840fd176d3
commit 471cf5a305
5 changed files with 386 additions and 55 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import logging
import mimetypes
import threading
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
@@ -10,9 +11,10 @@ from typing import Dict, Optional
import boto3
from botocore.exceptions import ClientError
from boto3.exceptions import S3UploadFailedError
from .connections import ConnectionStore, RemoteConnection
from .storage import ObjectStorage
from .storage import ObjectStorage, StorageError
logger = logging.getLogger(__name__)
@@ -116,21 +118,73 @@ class ReplicationManager:
        # We need the file content.
        # Since ObjectStorage is filesystem based, let's get the stream.
        # We need to be careful about closing it.
        meta = self.storage.get_object_meta(bucket_name, object_key)
        if not meta:
        try:
            path = self.storage.get_object_path(bucket_name, object_key)
        except StorageError:
            return
        with self.storage.open_object(bucket_name, object_key) as f:
            extra_args = {}
            if meta.metadata:
                extra_args["Metadata"] = meta.metadata
            s3.upload_fileobj(
                f,
                rule.target_bucket,
                object_key,
                ExtraArgs=extra_args
            )
        metadata = self.storage.get_object_metadata(bucket_name, object_key)
        extra_args = {}
        if metadata:
            extra_args["Metadata"] = metadata
        # Guess content type to prevent corruption/wrong handling
        content_type, _ = mimetypes.guess_type(path)
        file_size = path.stat().st_size
        # Debug: Calculate MD5 of source file
        import hashlib
        md5_hash = hashlib.md5()
        with path.open("rb") as f:
            # Log first 32 bytes
            header = f.read(32)
            logger.info(f"Source first 32 bytes: {header.hex()}")
            md5_hash.update(header)
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        source_md5 = md5_hash.hexdigest()
        logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, MD5={source_md5}, ContentType={content_type}")
        try:
            with path.open("rb") as f:
                s3.put_object(
                    Bucket=rule.target_bucket,
                    Key=object_key,
                    Body=f,
                    ContentLength=file_size,
                    ContentType=content_type or "application/octet-stream",
                    Metadata=metadata or {}
                )
        except (ClientError, S3UploadFailedError) as e:
            # Check if it's a NoSuchBucket error (either direct or wrapped)
            is_no_bucket = False
            if isinstance(e, ClientError):
                if e.response['Error']['Code'] == 'NoSuchBucket':
                    is_no_bucket = True
            elif isinstance(e, S3UploadFailedError):
                if "NoSuchBucket" in str(e):
                    is_no_bucket = True
            if is_no_bucket:
                logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
                try:
                    s3.create_bucket(Bucket=rule.target_bucket)
                    # Retry upload
                    with path.open("rb") as f:
                        s3.put_object(
                            Bucket=rule.target_bucket,
                            Key=object_key,
                            Body=f,
                            ContentLength=file_size,
                            ContentType=content_type or "application/octet-stream",
                            Metadata=metadata or {}
                        )
                except Exception as create_err:
                    logger.error(f"Failed to create target bucket {rule.target_bucket}: {create_err}")
                    raise e # Raise original error
            else:
                raise e
        logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
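
Since the corruption seems to happen in transit, a natural follow-up is to compare the source checksum against what the target reports after the upload. A minimal sketch, not part of this commit, reusing s3, rule, object_key, source_md5, and logger from the code above; it assumes a single-part upload without SSE-KMS, where the target's ETag is the MD5 of the stored body:

head = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
remote_etag = head["ETag"].strip('"')  # boto3 returns the ETag wrapped in quotes
if remote_etag != source_md5:
    logger.warning(f"Checksum mismatch for {object_key}: source={source_md5}, target={remote_etag}")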

View File

@@ -1078,7 +1078,13 @@ def object_handler(bucket_name: str, object_key: str):
    _, error = _object_principal("write", bucket_name, object_key)
    if error:
        return error
    stream = request.stream
    # Debug: Log incoming request details
    current_app.logger.info(f"Receiving PUT {bucket_name}/{object_key}")
    current_app.logger.info(f"Headers: {dict(request.headers)}")
    current_app.logger.info(f"Content-Length: {request.content_length}")
    stream = DebugStream(request.stream, current_app.logger)
    metadata = _extract_request_metadata()
    try:
        meta = storage.put_object(
@@ -1252,3 +1258,19 @@ def head_object(bucket_name: str, object_key: str) -> Response:
        return _error_response("NoSuchKey", "Object not found", 404)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)


class DebugStream:
    def __init__(self, stream, logger):
        self.stream = stream
        self.logger = logger
        self.first_chunk = True

    def read(self, size=-1):
        chunk = self.stream.read(size)
        if self.first_chunk and chunk:
            # Log first 32 bytes
            prefix = chunk[:32]
            self.logger.info(f"Received first 32 bytes: {prefix.hex()}")
            self.first_chunk = False
        return chunk
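
DebugStream only overrides read(); if storage.put_object ever consumes the body through readline(), readinto(), or iteration, the wrapper would raise AttributeError. A minimal sketch of a delegating fallback that could be added to the class, shown under that assumption rather than as part of this commit:

    def __getattr__(self, name):
        # Forward any other file-like attribute to the wrapped stream so
        # consumers that use more than read() still work through the wrapper.
        return getattr(self.stream, name)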

View File

@@ -6,7 +6,9 @@ import uuid
from typing import Any
from urllib.parse import urlparse
import boto3
import requests
from botocore.exceptions import ClientError
from flask import (
    Blueprint,
    Response,
@@ -1070,6 +1072,73 @@ def create_connection():
    return redirect(url_for("ui.connections_dashboard"))


@ui_bp.post("/connections/test")
def test_connection():
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:list_users")
    except IamError:
        return jsonify({"status": "error", "message": "Access denied"}), 403
    data = request.get_json(silent=True) or request.form
    endpoint = data.get("endpoint_url", "").strip()
    access_key = data.get("access_key", "").strip()
    secret_key = data.get("secret_key", "").strip()
    region = data.get("region", "us-east-1").strip()
    if not all([endpoint, access_key, secret_key]):
        return jsonify({"status": "error", "message": "Missing credentials"}), 400
    try:
        s3 = boto3.client(
            "s3",
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region,
        )
        # Try to list buckets to verify credentials and endpoint
        s3.list_buckets()
        return jsonify({"status": "ok", "message": "Connection successful"})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 400
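
For quick manual testing, the endpoint accepts either JSON or form data; a sketch of exercising it with requests, where the host, port, and blueprint prefix are assumptions about a local deployment, and the credential values are placeholders:

import requests

# Sketch (assumptions: local dev server on http://localhost:5000 with the ui
# blueprint mounted at the root). The endpoint requires an authenticated
# session, so an unauthenticated call is expected to return 403.
resp = requests.post(
    "http://localhost:5000/connections/test",
    json={
        "endpoint_url": "http://localhost:9000",
        "access_key": "minioadmin",
        "secret_key": "minioadmin",
        "region": "us-east-1",
    },
)
print(resp.status_code, resp.json())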


@ui_bp.post("/connections/<connection_id>/update")
def update_connection(connection_id: str):
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:list_users")
    except IamError:
        flash("Access denied", "danger")
        return redirect(url_for("ui.buckets_overview"))
    conn = _connections().get(connection_id)
    if not conn:
        flash("Connection not found", "danger")
        return redirect(url_for("ui.connections_dashboard"))
    name = request.form.get("name", "").strip()
    endpoint = request.form.get("endpoint_url", "").strip()
    access_key = request.form.get("access_key", "").strip()
    secret_key = request.form.get("secret_key", "").strip()
    region = request.form.get("region", "us-east-1").strip()
    if not all([name, endpoint, access_key, secret_key]):
        flash("All fields are required", "danger")
        return redirect(url_for("ui.connections_dashboard"))
    conn.name = name
    conn.endpoint_url = endpoint
    conn.access_key = access_key
    conn.secret_key = secret_key
    conn.region = region
    _connections().save()
    flash(f"Connection '{name}' updated", "success")
    return redirect(url_for("ui.connections_dashboard"))


@ui_bp.post("/connections/<connection_id>/delete")
def delete_connection(connection_id: str):
    principal = _current_principal()
@@ -1105,16 +1174,6 @@ def update_bucket_replication(bucket_name: str):
    if not target_conn_id or not target_bucket:
        flash("Target connection and bucket are required", "danger")
    else:
        # Check if user wants to create the remote bucket
        create_remote = request.form.get("create_remote_bucket") == "on"
        if create_remote:
            try:
                _replication().create_remote_bucket(target_conn_id, target_bucket)
                flash(f"Created remote bucket '{target_bucket}'", "success")
            except Exception as e:
                flash(f"Failed to create remote bucket: {e}", "warning")
                # We continue to set the rule even if creation fails (maybe it exists?)
        rule = ReplicationRule(
            bucket_name=bucket_name,
            target_connection_id=target_conn_id,