Fixed replication issue - clean up debug

This commit is contained in:
2025-12-23 13:37:51 +08:00
parent cd9734b398
commit b592fa9fdb
3 changed files with 34 additions and 53 deletions

View File

@@ -22,8 +22,8 @@ from .storage import ObjectStorage, StorageError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0" REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
REPLICATION_CONNECT_TIMEOUT = 5 # seconds to wait for connection REPLICATION_CONNECT_TIMEOUT = 5
REPLICATION_READ_TIMEOUT = 30 # seconds to wait for response REPLICATION_READ_TIMEOUT = 30
REPLICATION_MODE_NEW_ONLY = "new_only" REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all" REPLICATION_MODE_ALL = "all"
@@ -32,10 +32,10 @@ REPLICATION_MODE_ALL = "all"
@dataclass @dataclass
class ReplicationStats: class ReplicationStats:
"""Statistics for replication operations - computed dynamically.""" """Statistics for replication operations - computed dynamically."""
objects_synced: int = 0 # Objects that exist in both source and destination objects_synced: int = 0
objects_pending: int = 0 # Objects in source but not in destination objects_pending: int = 0
objects_orphaned: int = 0 # Objects in destination but not in source (will be deleted) objects_orphaned: int = 0
bytes_synced: int = 0 # Total bytes synced to destination bytes_synced: int = 0
last_sync_at: Optional[float] = None last_sync_at: Optional[float] = None
last_sync_key: Optional[str] = None last_sync_key: Optional[str] = None
@@ -85,7 +85,6 @@ class ReplicationRule:
@classmethod @classmethod
def from_dict(cls, data: dict) -> "ReplicationRule": def from_dict(cls, data: dict) -> "ReplicationRule":
stats_data = data.pop("stats", {}) stats_data = data.pop("stats", {})
# Handle old rules without mode/created_at
if "mode" not in data: if "mode" not in data:
data["mode"] = REPLICATION_MODE_NEW_ONLY data["mode"] = REPLICATION_MODE_NEW_ONLY
if "created_at" not in data: if "created_at" not in data:
@@ -134,7 +133,7 @@ class ReplicationManager:
user_agent_extra=REPLICATION_USER_AGENT, user_agent_extra=REPLICATION_USER_AGENT,
connect_timeout=REPLICATION_CONNECT_TIMEOUT, connect_timeout=REPLICATION_CONNECT_TIMEOUT,
read_timeout=REPLICATION_READ_TIMEOUT, read_timeout=REPLICATION_READ_TIMEOUT,
retries={'max_attempts': 1} # Don't retry for health checks retries={'max_attempts': 1}
) )
s3 = boto3.client( s3 = boto3.client(
"s3", "s3",
@@ -144,7 +143,6 @@ class ReplicationManager:
region_name=connection.region, region_name=connection.region,
config=config, config=config,
) )
# Simple list_buckets call to verify connectivity
s3.list_buckets() s3.list_buckets()
return True return True
except Exception as e: except Exception as e:
@@ -181,14 +179,12 @@ class ReplicationManager:
connection = self.connections.get(rule.target_connection_id) connection = self.connections.get(rule.target_connection_id)
if not connection: if not connection:
return rule.stats # Return cached stats if connection unavailable return rule.stats
try: try:
# Get source objects
source_objects = self.storage.list_objects_all(bucket_name) source_objects = self.storage.list_objects_all(bucket_name)
source_keys = {obj.key: obj.size for obj in source_objects} source_keys = {obj.key: obj.size for obj in source_objects}
# Get destination objects
s3 = boto3.client( s3 = boto3.client(
"s3", "s3",
endpoint_url=connection.endpoint_url, endpoint_url=connection.endpoint_url,
@@ -208,24 +204,18 @@ class ReplicationManager:
bytes_synced += obj.get('Size', 0) bytes_synced += obj.get('Size', 0)
except ClientError as e: except ClientError as e:
if e.response['Error']['Code'] == 'NoSuchBucket': if e.response['Error']['Code'] == 'NoSuchBucket':
# Destination bucket doesn't exist yet
dest_keys = set() dest_keys = set()
else: else:
raise raise
# Compute stats synced = source_keys.keys() & dest_keys
synced = source_keys.keys() & dest_keys # Objects in both orphaned = dest_keys - source_keys.keys()
orphaned = dest_keys - source_keys.keys() # In dest but not source
# For "new_only" mode, we can't determine pending since we don't know
# which objects existed before replication was enabled. Only "all" mode
# should show pending (objects that should be replicated but aren't yet).
if rule.mode == REPLICATION_MODE_ALL: if rule.mode == REPLICATION_MODE_ALL:
pending = source_keys.keys() - dest_keys # In source but not dest pending = source_keys.keys() - dest_keys
else: else:
pending = set() # New-only mode: don't show pre-existing as pending pending = set()
# Update cached stats with computed values
rule.stats.objects_synced = len(synced) rule.stats.objects_synced = len(synced)
rule.stats.objects_pending = len(pending) rule.stats.objects_pending = len(pending)
rule.stats.objects_orphaned = len(orphaned) rule.stats.objects_orphaned = len(orphaned)
@@ -235,7 +225,7 @@ class ReplicationManager:
except (ClientError, StorageError) as e: except (ClientError, StorageError) as e:
logger.error(f"Failed to compute sync status for {bucket_name}: {e}") logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
return rule.stats # Return cached stats on error return rule.stats
def replicate_existing_objects(self, bucket_name: str) -> None: def replicate_existing_objects(self, bucket_name: str) -> None:
"""Trigger replication for all existing objects in a bucket.""" """Trigger replication for all existing objects in a bucket."""
@@ -248,7 +238,6 @@ class ReplicationManager:
logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found") logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
return return
# Check endpoint health before starting bulk replication
if not self.check_endpoint_health(connection): if not self.check_endpoint_health(connection):
logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable") logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return return
@@ -290,7 +279,6 @@ class ReplicationManager:
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
return return
# Check endpoint health before attempting replication to prevent hangs
if not self.check_endpoint_health(connection): if not self.check_endpoint_health(connection):
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable") logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return return
@@ -350,10 +338,8 @@ class ReplicationManager:
return return
# Don't replicate metadata - destination server will generate its own # Don't replicate metadata - destination server will generate its own
# __etag__ and __size__. Replicating them causes signature mismatches # __etag__ and __size__. Replicating them causes signature mismatches when they have None/empty values.
# when they have None/empty values.
# Guess content type to prevent corruption/wrong handling
content_type, _ = mimetypes.guess_type(path) content_type, _ = mimetypes.guess_type(path)
file_size = path.stat().st_size file_size = path.stat().st_size
@@ -375,14 +361,6 @@ class ReplicationManager:
} }
if content_type: if content_type:
put_kwargs["ContentType"] = content_type put_kwargs["ContentType"] = content_type
# Metadata is not replicated - destination generates its own
# Debug logging for signature issues
logger.debug(f"PUT request details: bucket={rule.target_bucket}, key={repr(object_key)}, "
f"content_type={content_type}, body_len={len(file_content)}, "
f"endpoint={conn.endpoint_url}")
logger.debug(f"Key bytes: {object_key.encode('utf-8')}")
s3.put_object(**put_kwargs) s3.put_object(**put_kwargs)
try: try:
@@ -395,7 +373,6 @@ class ReplicationManager:
if "NoSuchBucket" in str(e): if "NoSuchBucket" in str(e):
error_code = 'NoSuchBucket' error_code = 'NoSuchBucket'
# Handle NoSuchBucket - create bucket and retry
if error_code == 'NoSuchBucket': if error_code == 'NoSuchBucket':
logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.") logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
bucket_ready = False bucket_ready = False
@@ -404,16 +381,14 @@ class ReplicationManager:
bucket_ready = True bucket_ready = True
logger.info(f"Created target bucket {rule.target_bucket}") logger.info(f"Created target bucket {rule.target_bucket}")
except ClientError as bucket_err: except ClientError as bucket_err:
# BucketAlreadyExists or BucketAlreadyOwnedByYou means another thread created it - that's OK!
if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'): if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'):
logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)") logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)")
bucket_ready = True bucket_ready = True
else: else:
logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}") logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}")
raise e # Raise original NoSuchBucket error raise e
if bucket_ready: if bucket_ready:
# Retry the upload now that bucket exists
do_put_object() do_put_object()
else: else:
raise e raise e
@@ -423,14 +398,6 @@ class ReplicationManager:
except (ClientError, OSError, ValueError) as e: except (ClientError, OSError, ValueError) as e:
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}") logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
# Log additional debug info for signature errors
if isinstance(e, ClientError):
error_code = e.response.get('Error', {}).get('Code', '')
if 'Signature' in str(e) or 'Signature' in error_code:
logger.error(f"Signature debug - Key repr: {repr(object_key)}, "
f"Endpoint: {conn.endpoint_url}, "
f"Region: {conn.region}, "
f"Target bucket: {rule.target_bucket}")
except Exception: except Exception:
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}") logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")

View File

@@ -409,8 +409,22 @@ code {
.bucket-table th:last-child { white-space: nowrap; } .bucket-table th:last-child { white-space: nowrap; }
.object-key { .object-key {
word-break: break-word; max-width: 0;
max-width: 32rem; width: 100%;
overflow: hidden;
text-overflow: ellipsis;
}
.object-key .fw-medium {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.object-key .text-muted {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
} }
.preview-card { top: 1rem; } .preview-card { top: 1rem; }

View File

@@ -134,15 +134,15 @@
data-bulk-delete-endpoint="{{ url_for('ui.bulk_delete_objects', bucket_name=bucket_name) }}" data-bulk-delete-endpoint="{{ url_for('ui.bulk_delete_objects', bucket_name=bucket_name) }}"
data-bulk-download-endpoint="{{ url_for('ui.bulk_download_objects', bucket_name=bucket_name) }}" data-bulk-download-endpoint="{{ url_for('ui.bulk_download_objects', bucket_name=bucket_name) }}"
> >
<table class="table table-hover align-middle mb-0" id="objects-table"> <table class="table table-hover align-middle mb-0" id="objects-table" style="table-layout: fixed;">
<thead class="table-light"> <thead class="table-light">
<tr> <tr>
<th scope="col" class="text-center" style="width: 3rem;"> <th scope="col" class="text-center" style="width: 3rem;">
<input class="form-check-input" type="checkbox" data-select-all aria-label="Select all objects" /> <input class="form-check-input" type="checkbox" data-select-all aria-label="Select all objects" />
</th> </th>
<th scope="col">Key</th> <th scope="col">Key</th>
<th scope="col" class="text-end">Size</th> <th scope="col" class="text-end" style="width: 6rem;">Size</th>
<th scope="col" class="text-end">Actions</th> <th scope="col" class="text-end" style="width: 5.5rem;">Actions</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>