From e287b59645847dc2a096dc852feb00a5373ef6d2 Mon Sep 17 00:00:00 2001 From: kqjy Date: Fri, 21 Nov 2025 22:11:38 +0800 Subject: [PATCH 01/20] Fix Dockerfile permission issues --- .dockerignore | 13 +++++++++++++ Dockerfile | 6 ++++-- app/version.py | 2 +- 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ba575da --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.gitignore +.venv +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.coverage +htmlcov +logs +data +tmp diff --git a/Dockerfile b/Dockerfile index 6c0c97e..dec63e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,11 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . -# Drop privileges -RUN useradd -m -u 1000 myfsio \ +# Create data directory and set permissions +RUN mkdir -p /app/data \ + && useradd -m -u 1000 myfsio \ && chown -R myfsio:myfsio /app + USER myfsio EXPOSE 5000 5100 diff --git a/app/version.py b/app/version.py index d2eb696..122431b 100644 --- a/app/version.py +++ b/app/version.py @@ -1,7 +1,7 @@ """Central location for the application version string.""" from __future__ import annotations -APP_VERSION = "0.1.0" +APP_VERSION = "0.1.0b2" def get_version() -> str: -- 2.49.1 From f2daa8a8a3cd6f26202fbc7dc2649c991fc84e25 Mon Sep 17 00:00:00 2001 From: kqjy Date: Fri, 21 Nov 2025 22:32:42 +0800 Subject: [PATCH 02/20] Fix IAM credentials reset causing presigned URL to fail --- app/iam.py | 13 +++++++++++++ app/ui.py | 6 ++++++ templates/iam.html | 10 ---------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/app/iam.py b/app/iam.py index f6dc33c..3ab64c2 100644 --- a/app/iam.py +++ b/app/iam.py @@ -77,10 +77,20 @@ class IamService: self._users: Dict[str, Dict[str, Any]] = {} self._raw_config: Dict[str, Any] = {} self._failed_attempts: Dict[str, Deque[datetime]] = {} + self._last_load_time = 0.0 self._load() + def _maybe_reload(self) -> None: + """Reload configuration if the file has changed on disk.""" + try: + if self.config_path.stat().st_mtime > self._last_load_time: + self._load() + except OSError: + pass + # ---------------------- authz helpers ---------------------- def authenticate(self, access_key: str, secret_key: str) -> Principal: + self._maybe_reload() access_key = (access_key or "").strip() secret_key = (secret_key or "").strip() if not access_key or not secret_key: @@ -135,12 +145,14 @@ class IamService: return int(max(0, self.auth_lockout_window.total_seconds() - elapsed)) def principal_for_key(self, access_key: str) -> Principal: + self._maybe_reload() record = self._users.get(access_key) if not record: raise IamError("Unknown access key") return self._build_principal(access_key, record) def secret_for_key(self, access_key: str) -> str: + self._maybe_reload() record = self._users.get(access_key) if not record: raise IamError("Unknown access key") @@ -245,6 +257,7 @@ class IamService: # ---------------------- config helpers ---------------------- def _load(self) -> None: try: + self._last_load_time = self.config_path.stat().st_mtime content = self.config_path.read_text(encoding='utf-8') raw = json.loads(content) except FileNotFoundError: diff --git a/app/ui.py b/app/ui.py index cdc587c..ebc04ca 100644 --- a/app/ui.py +++ b/app/ui.py @@ -926,6 +926,12 @@ def rotate_iam_secret(access_key: str): return redirect(url_for("ui.iam_dashboard")) try: new_secret = _iam().rotate_secret(access_key) + # If rotating own key, update session immediately so subsequent API 
calls (like presign) work + if principal and principal.access_key == access_key: + creds = session.get("credentials", {}) + creds["secret_key"] = new_secret + session["credentials"] = creds + session.modified = True except IamError as exc: if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: return jsonify({"error": str(exc)}), 400 diff --git a/templates/iam.html b/templates/iam.html index 0b52091..81b74d8 100644 --- a/templates/iam.html +++ b/templates/iam.html @@ -286,9 +286,6 @@
+
@@ -71,6 +72,7 @@ {{ conn.access_key }} + -- 2.49.1 From 840fd176d3537df20224f2f94e45024b29c456b5 Mon Sep 17 00:00:00 2001 From: kqjy Date: Fri, 21 Nov 2025 23:16:45 +0800 Subject: [PATCH 04/20] Add missing CSRF tokens --- app/replication.py | 19 +++++++++++++++++++ app/s3_api.py | 15 ++++++++++++--- app/ui.py | 10 ++++++++++ templates/bucket_detail.html | 9 ++++++++- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/app/replication.py b/app/replication.py index b9d86ee..994bd7b 100644 --- a/app/replication.py +++ b/app/replication.py @@ -66,6 +66,25 @@ class ReplicationManager: del self._rules[bucket_name] self.save_rules() + def create_remote_bucket(self, connection_id: str, bucket_name: str) -> None: + """Create a bucket on the remote connection.""" + connection = self.connections.get(connection_id) + if not connection: + raise ValueError(f"Connection {connection_id} not found") + + try: + s3 = boto3.client( + "s3", + endpoint_url=connection.endpoint_url, + aws_access_key_id=connection.access_key, + aws_secret_access_key=connection.secret_key, + region_name=connection.region, + ) + s3.create_bucket(Bucket=bucket_name) + except ClientError as e: + logger.error(f"Failed to create remote bucket {bucket_name}: {e}") + raise + def trigger_replication(self, bucket_name: str, object_key: str) -> None: rule = self.get_rule(bucket_name) if not rule or not rule.enabled: diff --git a/app/s3_api.py b/app/s3_api.py index 584074b..f9d496f 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -8,7 +8,7 @@ import re import uuid from datetime import datetime, timedelta, timezone from typing import Any, Dict -from urllib.parse import quote, urlencode +from urllib.parse import quote, urlencode, urlparse from xml.etree.ElementTree import Element, SubElement, tostring, fromstring, ParseError from flask import Blueprint, Response, current_app, jsonify, request @@ -468,7 +468,17 @@ def _generate_presigned_url( "X-Amz-Content-Sha256": "UNSIGNED-PAYLOAD", } canonical_query = _encode_query_params(query_params) - host = request.host + + # Determine host and scheme from config or request + api_base = current_app.config.get("API_BASE_URL") + if api_base: + parsed = urlparse(api_base) + host = parsed.netloc + scheme = parsed.scheme + else: + host = request.host + scheme = request.scheme or "http" + canonical_headers = f"host:{host}\n" canonical_request = "\n".join( [ @@ -492,7 +502,6 @@ def _generate_presigned_url( signing_key = _derive_signing_key(secret_key, date_stamp, region, service) signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest() query_with_sig = canonical_query + f"&X-Amz-Signature={signature}" - scheme = request.scheme or "http" return f"{scheme}://{host}{_canonical_uri(bucket_name, object_key)}?{query_with_sig}" diff --git a/app/ui.py b/app/ui.py index ebc04ca..fbf48a3 100644 --- a/app/ui.py +++ b/app/ui.py @@ -1105,6 +1105,16 @@ def update_bucket_replication(bucket_name: str): if not target_conn_id or not target_bucket: flash("Target connection and bucket are required", "danger") else: + # Check if user wants to create the remote bucket + create_remote = request.form.get("create_remote_bucket") == "on" + if create_remote: + try: + _replication().create_remote_bucket(target_conn_id, target_bucket) + flash(f"Created remote bucket '{target_bucket}'", "success") + except Exception as e: + flash(f"Failed to create remote bucket: {e}", "warning") + # We continue to set the rule even if creation fails (maybe it exists?) 
+ rule = ReplicationRule( bucket_name=bucket_name, target_connection_id=target_conn_id, diff --git a/templates/bucket_detail.html b/templates/bucket_detail.html index af93fe8..df6604b 100644 --- a/templates/bucket_detail.html +++ b/templates/bucket_detail.html @@ -409,6 +409,7 @@
+
@@ -418,6 +419,7 @@ {% if connections %}
+
@@ -434,7 +436,12 @@
-
The bucket on the remote service must already exist.
+
+ + +
-- 2.49.1 From 471cf5a30518f95e678cf3862c5c5887e1ceed28 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 12:11:41 +0800 Subject: [PATCH 05/20] Debug replication corruption issue --- app/replication.py | 82 ++++++++++--- app/s3_api.py | 24 +++- app/ui.py | 79 +++++++++++-- templates/bucket_detail.html | 39 +++++-- templates/connections.html | 217 ++++++++++++++++++++++++++++++++--- 5 files changed, 386 insertions(+), 55 deletions(-) diff --git a/app/replication.py b/app/replication.py index 994bd7b..5f6d19f 100644 --- a/app/replication.py +++ b/app/replication.py @@ -2,6 +2,7 @@ from __future__ import annotations import logging +import mimetypes import threading from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass @@ -10,9 +11,10 @@ from typing import Dict, Optional import boto3 from botocore.exceptions import ClientError +from boto3.exceptions import S3UploadFailedError from .connections import ConnectionStore, RemoteConnection -from .storage import ObjectStorage +from .storage import ObjectStorage, StorageError logger = logging.getLogger(__name__) @@ -116,21 +118,73 @@ class ReplicationManager: # We need the file content. # Since ObjectStorage is filesystem based, let's get the stream. # We need to be careful about closing it. - meta = self.storage.get_object_meta(bucket_name, object_key) - if not meta: + try: + path = self.storage.get_object_path(bucket_name, object_key) + except StorageError: return - with self.storage.open_object(bucket_name, object_key) as f: - extra_args = {} - if meta.metadata: - extra_args["Metadata"] = meta.metadata - - s3.upload_fileobj( - f, - rule.target_bucket, - object_key, - ExtraArgs=extra_args - ) + metadata = self.storage.get_object_metadata(bucket_name, object_key) + + extra_args = {} + if metadata: + extra_args["Metadata"] = metadata + + # Guess content type to prevent corruption/wrong handling + content_type, _ = mimetypes.guess_type(path) + file_size = path.stat().st_size + + # Debug: Calculate MD5 of source file + import hashlib + md5_hash = hashlib.md5() + with path.open("rb") as f: + # Log first 32 bytes + header = f.read(32) + logger.info(f"Source first 32 bytes: {header.hex()}") + md5_hash.update(header) + for chunk in iter(lambda: f.read(4096), b""): + md5_hash.update(chunk) + source_md5 = md5_hash.hexdigest() + logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, MD5={source_md5}, ContentType={content_type}") + + try: + with path.open("rb") as f: + s3.put_object( + Bucket=rule.target_bucket, + Key=object_key, + Body=f, + ContentLength=file_size, + ContentType=content_type or "application/octet-stream", + Metadata=metadata or {} + ) + except (ClientError, S3UploadFailedError) as e: + # Check if it's a NoSuchBucket error (either direct or wrapped) + is_no_bucket = False + if isinstance(e, ClientError): + if e.response['Error']['Code'] == 'NoSuchBucket': + is_no_bucket = True + elif isinstance(e, S3UploadFailedError): + if "NoSuchBucket" in str(e): + is_no_bucket = True + + if is_no_bucket: + logger.info(f"Target bucket {rule.target_bucket} not found. 
Attempting to create it.") + try: + s3.create_bucket(Bucket=rule.target_bucket) + # Retry upload + with path.open("rb") as f: + s3.put_object( + Bucket=rule.target_bucket, + Key=object_key, + Body=f, + ContentLength=file_size, + ContentType=content_type or "application/octet-stream", + Metadata=metadata or {} + ) + except Exception as create_err: + logger.error(f"Failed to create target bucket {rule.target_bucket}: {create_err}") + raise e # Raise original error + else: + raise e logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") diff --git a/app/s3_api.py b/app/s3_api.py index f9d496f..86d4e11 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -1078,7 +1078,13 @@ def object_handler(bucket_name: str, object_key: str): _, error = _object_principal("write", bucket_name, object_key) if error: return error - stream = request.stream + + # Debug: Log incoming request details + current_app.logger.info(f"Receiving PUT {bucket_name}/{object_key}") + current_app.logger.info(f"Headers: {dict(request.headers)}") + current_app.logger.info(f"Content-Length: {request.content_length}") + + stream = DebugStream(request.stream, current_app.logger) metadata = _extract_request_metadata() try: meta = storage.put_object( @@ -1252,3 +1258,19 @@ def head_object(bucket_name: str, object_key: str) -> Response: return _error_response("NoSuchKey", "Object not found", 404) except IamError as exc: return _error_response("AccessDenied", str(exc), 403) + + +class DebugStream: + def __init__(self, stream, logger): + self.stream = stream + self.logger = logger + self.first_chunk = True + + def read(self, size=-1): + chunk = self.stream.read(size) + if self.first_chunk and chunk: + # Log first 32 bytes + prefix = chunk[:32] + self.logger.info(f"Received first 32 bytes: {prefix.hex()}") + self.first_chunk = False + return chunk diff --git a/app/ui.py b/app/ui.py index fbf48a3..3b2517f 100644 --- a/app/ui.py +++ b/app/ui.py @@ -6,7 +6,9 @@ import uuid from typing import Any from urllib.parse import urlparse +import boto3 import requests +from botocore.exceptions import ClientError from flask import ( Blueprint, Response, @@ -1070,6 +1072,73 @@ def create_connection(): return redirect(url_for("ui.connections_dashboard")) +@ui_bp.post("/connections/test") +def test_connection(): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:list_users") + except IamError: + return jsonify({"status": "error", "message": "Access denied"}), 403 + + data = request.get_json(silent=True) or request.form + endpoint = data.get("endpoint_url", "").strip() + access_key = data.get("access_key", "").strip() + secret_key = data.get("secret_key", "").strip() + region = data.get("region", "us-east-1").strip() + + if not all([endpoint, access_key, secret_key]): + return jsonify({"status": "error", "message": "Missing credentials"}), 400 + + try: + s3 = boto3.client( + "s3", + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name=region, + ) + # Try to list buckets to verify credentials and endpoint + s3.list_buckets() + return jsonify({"status": "ok", "message": "Connection successful"}) + except Exception as e: + return jsonify({"status": "error", "message": str(e)}), 400 + + +@ui_bp.post("/connections//update") +def update_connection(connection_id: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:list_users") + except IamError: + flash("Access denied", "danger") + return 
redirect(url_for("ui.buckets_overview")) + + conn = _connections().get(connection_id) + if not conn: + flash("Connection not found", "danger") + return redirect(url_for("ui.connections_dashboard")) + + name = request.form.get("name", "").strip() + endpoint = request.form.get("endpoint_url", "").strip() + access_key = request.form.get("access_key", "").strip() + secret_key = request.form.get("secret_key", "").strip() + region = request.form.get("region", "us-east-1").strip() + + if not all([name, endpoint, access_key, secret_key]): + flash("All fields are required", "danger") + return redirect(url_for("ui.connections_dashboard")) + + conn.name = name + conn.endpoint_url = endpoint + conn.access_key = access_key + conn.secret_key = secret_key + conn.region = region + + _connections().save() + flash(f"Connection '{name}' updated", "success") + return redirect(url_for("ui.connections_dashboard")) + + @ui_bp.post("/connections//delete") def delete_connection(connection_id: str): principal = _current_principal() @@ -1105,16 +1174,6 @@ def update_bucket_replication(bucket_name: str): if not target_conn_id or not target_bucket: flash("Target connection and bucket are required", "danger") else: - # Check if user wants to create the remote bucket - create_remote = request.form.get("create_remote_bucket") == "on" - if create_remote: - try: - _replication().create_remote_bucket(target_conn_id, target_bucket) - flash(f"Created remote bucket '{target_bucket}'", "success") - except Exception as e: - flash(f"Failed to create remote bucket: {e}", "warning") - # We continue to set the rule even if creation fails (maybe it exists?) - rule = ReplicationRule( bucket_name=bucket_name, target_connection_id=target_conn_id, diff --git a/templates/bucket_detail.html b/templates/bucket_detail.html index df6604b..7a2a2a1 100644 --- a/templates/bucket_detail.html +++ b/templates/bucket_detail.html @@ -408,11 +408,9 @@
- - - - -
+ {% else %}

Replication allows you to automatically copy new objects from this bucket to a bucket in another S3-compatible service.
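The flow this blurb describes can be exercised end to end with boto3. A hedged sketch — endpoints, bucket names, and object keys below are illustrative; `localadmin` is the seed user and `target-server:5002` / `backup-bucket` are the example target used elsewhere in this series:

```python
import time

import boto3

# Source instance (the API server this UI talks to) and the remote target.
source = boto3.client(
    "s3",
    endpoint_url="http://127.0.0.1:5000",
    aws_access_key_id="localadmin",
    aws_secret_access_key="localadmin",
)
target = boto3.client(
    "s3",
    endpoint_url="http://target-server:5002",
    aws_access_key_id="remote-access-key",
    aws_secret_access_key="remote-secret-key",
)

# Writing to the source bucket should fan out to the remote bucket.
source.put_object(Bucket="my-bucket", Key="hello.txt", Body=b"hello")

# Replication runs on a background ThreadPoolExecutor, so poll briefly.
for _ in range(10):
    try:
        target.head_object(Bucket="backup-bucket", Key="hello.txt")
        print("replicated")
        break
    except target.exceptions.ClientError:
        time.sleep(1)
```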

@@ -436,12 +434,7 @@
-
- - -
+
If the target bucket does not exist, it will be created automatically.
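The auto-creation promise maps to the retry logic this patch adds in `app/replication.py`; condensed into a standalone sketch (the helper name is illustrative):

```python
from boto3.exceptions import S3UploadFailedError
from botocore.exceptions import ClientError

def put_with_bucket_autocreate(s3, bucket: str, key: str, path) -> None:
    """Upload once; on NoSuchBucket, create the target bucket and retry."""
    def _upload() -> None:
        with open(path, "rb") as f:
            s3.put_object(Bucket=bucket, Key=key, Body=f)

    try:
        _upload()
    except (ClientError, S3UploadFailedError) as e:
        # NoSuchBucket may arrive directly or wrapped by the transfer layer.
        no_bucket = (
            isinstance(e, ClientError)
            and e.response["Error"]["Code"] == "NoSuchBucket"
        ) or (isinstance(e, S3UploadFailedError) and "NoSuchBucket" in str(e))
        if not no_bucket:
            raise
        s3.create_bucket(Bucket=bucket)
        _upload()
```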
@@ -715,6 +708,30 @@
+ + + {% endblock %} {% block extra_scripts %} diff --git a/templates/connections.html b/templates/connections.html index 93e7e1e..3a1e603 100644 --- a/templates/connections.html +++ b/templates/connections.html @@ -12,12 +12,12 @@
-
-
+
+
Add New Connection
-
+
@@ -37,44 +37,69 @@
- +
+ + +
- +
+ + +
+
-
-
+
+
Existing Connections
{% if connections %}
- +
- + {% for conn in connections %} - - - - - + + + + {% endfor %} @@ -82,10 +107,164 @@
Name Endpoint Region Access KeyActionsActions
{{ conn.name }}{{ conn.endpoint_url }}{{ conn.region }}{{ conn.access_key }} -
- - -
+
{{ conn.name }}{{ conn.endpoint_url }}{{ conn.region }}{{ conn.access_key }} +
+ + +
{% else %} -

No remote connections configured.

+
+ + + + +

No remote connections configured.
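The Test button added to this page posts to the new `/connections/test` route; stripped of the Flask plumbing, the probe is just a `list_buckets` call (argument values are whatever the form supplies):

```python
import boto3

def probe_connection(endpoint: str, access_key: str, secret_key: str,
                     region: str = "us-east-1") -> None:
    """Raise if the endpoint or credentials are unusable."""
    s3 = boto3.client(
        "s3",
        endpoint_url=endpoint,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=region,
    )
    s3.list_buckets()  # any auth/endpoint problem surfaces as an exception
```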

+
{% endif %}
+ + + + + + + + {% endblock %} -- 2.49.1 From fe3eacd2bee98d386bbb1eae27e871c14a5d3994 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 12:56:33 +0800 Subject: [PATCH 06/20] Debug replication corruption issue --- app/replication.py | 13 +------ app/s3_api.py | 95 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/app/replication.py b/app/replication.py index 5f6d19f..d6313b5 100644 --- a/app/replication.py +++ b/app/replication.py @@ -133,18 +133,7 @@ class ReplicationManager: content_type, _ = mimetypes.guess_type(path) file_size = path.stat().st_size - # Debug: Calculate MD5 of source file - import hashlib - md5_hash = hashlib.md5() - with path.open("rb") as f: - # Log first 32 bytes - header = f.read(32) - logger.info(f"Source first 32 bytes: {header.hex()}") - md5_hash.update(header) - for chunk in iter(lambda: f.read(4096), b""): - md5_hash.update(chunk) - source_md5 = md5_hash.hexdigest() - logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, MD5={source_md5}, ContentType={content_type}") + logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}") try: with path.open("rb") as f: diff --git a/app/s3_api.py b/app/s3_api.py index 86d4e11..5cfd23d 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -405,7 +405,11 @@ def _canonical_headers_from_request(headers: list[str]) -> str: lines = [] for header in headers: if header == "host": - value = request.host + api_base = current_app.config.get("API_BASE_URL") + if api_base: + value = urlparse(api_base).netloc + else: + value = request.host else: value = request.headers.get(header, "") canonical_value = " ".join(value.strip().split()) if value else "" @@ -1084,7 +1088,12 @@ def object_handler(bucket_name: str, object_key: str): current_app.logger.info(f"Headers: {dict(request.headers)}") current_app.logger.info(f"Content-Length: {request.content_length}") - stream = DebugStream(request.stream, current_app.logger) + stream = request.stream + content_encoding = request.headers.get("Content-Encoding", "").lower() + if "aws-chunked" in content_encoding: + current_app.logger.info("Decoding aws-chunked stream") + stream = AwsChunkedDecoder(stream) + metadata = _extract_request_metadata() try: meta = storage.put_object( @@ -1260,17 +1269,77 @@ def head_object(bucket_name: str, object_key: str) -> Response: return _error_response("AccessDenied", str(exc), 403) -class DebugStream: - def __init__(self, stream, logger): +class AwsChunkedDecoder: + """Decodes aws-chunked encoded streams.""" + def __init__(self, stream): self.stream = stream - self.logger = logger - self.first_chunk = True + self.buffer = b"" + self.chunk_remaining = 0 + self.finished = False def read(self, size=-1): - chunk = self.stream.read(size) - if self.first_chunk and chunk: - # Log first 32 bytes - prefix = chunk[:32] - self.logger.info(f"Received first 32 bytes: {prefix.hex()}") - self.first_chunk = False - return chunk + if self.finished: + return b"" + + result = b"" + while size == -1 or len(result) < size: + if self.chunk_remaining > 0: + to_read = self.chunk_remaining + if size != -1: + to_read = min(to_read, size - len(result)) + + chunk = self.stream.read(to_read) + if not chunk: + raise IOError("Unexpected EOF in chunk data") + + result += chunk + self.chunk_remaining -= len(chunk) + + if self.chunk_remaining == 0: + # Read CRLF after chunk data + crlf = self.stream.read(2) + if crlf != b"\r\n": + raise IOError("Malformed chunk: missing CRLF") 
+ else: + # Read chunk size line + line = b"" + while True: + char = self.stream.read(1) + if not char: + if not line: # EOF at start of chunk size + self.finished = True + return result + raise IOError("Unexpected EOF in chunk size") + line += char + if line.endswith(b"\r\n"): + break + + # Parse chunk size (hex) + try: + line_str = line.decode("ascii").strip() + # Handle chunk-signature extension if present (e.g. "1000;chunk-signature=...") + if ";" in line_str: + line_str = line_str.split(";")[0] + chunk_size = int(line_str, 16) + except ValueError: + raise IOError(f"Invalid chunk size: {line}") + + if chunk_size == 0: + self.finished = True + # Read trailers if any (until empty line) + while True: + line = b"" + while True: + char = self.stream.read(1) + if not char: + break + line += char + if line.endswith(b"\r\n"): + break + if line == b"\r\n" or not line: + break + return result + + self.chunk_remaining = chunk_size + + return result -- 2.49.1 From a32d9dbd771f3900f22dc7ccf4d108442ca3bdc5 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 14:13:41 +0800 Subject: [PATCH 07/20] Fix replication corruption issue --- app/s3_api.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/app/s3_api.py b/app/s3_api.py index 5cfd23d..a0e51b0 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -480,8 +480,8 @@ def _generate_presigned_url( host = parsed.netloc scheme = parsed.scheme else: - host = request.host - scheme = request.scheme or "http" + host = request.headers.get("X-Forwarded-Host", request.host) + scheme = request.headers.get("X-Forwarded-Proto", request.scheme or "http") canonical_headers = f"host:{host}\n" canonical_request = "\n".join( [ @@ -1083,15 +1083,9 @@ def object_handler(bucket_name: str, object_key: str): if error: return error - # Debug: Log incoming request details - current_app.logger.info(f"Receiving PUT {bucket_name}/{object_key}") - current_app.logger.info(f"Headers: {dict(request.headers)}") - current_app.logger.info(f"Content-Length: {request.content_length}") - stream = request.stream content_encoding = request.headers.get("Content-Encoding", "").lower() if "aws-chunked" in content_encoding: - current_app.logger.info("Decoding aws-chunked stream") stream = AwsChunkedDecoder(stream) metadata = _extract_request_metadata() -- 2.49.1 From 8c00d7bd4b959518e41702334b03fdbe07d925af Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 14:32:28 +0800 Subject: [PATCH 08/20] Enhance replication functionality --- app/__init__.py | 4 ++++ app/config.py | 7 +++++-- app/replication.py | 29 +++++++++++++++++++++-------- app/s3_api.py | 16 ++++++++++++++++ app/ui.py | 15 +++++++++++++-- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 17da538..cf75a06 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -12,6 +12,7 @@ from typing import Any, Dict, Optional from flask import Flask, g, has_request_context, redirect, render_template, request, url_for from flask_cors import CORS from flask_wtf.csrf import CSRFError +from werkzeug.middleware.proxy_fix import ProxyFix from .bucket_policies import BucketPolicyStore from .config import AppConfig @@ -47,6 +48,9 @@ def create_app( if app.config.get("TESTING"): app.config.setdefault("WTF_CSRF_ENABLED", False) + # Trust X-Forwarded-* headers from proxies + app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) + _configure_cors(app) _configure_logging(app) diff --git a/app/config.py b/app/config.py index 3bfd14f..282b3a9 100644 
--- a/app/config.py +++ b/app/config.py @@ -39,7 +39,7 @@ class AppConfig: secret_key: str iam_config_path: Path bucket_policy_path: Path - api_base_url: str + api_base_url: Optional[str] aws_region: str aws_service: str ui_enforce_bucket_policies: bool @@ -100,7 +100,10 @@ class AppConfig: bucket_policy_path, legacy_path=None if bucket_policy_override else PROJECT_ROOT / "data" / "bucket_policies.json", ) - api_base_url = str(_get("API_BASE_URL", "http://127.0.0.1:5000")) + api_base_url = _get("API_BASE_URL", None) + if api_base_url: + api_base_url = str(api_base_url) + aws_region = str(_get("AWS_REGION", "us-east-1")) aws_service = str(_get("AWS_SERVICE", "s3")) enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"} diff --git a/app/replication.py b/app/replication.py index d6313b5..c604727 100644 --- a/app/replication.py +++ b/app/replication.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Dict, Optional import boto3 +from botocore.config import Config from botocore.exceptions import ClientError from boto3.exceptions import S3UploadFailedError @@ -18,6 +19,8 @@ from .storage import ObjectStorage, StorageError logger = logging.getLogger(__name__) +REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0" + @dataclass class ReplicationRule: @@ -87,7 +90,7 @@ class ReplicationManager: logger.error(f"Failed to create remote bucket {bucket_name}: {e}") raise - def trigger_replication(self, bucket_name: str, object_key: str) -> None: + def trigger_replication(self, bucket_name: str, object_key: str, action: str = "write") -> None: rule = self.get_rule(bucket_name) if not rule or not rule.enabled: return @@ -97,24 +100,34 @@ class ReplicationManager: logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") return - self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection) + self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action) - def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection) -> None: + def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None: try: - # 1. Get local file path - # Note: We are accessing internal storage structure here. - # Ideally storage.py should expose a 'get_file_path' or we read the stream. - # For efficiency, we'll try to read the file directly if we can, or use storage.get_object - # Using boto3 to upload + config = Config(user_agent_extra=REPLICATION_USER_AGENT) s3 = boto3.client( "s3", endpoint_url=conn.endpoint_url, aws_access_key_id=conn.access_key, aws_secret_access_key=conn.secret_key, region_name=conn.region, + config=config, ) + if action == "delete": + try: + s3.delete_object(Bucket=rule.target_bucket, Key=object_key) + logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") + except ClientError as e: + logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}") + return + + # 1. Get local file path + # Note: We are accessing internal storage structure here. + # Ideally storage.py should expose a 'get_file_path' or we read the stream. + # For efficiency, we'll try to read the file directly if we can, or use storage.get_object + # We need the file content. # Since ObjectStorage is filesystem based, let's get the stream. # We need to be careful about closing it. 
diff --git a/app/s3_api.py b/app/s3_api.py index a0e51b0..950990a 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -17,6 +17,7 @@ from werkzeug.http import http_date from .bucket_policies import BucketPolicyStore from .extensions import limiter from .iam import IamError, Principal +from .replication import ReplicationManager from .storage import ObjectStorage, StorageError s3_api_bp = Blueprint("s3_api", __name__) @@ -31,6 +32,9 @@ def _iam(): return current_app.extensions["iam"] +def _replication_manager() -> ReplicationManager: + return current_app.extensions["replication"] + def _bucket_policies() -> BucketPolicyStore: store: BucketPolicyStore = current_app.extensions["bucket_policies"] @@ -1107,6 +1111,12 @@ def object_handler(bucket_name: str, object_key: str): ) response = Response(status=200) response.headers["ETag"] = f'"{meta.etag}"' + + # Trigger replication if not a replication request + user_agent = request.headers.get("User-Agent", "") + if "S3ReplicationAgent" not in user_agent: + _replication_manager().trigger_replication(bucket_name, object_key, action="write") + return response if request.method in {"GET", "HEAD"}: @@ -1141,6 +1151,12 @@ def object_handler(bucket_name: str, object_key: str): return error storage.delete_object(bucket_name, object_key) current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key}) + + # Trigger replication if not a replication request + user_agent = request.headers.get("User-Agent", "") + if "S3ReplicationAgent" not in user_agent: + _replication_manager().trigger_replication(bucket_name, object_key, action="delete") + return Response(status=204) diff --git a/app/ui.py b/app/ui.py index 3b2517f..5f7a1f3 100644 --- a/app/ui.py +++ b/app/ui.py @@ -703,10 +703,21 @@ def object_presign(bucket_name: str, object_key: str): _authorize_ui(principal, bucket_name, action, object_key=object_key) except IamError as exc: return jsonify({"error": str(exc)}), 403 - api_base = current_app.config["API_BASE_URL"].rstrip("/") + + api_base = current_app.config.get("API_BASE_URL") + if not api_base: + api_base = "http://127.0.0.1:5000" + api_base = api_base.rstrip("/") + url = f"{api_base}/presign/{bucket_name}/{object_key}" + + headers = _api_headers() + # Forward the host so the API knows the public URL + headers["X-Forwarded-Host"] = request.host + headers["X-Forwarded-Proto"] = request.scheme + try: - response = requests.post(url, headers=_api_headers(), json=payload, timeout=5) + response = requests.post(url, headers=headers, json=payload, timeout=5) except requests.RequestException as exc: return jsonify({"error": f"API unavailable: {exc}"}), 502 try: -- 2.49.1 From 96de6164d1a7689d4b5f1798fdfc21d74308ba75 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 14:45:21 +0800 Subject: [PATCH 09/20] Replication fixes --- app/ui.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/ui.py b/app/ui.py index 5f7a1f3..d603259 100644 --- a/app/ui.py +++ b/app/ui.py @@ -40,6 +40,10 @@ def _storage() -> ObjectStorage: return current_app.extensions["object_storage"] +def _replication_manager() -> ReplicationManager: + return current_app.extensions["replication"] + + def _iam(): return current_app.extensions["iam"] @@ -514,6 +518,7 @@ def delete_object(bucket_name: str, object_key: str): flash(f"Permanently deleted '{object_key}' and all versions", "success") else: _storage().delete_object(bucket_name, object_key) + _replication_manager().trigger_replication(bucket_name, object_key, action="delete") flash(f"Deleted 
'{object_key}'", "success") except (IamError, StorageError) as exc: flash(_friendly_error_message(exc), "danger") @@ -574,6 +579,7 @@ def bulk_delete_objects(bucket_name: str): storage.purge_object(bucket_name, key) else: storage.delete_object(bucket_name, key) + _replication_manager().trigger_replication(bucket_name, key, action="delete") deleted.append(key) except StorageError as exc: errors.append({"key": key, "error": str(exc)}) -- 2.49.1 From ec5d52f20824596b4dbac0214f0c55d51f419405 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 15:02:29 +0800 Subject: [PATCH 10/20] Improve and add two-way replication functionality; Update docs --- app/s3_api.py | 1 + app/ui.py | 1 + docs.md | 25 ++++++++++++++++++++++++- templates/docs.html | 16 ++++++++++++++-- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/app/s3_api.py b/app/s3_api.py index 950990a..b080346 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -1043,6 +1043,7 @@ def bucket_handler(bucket_name: str) -> Response: try: storage.delete_bucket(bucket_name) _bucket_policies().delete_policy(bucket_name) + _replication_manager().delete_rule(bucket_name) except StorageError as exc: code = "BucketNotEmpty" if "not empty" in str(exc) else "NoSuchBucket" status = 409 if code == "BucketNotEmpty" else 404 diff --git a/app/ui.py b/app/ui.py index d603259..adf1f6f 100644 --- a/app/ui.py +++ b/app/ui.py @@ -500,6 +500,7 @@ def delete_bucket(bucket_name: str): _authorize_ui(principal, bucket_name, "delete") _storage().delete_bucket(bucket_name) _bucket_policies().delete_policy(bucket_name) + _replication_manager().delete_rule(bucket_name) flash(f"Bucket '{bucket_name}' removed", "success") except (StorageError, IamError) as exc: flash(_friendly_error_message(exc), "danger") diff --git a/docs.md b/docs.md index e55a737..016ffbb 100644 --- a/docs.md +++ b/docs.md @@ -77,12 +77,20 @@ The repo now tracks a human-friendly release string inside `app/version.py` (see | `SECRET_KEY` | `dev-secret-key` | Flask session key for UI auth. | | `IAM_CONFIG` | `/data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. | | `BUCKET_POLICY_PATH` | `/data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). | -| `API_BASE_URL` | `http://127.0.0.1:5000` | Used by the UI to hit API endpoints (presign/policy). | +| `API_BASE_URL` | `None` | Used by the UI to hit API endpoints (presign/policy). If unset, the UI will auto-detect the host or use `X-Forwarded-*` headers. | | `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. | | `AWS_SERVICE` | `s3` | Service string for SigV4. | Set env vars (or pass overrides to `create_app`) to point the servers at custom paths. +### Proxy Configuration + +If running behind a reverse proxy (e.g., Nginx, Cloudflare, or a tunnel), ensure the proxy sets the standard forwarding headers: +- `X-Forwarded-Host` +- `X-Forwarded-Proto` + +The application automatically trusts these headers to generate correct presigned URLs (e.g., `https://s3.example.com/...` instead of `http://127.0.0.1:5000/...`). Alternatively, you can explicitly set `API_BASE_URL` to your public endpoint. + ## 4. Authentication & IAM 1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access. @@ -262,6 +270,21 @@ Now, configure the primary instance to replicate to the target. 
aws --endpoint-url http://target-server:5002 s3 ls s3://backup-bucket ``` +### Bidirectional Replication (Active-Active) + +To set up two-way replication (Server A ↔ Server B): + +1. Follow the steps above to replicate **A → B**. +2. Repeat the process on Server B to replicate **B → A**: + - Create a connection on Server B pointing to Server A. + - Enable replication on the target bucket on Server B. + +**Loop Prevention**: The system automatically detects replication traffic using a custom User-Agent (`S3ReplicationAgent`). This prevents infinite loops where an object replicated from A to B is immediately replicated back to A. + +**Deletes**: Deleting an object on one server will propagate the deletion to the other server. + +**Note**: Deleting a bucket will automatically remove its associated replication configuration. + ## 7. Running Tests ```bash diff --git a/templates/docs.html b/templates/docs.html index e25c124..6f0dd40 100644 --- a/templates/docs.html +++ b/templates/docs.html @@ -290,6 +290,18 @@ s3.complete_multipart_upload(
+
+ Bidirectional Replication (Active-Active)
+
+ To set up two-way replication (Server A ↔ Server B):
+
+   1. Follow the steps above to replicate A → B.
+   2. Repeat the process on Server B to replicate B → A (create a connection to A, enable rule).
+
+ Loop Prevention: The system automatically detects replication traffic using a custom User-Agent (S3ReplicationAgent). This prevents infinite loops where an object replicated from A to B is immediately replicated back to A.
+ Deletes: Deleting an object on one server will propagate the deletion to the other server.
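A sketch of the loop-prevention handshake described in the added lines above: the sender tags itself through botocore's `user_agent_extra`, and the receiving handler (see `app/s3_api.py` in this series) skips re-replication when it sees the tag. Endpoint and credentials here are illustrative:

```python
import boto3
from botocore.config import Config

REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"

# Outbound replication client: every request carries the marker UA.
replicator = boto3.client(
    "s3",
    endpoint_url="http://target-server:5002",
    aws_access_key_id="remote-access-key",
    aws_secret_access_key="remote-secret-key",
    config=Config(user_agent_extra=REPLICATION_USER_AGENT),
)

# Inbound side (Flask handler, abridged): only user traffic fans out again.
# if "S3ReplicationAgent" not in request.headers.get("User-Agent", ""):
#     _replication_manager().trigger_replication(bucket_name, object_key, action="write")
```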
@@ -330,8 +342,8 @@ s3.complete_multipart_upload( Requests hit the wrong host - API_BASE_URL not updated after tunneling/forwarding - Set API_BASE_URL in your shell or .env to match the published host. + Proxy headers missing or API_BASE_URL incorrect + Ensure your proxy sends X-Forwarded-Host/Proto headers, or explicitly set API_BASE_URL to your public domain. -- 2.49.1 From 36c08b0ac142a5881b1cd4b6105baf9f574557a5 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 15:06:17 +0800 Subject: [PATCH 11/20] Update dockerfile with gunicorn for prod --- Dockerfile | 5 ++++- docker-entrypoint.sh | 8 ++++++++ requirements.txt | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 docker-entrypoint.sh diff --git a/Dockerfile b/Dockerfile index dec63e2..b18c77c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,9 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . +# Make entrypoint executable +RUN chmod +x docker-entrypoint.sh + # Create data directory and set permissions RUN mkdir -p /app/data \ && useradd -m -u 1000 myfsio \ @@ -31,4 +34,4 @@ ENV APP_HOST=0.0.0.0 \ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD python -c "import requests; requests.get('http://localhost:5000/healthz', timeout=2)" -CMD ["python", "run.py", "--mode", "both"] +CMD ["./docker-entrypoint.sh"] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000..6fbaf2e --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -e + +# Start API server in background +gunicorn "app:create_api_app()" --bind 0.0.0.0:5000 --workers 4 --access-logfile - & + +# Start UI server in foreground +gunicorn "app:create_ui_app()" --bind 0.0.0.0:5100 --workers 4 --access-logfile - diff --git a/requirements.txt b/requirements.txt index 356c1f9..7697a50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ Flask-WTF>=1.2.1 pytest>=7.4 requests>=2.31 boto3>=1.34 +gunicorn>=21.2.0 -- 2.49.1 From 9064f9d60eef3332d83c89215c7c624fb608f293 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 15:13:33 +0800 Subject: [PATCH 12/20] Fix CSRF token issue on login --- app/__init__.py | 5 +++++ app/config.py | 22 ++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index cf75a06..5afd75c 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -81,6 +81,11 @@ def create_app( app.extensions["connections"] = connections app.extensions["replication"] = replication + @app.after_request + def set_server_header(response): + response.headers["Server"] = "MyFSIO" + return response + @app.errorhandler(500) def internal_error(error): return render_template('500.html'), 500 diff --git a/app/config.py b/app/config.py index 282b3a9..2033cbd 100644 --- a/app/config.py +++ b/app/config.py @@ -78,11 +78,25 @@ class AppConfig: multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024)) default_secret = "dev-secret-key" secret_key = str(_get("SECRET_KEY", default_secret)) + + # If using default/missing secret, try to load/persist a generated one from disk + # This ensures consistency across Gunicorn workers if not secret_key or secret_key == default_secret: - generated = secrets.token_urlsafe(32) - if secret_key == default_secret: - warnings.warn("Using insecure default SECRET_KEY. 
A random value has been generated; set SECRET_KEY for production", RuntimeWarning) - secret_key = generated + secret_file = storage_root / ".myfsio.sys" / "config" / ".secret" + if secret_file.exists(): + secret_key = secret_file.read_text().strip() + else: + generated = secrets.token_urlsafe(32) + if secret_key == default_secret: + warnings.warn("Using insecure default SECRET_KEY. A random value has been generated and persisted; set SECRET_KEY for production", RuntimeWarning) + try: + secret_file.parent.mkdir(parents=True, exist_ok=True) + secret_file.write_text(generated) + secret_key = generated + except OSError: + # Fallback if we can't write to disk (e.g. read-only fs) + secret_key = generated + iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ -- 2.49.1 From 7d1735a59f82921c7f72fbaef3b22c61602a0f56 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 15:20:42 +0800 Subject: [PATCH 13/20] Fix server headers --- app/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 5afd75c..a3b8989 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -81,11 +81,6 @@ def create_app( app.extensions["connections"] = connections app.extensions["replication"] = replication - @app.after_request - def set_server_header(response): - response.headers["Server"] = "MyFSIO" - return response - @app.errorhandler(500) def internal_error(error): return render_template('500.html'), 500 @@ -220,5 +215,5 @@ def _configure_logging(app: Flask) -> None: }, ) response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" - response.headers["Server"] = "MyFISO" + response.headers["Server"] = "MyFSIO" return response -- 2.49.1 From 57c40dcdcc112286eb9ec9dff1e4fe0e813b415b Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 15:51:43 +0800 Subject: [PATCH 14/20] Test server header --- docker-entrypoint.sh | 4 ++-- gunicorn_conf.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 gunicorn_conf.py diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 6fbaf2e..56389ad 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -2,7 +2,7 @@ set -e # Start API server in background -gunicorn "app:create_api_app()" --bind 0.0.0.0:5000 --workers 4 --access-logfile - & +gunicorn "app:create_api_app()" -c gunicorn_conf.py --bind 0.0.0.0:5000 --workers 4 --access-logfile - & # Start UI server in foreground -gunicorn "app:create_ui_app()" --bind 0.0.0.0:5100 --workers 4 --access-logfile - +gunicorn "app:create_ui_app()" -c gunicorn_conf.py --bind 0.0.0.0:5100 --workers 4 --access-logfile - diff --git a/gunicorn_conf.py b/gunicorn_conf.py new file mode 100644 index 0000000..3bec3cd --- /dev/null +++ b/gunicorn_conf.py @@ -0,0 +1,3 @@ +import gunicorn +gunicorn.SERVER_SOFTWARE = 'MyFSIO' + -- 2.49.1 From e4b92a32a139208f3aebec635e6fcf4a4d0ad8c5 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 16:09:24 +0800 Subject: [PATCH 15/20] Fix and test custom server header --- gunicorn.conf.py | 3 +++ gunicorn_conf.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 gunicorn.conf.py delete mode 100644 gunicorn_conf.py diff --git a/gunicorn.conf.py b/gunicorn.conf.py new file mode 100644 index 0000000..80964a3 --- /dev/null +++ b/gunicorn.conf.py @@ -0,0 +1,3 @@ +import gunicorn +gunicorn.SERVER = 'MyFSIO' + diff --git a/gunicorn_conf.py b/gunicorn_conf.py deleted file 
mode 100644 index 3bec3cd..0000000 --- a/gunicorn_conf.py +++ /dev/null @@ -1,3 +0,0 @@ -import gunicorn -gunicorn.SERVER_SOFTWARE = 'MyFSIO' - -- 2.49.1 From 1116353d0f0a710e8697eb554902804f6a8a6620 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 16:10:15 +0800 Subject: [PATCH 16/20] Update docker-entrypoint.sh --- docker-entrypoint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 56389ad..274d611 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -2,7 +2,7 @@ set -e # Start API server in background -gunicorn "app:create_api_app()" -c gunicorn_conf.py --bind 0.0.0.0:5000 --workers 4 --access-logfile - & +gunicorn "app:create_api_app()" -c gunicorn.conf.py --bind 0.0.0.0:5000 --workers 4 --access-logfile - & # Start UI server in foreground -gunicorn "app:create_ui_app()" -c gunicorn_conf.py --bind 0.0.0.0:5100 --workers 4 --access-logfile - +gunicorn "app:create_ui_app()" -c gunicorn.conf.py --bind 0.0.0.0:5100 --workers 4 --access-logfile - -- 2.49.1 From ebef3dfa5766ec576ce10d8521c9aa5414d36f40 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 16:16:10 +0800 Subject: [PATCH 17/20] Second test for Server Header change --- app/__init__.py | 17 ++++++++++++++++- docker-entrypoint.sh | 4 ++-- gunicorn.conf.py | 3 --- 3 files changed, 18 insertions(+), 6 deletions(-) delete mode 100644 gunicorn.conf.py diff --git a/app/__init__.py b/app/__init__.py index a3b8989..ef8b4b2 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -25,6 +25,21 @@ from .storage import ObjectStorage from .version import get_version +class ServerHeaderMiddleware: + def __init__(self, app, server_name): + self.app = app + self.server_name = server_name + + def __call__(self, environ, start_response): + def custom_start_response(status, headers, exc_info=None): + # Remove existing Server header if present + headers = [(name, value) for name, value in headers if name.lower() != 'server'] + headers.append(('Server', self.server_name)) + return start_response(status, headers, exc_info) + + return self.app(environ, custom_start_response) + + def create_app( test_config: Optional[Dict[str, Any]] = None, *, @@ -50,6 +65,7 @@ def create_app( # Trust X-Forwarded-* headers from proxies app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) + app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app, "MyFSIO") _configure_cors(app) _configure_logging(app) @@ -215,5 +231,4 @@ def _configure_logging(app: Flask) -> None: }, ) response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" - response.headers["Server"] = "MyFSIO" return response diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 274d611..6fbaf2e 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -2,7 +2,7 @@ set -e # Start API server in background -gunicorn "app:create_api_app()" -c gunicorn.conf.py --bind 0.0.0.0:5000 --workers 4 --access-logfile - & +gunicorn "app:create_api_app()" --bind 0.0.0.0:5000 --workers 4 --access-logfile - & # Start UI server in foreground -gunicorn "app:create_ui_app()" -c gunicorn.conf.py --bind 0.0.0.0:5100 --workers 4 --access-logfile - +gunicorn "app:create_ui_app()" --bind 0.0.0.0:5100 --workers 4 --access-logfile - diff --git a/gunicorn.conf.py b/gunicorn.conf.py deleted file mode 100644 index 80964a3..0000000 --- a/gunicorn.conf.py +++ /dev/null @@ -1,3 +0,0 @@ -import gunicorn -gunicorn.SERVER = 'MyFSIO' - -- 2.49.1 From c8b1c331180e9e334fa6a2dd127a300b518c7182 Mon Sep 17 00:00:00 2001 
From: kqjy Date: Sat, 22 Nov 2025 17:20:52 +0800 Subject: [PATCH 18/20] Switch gunicorn to waitress --- app/__init__.py | 16 ---------------- docker-entrypoint.sh | 4 ++-- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index ef8b4b2..057579e 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -25,21 +25,6 @@ from .storage import ObjectStorage from .version import get_version -class ServerHeaderMiddleware: - def __init__(self, app, server_name): - self.app = app - self.server_name = server_name - - def __call__(self, environ, start_response): - def custom_start_response(status, headers, exc_info=None): - # Remove existing Server header if present - headers = [(name, value) for name, value in headers if name.lower() != 'server'] - headers.append(('Server', self.server_name)) - return start_response(status, headers, exc_info) - - return self.app(environ, custom_start_response) - - def create_app( test_config: Optional[Dict[str, Any]] = None, *, @@ -65,7 +50,6 @@ def create_app( # Trust X-Forwarded-* headers from proxies app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) - app.wsgi_app = ServerHeaderMiddleware(app.wsgi_app, "MyFSIO") _configure_cors(app) _configure_logging(app) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 6fbaf2e..90d68a4 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -2,7 +2,7 @@ set -e # Start API server in background -gunicorn "app:create_api_app()" --bind 0.0.0.0:5000 --workers 4 --access-logfile - & +waitress-serve --ident=MyFSIO --listen=*:5000 --call app:create_api_app & # Start UI server in foreground -gunicorn "app:create_ui_app()" --bind 0.0.0.0:5100 --workers 4 --access-logfile - +waitress-serve --ident=MyFSIO --listen=*:5100 --call app:create_ui_app diff --git a/requirements.txt b/requirements.txt index 7697a50..43f1ae7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ Flask-WTF>=1.2.1 pytest>=7.4 requests>=2.31 boto3>=1.34 -gunicorn>=21.2.0 +waitress>=2.1.2 -- 2.49.1 From 015c9cb52d3aa31c7e0c86c526f67cdf73a4eeba Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 17:41:23 +0800 Subject: [PATCH 19/20] Cleanup setup --- docker-entrypoint.sh | 7 ++----- run.py | 33 +++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 90d68a4..08c3a3a 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,8 +1,5 @@ #!/bin/sh set -e -# Start API server in background -waitress-serve --ident=MyFSIO --listen=*:5000 --call app:create_api_app & - -# Start UI server in foreground -waitress-serve --ident=MyFSIO --listen=*:5100 --call app:create_ui_app +# Run both services using the python runner in production mode +exec python run.py --prod diff --git a/run.py b/run.py index efd12a2..230ca48 100644 --- a/run.py +++ b/run.py @@ -18,20 +18,28 @@ def _is_debug_enabled() -> bool: return os.getenv("FLASK_DEBUG", "0").lower() in ("1", "true", "yes") -def serve_api(port: int) -> None: +def serve_api(port: int, prod: bool = False) -> None: app = create_api_app() - debug = _is_debug_enabled() - if debug: - warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning) - app.run(host=_server_host(), port=port, debug=debug) + if prod: + from waitress import serve + serve(app, host=_server_host(), port=port, ident="MyFSIO") + else: + debug = _is_debug_enabled() + if debug: + warnings.warn("DEBUG MODE ENABLED - DO NOT USE 
IN PRODUCTION", RuntimeWarning) + app.run(host=_server_host(), port=port, debug=debug) -def serve_ui(port: int) -> None: +def serve_ui(port: int, prod: bool = False) -> None: app = create_ui_app() - debug = _is_debug_enabled() - if debug: - warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning) - app.run(host=_server_host(), port=port, debug=debug) + if prod: + from waitress import serve + serve(app, host=_server_host(), port=port, ident="MyFSIO") + else: + debug = _is_debug_enabled() + if debug: + warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning) + app.run(host=_server_host(), port=port, debug=debug) if __name__ == "__main__": @@ -39,18 +47,19 @@ if __name__ == "__main__": parser.add_argument("--mode", choices=["api", "ui", "both"], default="both") parser.add_argument("--api-port", type=int, default=5000) parser.add_argument("--ui-port", type=int, default=5100) + parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress") args = parser.parse_args() if args.mode in {"api", "both"}: print(f"Starting API server on port {args.api_port}...") - api_proc = Process(target=serve_api, args=(args.api_port,), daemon=True) + api_proc = Process(target=serve_api, args=(args.api_port, args.prod), daemon=True) api_proc.start() else: api_proc = None if args.mode in {"ui", "both"}: print(f"Starting UI server on port {args.ui_port}...") - serve_ui(args.ui_port) + serve_ui(args.ui_port, args.prod) elif api_proc: try: api_proc.join() -- 2.49.1 From dddab6dbbcbc9ddde83f7971ec6baf7a4d1b8d41 Mon Sep 17 00:00:00 2001 From: kqjy Date: Sat, 22 Nov 2025 17:47:01 +0800 Subject: [PATCH 20/20] Change logging method --- app/__init__.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 057579e..3ea9264 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations import logging +import sys import time import uuid from logging.handlers import RotatingFileHandler @@ -171,23 +172,33 @@ class _RequestContextFilter(logging.Filter): def _configure_logging(app: Flask) -> None: - log_file = Path(app.config["LOG_FILE"]) - log_file.parent.mkdir(parents=True, exist_ok=True) - handler = RotatingFileHandler( - log_file, - maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)), - backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)), - encoding="utf-8", - ) formatter = logging.Formatter( "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s" ) - handler.setFormatter(formatter) - handler.addFilter(_RequestContextFilter()) + + # Stream Handler (stdout) - Primary for Docker + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setFormatter(formatter) + stream_handler.addFilter(_RequestContextFilter()) logger = app.logger logger.handlers.clear() - logger.addHandler(handler) + logger.addHandler(stream_handler) + + # File Handler (optional, if configured) + if app.config.get("LOG_TO_FILE"): + log_file = Path(app.config["LOG_FILE"]) + log_file.parent.mkdir(parents=True, exist_ok=True) + file_handler = RotatingFileHandler( + log_file, + maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)), + backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)), + encoding="utf-8", + ) + file_handler.setFormatter(formatter) + file_handler.addFilter(_RequestContextFilter()) + logger.addHandler(file_handler) + logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), 
logging.INFO)) @app.before_request -- 2.49.1
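For reference, the production path that `docker-entrypoint.sh` and `run.py --prod` converge on in patches 18-19 reduces to a single Waitress call per service; a minimal equivalent for the API process:

```python
from waitress import serve

from app import create_api_app

# ident sets the Server response header, replacing the earlier
# middleware/gunicorn attempts (patches 13-17).
serve(create_api_app(), host="0.0.0.0", port=5000, ident="MyFSIO")
```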