Switch from Waitress to Granian (Rust/hyper WSGI server) for improved concurrency

2026-03-14 18:17:39 +08:00
parent 55568d6892
commit d878134ebf
6 changed files with 43 additions and 43 deletions
--- a/app/init.py
+++ b/app/init.py
@@ -107,7 +107,6 @@ def create_app(
            )
        app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)

-    # Enable gzip compression for responses (10-20x smaller JSON payloads)
    if app.config.get("ENABLE_GZIP", True):
        app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)

@@ -678,6 +677,7 @@ def _configure_logging(app: Flask) -> None:
                },
            )
        response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
+        response.headers["Server"] = "MyFSIO"

        operation_metrics = app.extensions.get("operation_metrics")
        if operation_metrics:
--- a/app/config.py
+++ b/app/config.py
@@ -25,7 +25,7 @@ def _calculate_auto_connection_limit() -> int:


 def _calculate_auto_backlog(connection_limit: int) -> int:
-    return max(64, min(connection_limit * 2, 4096))
+    return max(128, min(connection_limit * 2, 4096))


 def _validate_rate_limit(value: str) -> str:
@@ -504,8 +504,8 @@ class AppConfig:
            issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
        if not (10 <= self.server_connection_limit <= 1000):
            issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
-        if not (64 <= self.server_backlog <= 4096):
-            issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
+        if not (128 <= self.server_backlog <= 4096):
+            issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
        if not (10 <= self.server_channel_timeout <= 300):
            issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")

--- a/docs.md
+++ b/docs.md
@@ -180,9 +180,9 @@ All configuration is done via environment variables. The table below lists every

 | Variable | Default | Notes |
 | --- | --- | --- |
-| `SERVER_THREADS` | `0` (auto) | Waitress worker threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
-| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent connections (10-1000). Set to `0` for auto-calculation based on available RAM. |
-| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (64-4096). Set to `0` for auto-calculation (connection_limit × 2). |
+| `SERVER_THREADS` | `0` (auto) | Granian blocking threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
+| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent requests per worker (10-1000). Set to `0` for auto-calculation based on available RAM. |
+| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (128-4096). Set to `0` for auto-calculation (connection_limit × 2). |
 | `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |

 ### Logging
@@ -339,7 +339,7 @@ Before deploying to production, ensure you:
 4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
 5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
 6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
-7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server
+7. **Use `--prod` flag** - Runs with Granian instead of Flask dev server
 8. **Set credential expiry** - Assign `expires_at` to non-admin users for time-limited access

 ### Proxy Configuration
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ python-dotenv>=1.2.1
 pytest>=9.0.2
 requests>=2.32.5
 boto3>=1.42.14
-waitress>=3.0.2
+granian>=2.2.0
 psutil>=7.1.3
 cryptography>=46.0.3
 defusedxml>=0.7.1
--- a/run.py
+++ b/run.py
@@ -40,24 +40,37 @@ def _is_frozen() -> bool:
    return getattr(sys, 'frozen', False) or '__compiled__' in globals()


+def _serve_granian(target: str, port: int, config: Optional[AppConfig] = None) -> None:
+    """Serve the WSGI app factory named by ``target`` with Granian.
+
+    The server binds to ``_server_host()`` on ``port`` and always runs a
+    single worker using the WSGI interface. When ``config`` is provided, its
+    thread, backlog, connection-limit and channel-timeout values are
+    forwarded to Granian; otherwise none of those options are passed.
+
+    Args:
+        target: Import string of the app factory (e.g. ``"app:create_api_app"``).
+        port: TCP port to listen on.
+        config: Optional application config supplying the server tuning values.
+    """
+    from granian import Granian
+    from granian.constants import Interfaces
+    from granian.http import HTTP1Settings
+
+    options: dict = dict(
+        target=target,
+        address=_server_host(),
+        port=port,
+        interface=Interfaces.WSGI,
+        factory=True,
+        workers=1,
+    )
+
+    if config:
+        options.update(
+            blocking_threads=config.server_threads,
+            backlog=config.server_backlog,
+            backpressure=config.server_connection_limit,
+            # The configured timeout is scaled by 1000 - presumably seconds to
+            # the milliseconds Granian expects; confirm against Granian docs.
+            http1_settings=HTTP1Settings(
+                header_read_timeout=config.server_channel_timeout * 1000,
+            ),
+        )
+
+    Granian(**options).serve()
+
+
 def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
-    app = create_api_app()
    if prod:
-        from waitress import serve
-        if config:
-            serve(
-                app,
-                host=_server_host(),
-                port=port,
-                ident="MyFSIO",
-                threads=config.server_threads,
-                connection_limit=config.server_connection_limit,
-                backlog=config.server_backlog,
-                channel_timeout=config.server_channel_timeout,
-            )
-        else:
-            serve(app, host=_server_host(), port=port, ident="MyFSIO")
+        _serve_granian("app:create_api_app", port, config)
    else:
+        app = create_api_app()
        debug = _is_debug_enabled()
        if debug:
            warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
@@ -65,23 +78,10 @@ def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None)


 def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
-    app = create_ui_app()
    if prod:
-        from waitress import serve
-        if config:
-            serve(
-                app,
-                host=_server_host(),
-                port=port,
-                ident="MyFSIO",
-                threads=config.server_threads,
-                connection_limit=config.server_connection_limit,
-                backlog=config.server_backlog,
-                channel_timeout=config.server_channel_timeout,
-            )
-        else:
-            serve(app, host=_server_host(), port=port, ident="MyFSIO")
+        _serve_granian("app:create_ui_app", port, config)
    else:
+        app = create_ui_app()
        debug = _is_debug_enabled()
        if debug:
            warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
@@ -192,7 +192,7 @@ if __name__ == "__main__":
    parser.add_argument("--mode", choices=["api", "ui", "both", "reset-cred"], default="both")
    parser.add_argument("--api-port", type=int, default=5000)
    parser.add_argument("--ui-port", type=int, default=5100)
-    parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress")
+    parser.add_argument("--prod", action="store_true", help="Run in production mode using Granian")
    parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)")
    parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit")
    parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
@@ -235,7 +235,7 @@ if __name__ == "__main__":
            pass
    
    if prod_mode:
-        print("Running in production mode (Waitress)")
+        print("Running in production mode (Granian)")
        issues = config.validate_and_report()
        critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
        if critical_issues:
--- a/templates/docs.html
+++ b/templates/docs.html
@@ -84,7 +84,7 @@ pip install -r requirements.txt
 # Run both API and UI (Development)
 python run.py

-# Run in Production (Waitress server)
+# Run in Production (Granian server)
 python run.py --prod

 # Or run individually
@@ -220,7 +220,7 @@ python run.py --mode ui
              <tr>
                <td><code>SERVER_THREADS</code></td>
                <td><code>0</code> (auto)</td>
-                <td>Waitress worker threads (1-64). 0 = auto (CPU cores × 2).</td>
+                <td>Granian blocking threads (1-64). 0 = auto (CPU cores × 2).</td>
              </tr>
              <tr>
                <td><code>SERVER_CONNECTION_LIMIT</code></td>