diff --git a/app/config.py b/app/config.py index 2778963..8e04441 100644 --- a/app/config.py +++ b/app/config.py @@ -90,6 +90,10 @@ class AppConfig: operation_metrics_enabled: bool operation_metrics_interval_minutes: int operation_metrics_retention_hours: int + server_threads: int + server_connection_limit: int + server_backlog: int + server_channel_timeout: int @classmethod def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig": @@ -193,6 +197,11 @@ class AppConfig: operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5)) operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24)) + server_threads = int(_get("SERVER_THREADS", 4)) + server_connection_limit = int(_get("SERVER_CONNECTION_LIMIT", 100)) + server_backlog = int(_get("SERVER_BACKLOG", 1024)) + server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120)) + return cls(storage_root=storage_root, max_upload_size=max_upload_size, ui_page_size=ui_page_size, @@ -236,7 +245,11 @@ class AppConfig: metrics_history_interval_minutes=metrics_history_interval_minutes, operation_metrics_enabled=operation_metrics_enabled, operation_metrics_interval_minutes=operation_metrics_interval_minutes, - operation_metrics_retention_hours=operation_metrics_retention_hours) + operation_metrics_retention_hours=operation_metrics_retention_hours, + server_threads=server_threads, + server_connection_limit=server_connection_limit, + server_backlog=server_backlog, + server_channel_timeout=server_channel_timeout) def validate_and_report(self) -> list[str]: """Validate configuration and return a list of warnings/issues. @@ -296,7 +309,35 @@ class AppConfig: if "*" in self.cors_origins: issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.") - + + if not (1 <= self.server_threads <= 64): + issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.") + if not (10 <= self.server_connection_limit <= 1000): + issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.") + if not (64 <= self.server_backlog <= 4096): + issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.") + if not (10 <= self.server_channel_timeout <= 300): + issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.") + + if sys.platform != "win32": + try: + import resource + soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE) + threshold = int(soft_limit * 0.8) + if self.server_connection_limit > threshold: + issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.") + except (ImportError, OSError): + pass + + try: + import psutil + available_mb = psutil.virtual_memory().available / (1024 * 1024) + estimated_mb = self.server_threads * 50 + if estimated_mb > available_mb * 0.5: + issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).") + except ImportError: + pass + return issues def print_startup_summary(self) -> None: @@ -314,6 +355,10 @@ class AppConfig: print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})") if self.kms_enabled: print(f" KMS: Enabled (Keys: {self.kms_keys_path})") + print(f" SERVER_THREADS: {self.server_threads}") + print(f" CONNECTION_LIMIT: {self.server_connection_limit}") + print(f" BACKLOG: {self.server_backlog}") + print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s") print("=" * 60) issues = self.validate_and_report() @@ -371,4 +416,8 @@ class AppConfig: "OPERATION_METRICS_ENABLED": 
self.operation_metrics_enabled, "OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes, "OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours, + "SERVER_THREADS": self.server_threads, + "SERVER_CONNECTION_LIMIT": self.server_connection_limit, + "SERVER_BACKLOG": self.server_backlog, + "SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout, } diff --git a/docs.md b/docs.md index c14d40a..54f956b 100644 --- a/docs.md +++ b/docs.md @@ -168,6 +168,15 @@ All configuration is done via environment variables. The table below lists every | `RATE_LIMIT_DEFAULT` | `200 per minute` | Default rate limit for API endpoints. | | `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. | +### Server Configuration + +| Variable | Default | Notes | +| --- | --- | --- | +| `SERVER_THREADS` | `4` | Waitress worker threads (1-64). More threads handle more concurrent requests but use more memory. | +| `SERVER_CONNECTION_LIMIT` | `100` | Maximum concurrent connections (10-1000). Ensure OS file descriptor limits support this value. | +| `SERVER_BACKLOG` | `1024` | TCP listen backlog (64-4096). Connections queue here when all threads are busy. | +| `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). | + ### Logging | Variable | Default | Notes | diff --git a/run.py b/run.py index 3de61c4..013f79c 100644 --- a/run.py +++ b/run.py @@ -18,6 +18,8 @@ for _env_file in [ if _env_file.exists(): load_dotenv(_env_file, override=True) +from typing import Optional + from app import create_api_app, create_ui_app from app.config import AppConfig @@ -36,11 +38,23 @@ def _is_frozen() -> bool: return getattr(sys, 'frozen', False) or '__compiled__' in globals() -def serve_api(port: int, prod: bool = False) -> None: +def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None: app = create_api_app() if prod: from waitress import serve - serve(app, host=_server_host(), port=port, ident="MyFSIO") + if config: + serve( + app, + host=_server_host(), + port=port, + ident="MyFSIO", + threads=config.server_threads, + connection_limit=config.server_connection_limit, + backlog=config.server_backlog, + channel_timeout=config.server_channel_timeout, + ) + else: + serve(app, host=_server_host(), port=port, ident="MyFSIO") else: debug = _is_debug_enabled() if debug: @@ -48,11 +62,23 @@ def serve_api(port: int, prod: bool = False) -> None: app.run(host=_server_host(), port=port, debug=debug) -def serve_ui(port: int, prod: bool = False) -> None: +def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None: app = create_ui_app() if prod: from waitress import serve - serve(app, host=_server_host(), port=port, ident="MyFSIO") + if config: + serve( + app, + host=_server_host(), + port=port, + ident="MyFSIO", + threads=config.server_threads, + connection_limit=config.server_connection_limit, + backlog=config.server_backlog, + channel_timeout=config.server_channel_timeout, + ) + else: + serve(app, host=_server_host(), port=port, ident="MyFSIO") else: debug = _is_debug_enabled() if debug: @@ -71,7 +97,6 @@ if __name__ == "__main__": parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit") args = parser.parse_args() - # Handle config check/show modes if args.check_config or args.show_config: config = AppConfig.from_env() config.print_startup_summary() @@ -81,49 +106,50 @@ if __name__ == "__main__": sys.exit(1 if critical else 0) sys.exit(0) - # Default to production mode when running as compiled binary - # unless --dev is explicitly passed prod_mode = args.prod or (_is_frozen() and not args.dev) - # Validate configuration before starting config = AppConfig.from_env() - # Show startup summary only on first run (when marker file doesn't exist) first_run_marker = config.storage_root / ".myfsio.sys" / ".initialized" is_first_run = not first_run_marker.exists() if is_first_run: config.print_startup_summary() - # Check for critical issues that should prevent startup issues = config.validate_and_report() critical_issues = [i for i in issues if i.startswith("CRITICAL:")] if critical_issues: - print("ABORTING: Critical configuration issues detected. Fix them before starting.") + print("ABORTING: Critical configuration issues detected. Please fix them before starting.") sys.exit(1) - # Create the marker file to indicate successful first run try: first_run_marker.parent.mkdir(parents=True, exist_ok=True) first_run_marker.write_text(f"Initialized on {__import__('datetime').datetime.now().isoformat()}\n") except OSError: - pass # Non-critical, just skip marker creation + pass if prod_mode: print("Running in production mode (Waitress)") + issues = config.validate_and_report() + critical_issues = [i for i in issues if i.startswith("CRITICAL:")] + if critical_issues: + for issue in critical_issues: + print(f" {issue}") + print("ABORTING: Critical configuration issues detected. Please fix them before starting.") + sys.exit(1) else: print("Running in development mode (Flask dev server)") if args.mode in {"api", "both"}: print(f"Starting API server on port {args.api_port}...") - api_proc = Process(target=serve_api, args=(args.api_port, prod_mode), daemon=True) + api_proc = Process(target=serve_api, args=(args.api_port, prod_mode, config), daemon=True) api_proc.start() else: api_proc = None if args.mode in {"ui", "both"}: print(f"Starting UI server on port {args.ui_port}...") - serve_ui(args.ui_port, prod_mode) + serve_ui(args.ui_port, prod_mode, config) elif api_proc: try: api_proc.join() diff --git a/templates/docs.html b/templates/docs.html index fb12407..e0794c9 100644 --- a/templates/docs.html +++ b/templates/docs.html @@ -157,6 +157,29 @@ python run.py --mode ui
200 per minuteSERVER_THREADS4SERVER_CONNECTION_LIMIT100SERVER_BACKLOG1024SERVER_CHANNEL_TIMEOUT120