From a7f9b0a22fd08ae635447d53fee780f3e5fbc28a Mon Sep 17 00:00:00 2001 From: kqjy Date: Mon, 23 Mar 2026 17:14:04 +0800 Subject: [PATCH] Convert GC to async with polling to prevent proxy timeouts --- app/admin_api.py | 12 ++-- app/gc.py | 99 +++++++++++++++++++----------- app/ui.py | 43 ++++++++++--- templates/system.html | 139 +++++++++++++++++++++++++++++++----------- tests/test_gc.py | 10 ++- 5 files changed, 218 insertions(+), 85 deletions(-) diff --git a/app/admin_api.py b/app/admin_api.py index d09658d..89c87f0 100644 --- a/app/admin_api.py +++ b/app/admin_api.py @@ -907,15 +907,11 @@ def gc_run_now(): if not gc: return _json_error("InvalidRequest", "GC is not enabled", 400) payload = request.get_json(silent=True) or {} - original_dry_run = gc.dry_run - if "dry_run" in payload: - gc.dry_run = bool(payload["dry_run"]) - try: - result = gc.run_now() - finally: - gc.dry_run = original_dry_run + started = gc.run_async(dry_run=payload.get("dry_run")) logger.info("GC manual run by %s", principal.access_key) - return jsonify(result.to_dict()) + if not started: + return _json_error("Conflict", "GC is already in progress", 409) + return jsonify({"status": "started"}) @admin_api_bp.route("/gc/history", methods=["GET"]) diff --git a/app/gc.py b/app/gc.py index 85f2617..16fa3b7 100644 --- a/app/gc.py +++ b/app/gc.py @@ -173,6 +173,8 @@ class GarbageCollector: self._timer: Optional[threading.Timer] = None self._shutdown = False self._lock = threading.Lock() + self._scanning = False + self._scan_start_time: Optional[float] = None self._io_throttle = max(0, io_throttle_ms) / 1000.0 self.history_store = GCHistoryStore(storage_root, max_records=max_history) @@ -214,45 +216,70 @@ class GarbageCollector: finally: self._schedule_next() - def run_now(self) -> GCResult: - start = time.time() - result = GCResult() + def run_now(self, dry_run: Optional[bool] = None) -> GCResult: + if not self._lock.acquire(blocking=False): + raise RuntimeError("GC is already in progress") - self._clean_temp_files(result) - self._clean_orphaned_multipart(result) - self._clean_stale_locks(result) - self._clean_orphaned_metadata(result) - self._clean_orphaned_versions(result) - self._clean_empty_dirs(result) + effective_dry_run = dry_run if dry_run is not None else self.dry_run - result.execution_time_seconds = time.time() - start + try: + self._scanning = True + self._scan_start_time = time.time() - if result.has_work or result.errors: - logger.info( - "GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), " - "locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s", - result.execution_time_seconds, - result.temp_files_deleted, - result.temp_bytes_freed / (1024 * 1024), - result.multipart_uploads_deleted, - result.multipart_bytes_freed / (1024 * 1024), - result.lock_files_deleted, - result.orphaned_metadata_deleted, - result.orphaned_versions_deleted, - result.orphaned_version_bytes_freed / (1024 * 1024), - result.empty_dirs_removed, - len(result.errors), - " (dry run)" if self.dry_run else "", + start = self._scan_start_time + result = GCResult() + + original_dry_run = self.dry_run + self.dry_run = effective_dry_run + try: + self._clean_temp_files(result) + self._clean_orphaned_multipart(result) + self._clean_stale_locks(result) + self._clean_orphaned_metadata(result) + self._clean_orphaned_versions(result) + self._clean_empty_dirs(result) + finally: + self.dry_run = original_dry_run + + result.execution_time_seconds = time.time() - start + + if result.has_work or result.errors: + logger.info( + "GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), " + "locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s", + result.execution_time_seconds, + result.temp_files_deleted, + result.temp_bytes_freed / (1024 * 1024), + result.multipart_uploads_deleted, + result.multipart_bytes_freed / (1024 * 1024), + result.lock_files_deleted, + result.orphaned_metadata_deleted, + result.orphaned_versions_deleted, + result.orphaned_version_bytes_freed / (1024 * 1024), + result.empty_dirs_removed, + len(result.errors), + " (dry run)" if effective_dry_run else "", + ) + + record = GCExecutionRecord( + timestamp=time.time(), + result=result.to_dict(), + dry_run=effective_dry_run, ) + self.history_store.add(record) - record = GCExecutionRecord( - timestamp=time.time(), - result=result.to_dict(), - dry_run=self.dry_run, - ) - self.history_store.add(record) + return result + finally: + self._scanning = False + self._scan_start_time = None + self._lock.release() - return result + def run_async(self, dry_run: Optional[bool] = None) -> bool: + if self._scanning: + return False + t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True) + t.start() + return True def _system_path(self) -> Path: return self.storage_root / self.SYSTEM_ROOT @@ -553,9 +580,10 @@ class GarbageCollector: return [r.to_dict() for r in records] def get_status(self) -> dict: - return { + status: Dict[str, Any] = { "enabled": not self._shutdown or self._timer is not None, "running": self._timer is not None and not self._shutdown, + "scanning": self._scanning, "interval_hours": self.interval_seconds / 3600.0, "temp_file_max_age_hours": self.temp_file_max_age_hours, "multipart_max_age_days": self.multipart_max_age_days, @@ -563,3 +591,6 @@ class GarbageCollector: "dry_run": self.dry_run, "io_throttle_ms": round(self._io_throttle * 1000), } + if self._scanning and self._scan_start_time: + status["scan_elapsed_seconds"] = time.time() - self._scan_start_time + return status diff --git a/app/ui.py b/app/ui.py index aff4fa4..4423a98 100644 --- a/app/ui.py +++ b/app/ui.py @@ -4179,14 +4179,43 @@ def system_gc_run(): return jsonify({"error": "GC is not enabled"}), 400 payload = request.get_json(silent=True) or {} - original_dry_run = gc.dry_run - if "dry_run" in payload: - gc.dry_run = bool(payload["dry_run"]) + started = gc.run_async(dry_run=payload.get("dry_run")) + if not started: + return jsonify({"error": "GC is already in progress"}), 409 + return jsonify({"status": "started"}) + + +@ui_bp.get("/system/gc/status") +def system_gc_status(): + principal = _current_principal() try: - result = gc.run_now() - finally: - gc.dry_run = original_dry_run - return jsonify(result.to_dict()) + _iam().authorize(principal, None, "iam:*") + except IamError: + return jsonify({"error": "Access denied"}), 403 + + gc = current_app.extensions.get("gc") + if not gc: + return jsonify({"error": "GC is not enabled"}), 400 + + return jsonify(gc.get_status()) + + +@ui_bp.get("/system/gc/history") +def system_gc_history(): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:*") + except IamError: + return jsonify({"error": "Access denied"}), 403 + + gc = current_app.extensions.get("gc") + if not gc: + return jsonify({"executions": []}) + + limit = min(int(request.args.get("limit", 10)), 200) + offset = int(request.args.get("offset", 0)) + records = gc.get_history(limit=limit, offset=offset) + return jsonify({"executions": records}) @ui_bp.post("/system/integrity/run") diff --git a/templates/system.html b/templates/system.html index ed3908d..f306507 100644 --- a/templates/system.html +++ b/templates/system.html @@ -122,6 +122,13 @@ +
+
+
+ GC in progress +
+
+
@@ -376,9 +383,92 @@ return (i === 0 ? b : b.toFixed(1)) + ' ' + units[i]; } + var _gcPollTimer = null; + var _gcLastDryRun = false; + + function _gcSetScanning(scanning) { + var banner = document.getElementById('gcScanningBanner'); + var btns = ['gcRunBtn', 'gcDryRunBtn']; + if (scanning) { + banner.classList.remove('d-none'); + btns.forEach(function (id) { + var el = document.getElementById(id); + if (el) el.disabled = true; + }); + } else { + banner.classList.add('d-none'); + document.getElementById('gcScanElapsed').textContent = ''; + btns.forEach(function (id) { + var el = document.getElementById(id); + if (el) el.disabled = false; + }); + } + } + + function _gcShowResult(data, dryRun) { + var container = document.getElementById('gcResult'); + var alert = document.getElementById('gcResultAlert'); + var title = document.getElementById('gcResultTitle'); + var body = document.getElementById('gcResultBody'); + container.classList.remove('d-none'); + + var totalItems = (data.temp_files_deleted || 0) + (data.multipart_uploads_deleted || 0) + + (data.lock_files_deleted || 0) + (data.orphaned_metadata_deleted || 0) + + (data.orphaned_versions_deleted || 0) + (data.empty_dirs_removed || 0); + var totalFreed = (data.temp_bytes_freed || 0) + (data.multipart_bytes_freed || 0) + + (data.orphaned_version_bytes_freed || 0); + + alert.className = totalItems > 0 ? 'alert alert-success mb-0 small' : 'alert alert-info mb-0 small'; + title.textContent = (dryRun ? '[Dry Run] ' : '') + 'Completed in ' + (data.execution_time_seconds || 0).toFixed(2) + 's'; + + var lines = []; + if (data.temp_files_deleted) lines.push('Temp files: ' + data.temp_files_deleted + ' (' + formatBytes(data.temp_bytes_freed) + ')'); + if (data.multipart_uploads_deleted) lines.push('Multipart uploads: ' + data.multipart_uploads_deleted + ' (' + formatBytes(data.multipart_bytes_freed) + ')'); + if (data.lock_files_deleted) lines.push('Lock files: ' + data.lock_files_deleted); + if (data.orphaned_metadata_deleted) lines.push('Orphaned metadata: ' + data.orphaned_metadata_deleted); + if (data.orphaned_versions_deleted) lines.push('Orphaned versions: ' + data.orphaned_versions_deleted + ' (' + formatBytes(data.orphaned_version_bytes_freed) + ')'); + if (data.empty_dirs_removed) lines.push('Empty directories: ' + data.empty_dirs_removed); + if (totalItems === 0) lines.push('Nothing to clean up.'); + if (totalFreed > 0) lines.push('Total freed: ' + formatBytes(totalFreed)); + if (data.errors && data.errors.length > 0) lines.push('Errors: ' + data.errors.join(', ')); + + body.innerHTML = lines.join('
'); + } + + function _gcPoll() { + fetch('{{ url_for("ui.system_gc_status") }}', { + headers: {'X-CSRFToken': csrfToken} + }) + .then(function (r) { return r.json(); }) + .then(function (status) { + if (status.scanning) { + var elapsed = status.scan_elapsed_seconds || 0; + document.getElementById('gcScanElapsed').textContent = ' (' + elapsed.toFixed(0) + 's)'; + _gcPollTimer = setTimeout(_gcPoll, 2000); + } else { + _gcSetScanning(false); + fetch('{{ url_for("ui.system_gc_history") }}?limit=1', { + headers: {'X-CSRFToken': csrfToken} + }) + .then(function (r) { return r.json(); }) + .then(function (hist) { + if (hist.executions && hist.executions.length > 0) { + var latest = hist.executions[0]; + _gcShowResult(latest.result, latest.dry_run); + } + }) + .catch(function () {}); + } + }) + .catch(function () { + _gcPollTimer = setTimeout(_gcPoll, 3000); + }); + } + window.runGC = function (dryRun) { - setLoading(dryRun ? 'gcDryRunBtn' : 'gcRunBtn', true); - setLoading(dryRun ? 'gcRunBtn' : 'gcDryRunBtn', true, true); + _gcLastDryRun = dryRun; + document.getElementById('gcResult').classList.add('d-none'); + _gcSetScanning(true); fetch('{{ url_for("ui.system_gc_run") }}', { method: 'POST', @@ -387,42 +477,22 @@ }) .then(function (r) { return r.json(); }) .then(function (data) { - var container = document.getElementById('gcResult'); - var alert = document.getElementById('gcResultAlert'); - var title = document.getElementById('gcResultTitle'); - var body = document.getElementById('gcResultBody'); - container.classList.remove('d-none'); - if (data.error) { + _gcSetScanning(false); + var container = document.getElementById('gcResult'); + var alert = document.getElementById('gcResultAlert'); + var title = document.getElementById('gcResultTitle'); + var body = document.getElementById('gcResultBody'); + container.classList.remove('d-none'); alert.className = 'alert alert-danger mb-0 small'; title.textContent = 'Error'; body.textContent = data.error; return; } - - var totalItems = (data.temp_files_deleted || 0) + (data.multipart_uploads_deleted || 0) + - (data.lock_files_deleted || 0) + (data.orphaned_metadata_deleted || 0) + - (data.orphaned_versions_deleted || 0) + (data.empty_dirs_removed || 0); - var totalFreed = (data.temp_bytes_freed || 0) + (data.multipart_bytes_freed || 0) + - (data.orphaned_version_bytes_freed || 0); - - alert.className = totalItems > 0 ? 'alert alert-success mb-0 small' : 'alert alert-info mb-0 small'; - title.textContent = (dryRun ? '[Dry Run] ' : '') + 'Completed in ' + (data.execution_time_seconds || 0).toFixed(2) + 's'; - - var lines = []; - if (data.temp_files_deleted) lines.push('Temp files: ' + data.temp_files_deleted + ' (' + formatBytes(data.temp_bytes_freed) + ')'); - if (data.multipart_uploads_deleted) lines.push('Multipart uploads: ' + data.multipart_uploads_deleted + ' (' + formatBytes(data.multipart_bytes_freed) + ')'); - if (data.lock_files_deleted) lines.push('Lock files: ' + data.lock_files_deleted); - if (data.orphaned_metadata_deleted) lines.push('Orphaned metadata: ' + data.orphaned_metadata_deleted); - if (data.orphaned_versions_deleted) lines.push('Orphaned versions: ' + data.orphaned_versions_deleted + ' (' + formatBytes(data.orphaned_version_bytes_freed) + ')'); - if (data.empty_dirs_removed) lines.push('Empty directories: ' + data.empty_dirs_removed); - if (totalItems === 0) lines.push('Nothing to clean up.'); - if (totalFreed > 0) lines.push('Total freed: ' + formatBytes(totalFreed)); - if (data.errors && data.errors.length > 0) lines.push('Errors: ' + data.errors.join(', ')); - - body.innerHTML = lines.join('
'); + _gcPollTimer = setTimeout(_gcPoll, 2000); }) .catch(function (err) { + _gcSetScanning(false); var container = document.getElementById('gcResult'); var alert = document.getElementById('gcResultAlert'); var title = document.getElementById('gcResultTitle'); @@ -431,13 +501,14 @@ alert.className = 'alert alert-danger mb-0 small'; title.textContent = 'Error'; body.textContent = err.message; - }) - .finally(function () { - setLoading('gcRunBtn', false); - setLoading('gcDryRunBtn', false); }); }; + {% if gc_status.scanning %} + _gcSetScanning(true); + _gcPollTimer = setTimeout(_gcPoll, 2000); + {% endif %} + var _integrityPollTimer = null; var _integrityLastMode = {dryRun: false, autoHeal: false}; diff --git a/tests/test_gc.py b/tests/test_gc.py index 174b1ce..58b3f81 100644 --- a/tests/test_gc.py +++ b/tests/test_gc.py @@ -317,7 +317,7 @@ class TestAdminAPI: ) assert resp.status_code == 200 data = resp.get_json() - assert "temp_files_deleted" in data + assert data["status"] == "started" def test_gc_dry_run(self, gc_app): client = gc_app.test_client() @@ -329,11 +329,17 @@ class TestAdminAPI: ) assert resp.status_code == 200 data = resp.get_json() - assert "temp_files_deleted" in data + assert data["status"] == "started" def test_gc_history(self, gc_app): + import time client = gc_app.test_client() client.post("/admin/gc/run", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}) + for _ in range(50): + time.sleep(0.1) + status = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}).get_json() + if not status.get("scanning"): + break resp = client.get("/admin/gc/history", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}) assert resp.status_code == 200 data = resp.get_json()