diff --git a/app/compression.py b/app/compression.py index 28e6899..bf32504 100644 --- a/app/compression.py +++ b/app/compression.py @@ -36,11 +36,11 @@ class GzipMiddleware: content_type = None content_length = None should_compress = False - is_streaming = False + passthrough = False exc_info_holder = [None] def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None): - nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, is_streaming + nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough response_started = True status_code = int(status.split(' ', 1)[0]) response_headers = list(headers) @@ -51,23 +51,29 @@ class GzipMiddleware: if name_lower == 'content-type': content_type = value.split(';')[0].strip().lower() elif name_lower == 'content-length': - content_length = int(value) + try: + content_length = int(value) + except (ValueError, TypeError): + pass elif name_lower == 'content-encoding': - should_compress = False + passthrough = True return start_response(status, headers, exc_info) elif name_lower == 'x-stream-response': - is_streaming = True + passthrough = True return start_response(status, headers, exc_info) if content_type and content_type in COMPRESSIBLE_MIMES: if content_length is None or content_length >= self.min_size: should_compress = True + else: + passthrough = True + return start_response(status, headers, exc_info) return None app_iter = self.app(environ, custom_start_response) - if is_streaming: + if passthrough: return app_iter response_body = b''.join(app_iter) diff --git a/app/s3_api.py b/app/s3_api.py index 0cdabf3..60898f6 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -2781,7 +2781,7 @@ def object_handler(bucket_name: str, object_key: str): try: stat = path.stat() file_size = stat.st_size - etag = storage._compute_etag(path) + etag = metadata.get("__etag__") or storage._compute_etag(path) except PermissionError: return _error_response("AccessDenied", "Permission denied accessing object", 403) except OSError as exc: @@ -2829,7 +2829,7 @@ def object_handler(bucket_name: str, object_key: str): try: stat = path.stat() response = Response(status=200) - etag = storage._compute_etag(path) + etag = metadata.get("__etag__") or storage._compute_etag(path) except PermissionError: return _error_response("AccessDenied", "Permission denied accessing object", 403) except OSError as exc: diff --git a/app/storage.py b/app/storage.py index e688bea..e3228ce 100644 --- a/app/storage.py +++ b/app/storage.py @@ -188,6 +188,7 @@ class ObjectStorage: self._object_cache_max_size = object_cache_max_size self._object_key_max_length_bytes = object_key_max_length_bytes self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {} + self._meta_index_locks: Dict[str, threading.Lock] = {} self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup") def _get_bucket_lock(self, bucket_id: str) -> threading.Lock: @@ -816,6 +817,10 @@ class ObjectStorage: if not object_path.exists(): raise ObjectNotFoundError("Object does not exist") + entry = self._read_index_entry(bucket_path.name, safe_key) + if entry is not None: + tags = entry.get("tags") + return tags if isinstance(tags, list) else [] for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)): if not meta_file.exists(): continue @@ -839,30 +844,31 @@ class ObjectStorage: if not object_path.exists(): raise ObjectNotFoundError("Object does not exist") - meta_file = self._metadata_file(bucket_path.name, safe_key) - - existing_payload: Dict[str, Any] = {} - if meta_file.exists(): - try: - existing_payload = json.loads(meta_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - + bucket_id = bucket_path.name + existing_entry = self._read_index_entry(bucket_id, safe_key) or {} + if not existing_entry: + meta_file = self._metadata_file(bucket_id, safe_key) + if meta_file.exists(): + try: + existing_entry = json.loads(meta_file.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + pass + if tags: - existing_payload["tags"] = tags + existing_entry["tags"] = tags else: - existing_payload.pop("tags", None) - - if existing_payload.get("metadata") or existing_payload.get("tags"): - meta_file.parent.mkdir(parents=True, exist_ok=True) - meta_file.write_text(json.dumps(existing_payload), encoding="utf-8") - elif meta_file.exists(): - meta_file.unlink() - parent = meta_file.parent - meta_root = self._bucket_meta_root(bucket_path.name) - while parent != meta_root and parent.exists() and not any(parent.iterdir()): - parent.rmdir() - parent = parent.parent + existing_entry.pop("tags", None) + + if existing_entry.get("metadata") or existing_entry.get("tags"): + self._write_index_entry(bucket_id, safe_key, existing_entry) + else: + self._delete_index_entry(bucket_id, safe_key) + old_meta = self._metadata_file(bucket_id, safe_key) + try: + if old_meta.exists(): + old_meta.unlink() + except OSError: + pass def delete_object_tags(self, bucket_name: str, object_key: str) -> None: """Delete all tags from an object.""" @@ -1529,7 +1535,7 @@ class ObjectStorage: if entry.is_dir(follow_symlinks=False): if check_newer(entry.path): return True - elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'): + elif entry.is_file(follow_symlinks=False) and (entry.name.endswith('.meta.json') or entry.name == '_index.json'): if entry.stat().st_mtime > index_mtime: return True except OSError: @@ -1543,22 +1549,50 @@ class ObjectStorage: meta_str = str(meta_root) meta_len = len(meta_str) + 1 meta_files: list[tuple[str, str]] = [] - + index_files: list[str] = [] + def collect_meta_files(dir_path: str) -> None: try: with os.scandir(dir_path) as it: for entry in it: if entry.is_dir(follow_symlinks=False): collect_meta_files(entry.path) - elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'): - rel = entry.path[meta_len:] - key = rel[:-10].replace(os.sep, '/') - meta_files.append((key, entry.path)) + elif entry.is_file(follow_symlinks=False): + if entry.name == '_index.json': + index_files.append(entry.path) + elif entry.name.endswith('.meta.json'): + rel = entry.path[meta_len:] + key = rel[:-10].replace(os.sep, '/') + meta_files.append((key, entry.path)) except OSError: pass - + collect_meta_files(meta_str) - + + meta_cache = {} + + for idx_path in index_files: + try: + with open(idx_path, 'r', encoding='utf-8') as f: + idx_data = json.load(f) + rel_dir = idx_path[meta_len:] + rel_dir = rel_dir.replace(os.sep, '/') + if rel_dir.endswith('/_index.json'): + dir_prefix = rel_dir[:-len('/_index.json')] + else: + dir_prefix = '' + for entry_name, entry_data in idx_data.items(): + if dir_prefix: + key = f"{dir_prefix}/{entry_name}" + else: + key = entry_name + meta = entry_data.get("metadata", {}) + etag = meta.get("__etag__") + if etag: + meta_cache[key] = etag + except (OSError, json.JSONDecodeError): + pass + def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]: key, path = item try: @@ -1575,15 +1609,16 @@ class ObjectStorage: return key, None except (OSError, UnicodeDecodeError): return key, None - - if meta_files: - meta_cache = {} - max_workers = min((os.cpu_count() or 4) * 2, len(meta_files), 16) + + legacy_meta_files = [(k, p) for k, p in meta_files if k not in meta_cache] + if legacy_meta_files: + max_workers = min((os.cpu_count() or 4) * 2, len(legacy_meta_files), 16) with ThreadPoolExecutor(max_workers=max_workers) as executor: - for key, etag in executor.map(read_meta_file, meta_files): + for key, etag in executor.map(read_meta_file, legacy_meta_files): if etag: meta_cache[key] = etag - + + if meta_cache: try: etag_index_path.parent.mkdir(parents=True, exist_ok=True) with open(etag_index_path, 'w', encoding='utf-8') as f: @@ -1833,6 +1868,64 @@ class ObjectStorage: meta_rel = Path(key.as_posix() + ".meta.json") return meta_root / meta_rel + def _index_file_for_key(self, bucket_name: str, key: Path) -> tuple[Path, str]: + meta_root = self._bucket_meta_root(bucket_name) + parent = key.parent + entry_name = key.name + if parent == Path("."): + return meta_root / "_index.json", entry_name + return meta_root / parent / "_index.json", entry_name + + def _get_meta_index_lock(self, index_path: str) -> threading.Lock: + with self._cache_lock: + if index_path not in self._meta_index_locks: + self._meta_index_locks[index_path] = threading.Lock() + return self._meta_index_locks[index_path] + + def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]: + index_path, entry_name = self._index_file_for_key(bucket_name, key) + if not index_path.exists(): + return None + try: + index_data = json.loads(index_path.read_text(encoding="utf-8")) + return index_data.get(entry_name) + except (OSError, json.JSONDecodeError): + return None + + def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None: + index_path, entry_name = self._index_file_for_key(bucket_name, key) + lock = self._get_meta_index_lock(str(index_path)) + with lock: + index_path.parent.mkdir(parents=True, exist_ok=True) + index_data: Dict[str, Any] = {} + if index_path.exists(): + try: + index_data = json.loads(index_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + pass + index_data[entry_name] = entry + index_path.write_text(json.dumps(index_data), encoding="utf-8") + + def _delete_index_entry(self, bucket_name: str, key: Path) -> None: + index_path, entry_name = self._index_file_for_key(bucket_name, key) + if not index_path.exists(): + return + lock = self._get_meta_index_lock(str(index_path)) + with lock: + try: + index_data = json.loads(index_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return + if entry_name in index_data: + del index_data[entry_name] + if index_data: + index_path.write_text(json.dumps(index_data), encoding="utf-8") + else: + try: + index_path.unlink() + except OSError: + pass + def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: if not metadata: return None @@ -1844,9 +1937,13 @@ class ObjectStorage: if not clean: self._delete_metadata(bucket_name, key) return - meta_file = self._metadata_file(bucket_name, key) - meta_file.parent.mkdir(parents=True, exist_ok=True) - meta_file.write_text(json.dumps({"metadata": clean}), encoding="utf-8") + self._write_index_entry(bucket_name, key, {"metadata": clean}) + old_meta = self._metadata_file(bucket_name, key) + try: + if old_meta.exists(): + old_meta.unlink() + except OSError: + pass def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None: bucket_path = self._bucket_path(bucket_name) @@ -1873,6 +1970,10 @@ class ObjectStorage: manifest_path.write_text(json.dumps(record), encoding="utf-8") def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]: + entry = self._read_index_entry(bucket_name, key) + if entry is not None: + data = entry.get("metadata") + return data if isinstance(data, dict) else {} for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)): if not meta_file.exists(): continue @@ -1903,6 +2004,7 @@ class ObjectStorage: raise StorageError(message) from last_error def _delete_metadata(self, bucket_name: str, key: Path) -> None: + self._delete_index_entry(bucket_name, key) locations = ( (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)), (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)), diff --git a/app/ui.py b/app/ui.py index c174594..1c23730 100644 --- a/app/ui.py +++ b/app/ui.py @@ -1162,7 +1162,9 @@ def object_preview(bucket_name: str, object_key: str) -> Response: "text/html", "text/xml", "application/xhtml+xml", "application/xml", "image/svg+xml", } - force_download = content_type.split(";")[0].strip().lower() in _DANGEROUS_TYPES + base_ct = content_type.split(";")[0].strip().lower() + if not download and base_ct in _DANGEROUS_TYPES: + content_type = "text/plain; charset=utf-8" def generate(): try: @@ -1181,7 +1183,7 @@ def object_preview(bucket_name: str, object_key: str) -> Response: headers["Content-Length"] = str(content_length) if content_range: headers["Content-Range"] = content_range - disposition = "attachment" if download or force_download else "inline" + disposition = "attachment" if download else "inline" if ascii_safe: headers["Content-Disposition"] = f'{disposition}; filename="{safe_filename}"' else: diff --git a/static/css/main.css b/static/css/main.css index 89a2c5f..0ab8050 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -1288,6 +1288,20 @@ html.sidebar-will-collapse .sidebar-user { padding: 2rem 1rem; } +#preview-text { + padding: 1rem 1.125rem; + max-height: 360px; + overflow: auto; + white-space: pre-wrap; + word-break: break-word; + font-family: 'SFMono-Regular', 'Menlo', 'Consolas', 'Liberation Mono', monospace; + font-size: .8rem; + line-height: 1.6; + tab-size: 4; + color: var(--myfsio-text); + background: transparent; +} + .upload-progress-stack { display: flex; flex-direction: column; diff --git a/static/js/bucket-detail-main.js b/static/js/bucket-detail-main.js index 783462a..041298a 100644 --- a/static/js/bucket-detail-main.js +++ b/static/js/bucket-detail-main.js @@ -101,6 +101,7 @@ const previewImage = document.getElementById('preview-image'); const previewVideo = document.getElementById('preview-video'); const previewAudio = document.getElementById('preview-audio'); + const previewText = document.getElementById('preview-text'); const previewIframe = document.getElementById('preview-iframe'); const downloadButton = document.getElementById('downloadButton'); const presignButton = document.getElementById('presignButton'); @@ -1895,6 +1896,10 @@ el.setAttribute('src', 'about:blank'); } }); + if (previewText) { + previewText.classList.add('d-none'); + previewText.textContent = ''; + } previewPlaceholder.classList.remove('d-none'); }; @@ -1958,11 +1963,28 @@ previewIframe.style.minHeight = '500px'; previewIframe.classList.remove('d-none'); previewPlaceholder.classList.add('d-none'); - } else if (previewUrl && lower.match(/\.(txt|log|json|md|csv|xml|html|htm|js|ts|py|java|c|cpp|h|css|scss|yaml|yml|toml|ini|cfg|conf|sh|bat)$/)) { - previewIframe.src = previewUrl; - previewIframe.style.minHeight = '200px'; - previewIframe.classList.remove('d-none'); + } else if (previewUrl && previewText && lower.match(/\.(txt|log|json|md|csv|xml|html|htm|js|ts|py|java|c|cpp|h|css|scss|yaml|yml|toml|ini|cfg|conf|sh|bat|rs|go|rb|php|sql|r|swift|kt|scala|pl|lua|zig|ex|exs|hs|erl|ps1|psm1|psd1|fish|zsh|env|properties|gradle|makefile|dockerfile|vagrantfile|gitignore|gitattributes|editorconfig|eslintrc|prettierrc)$/)) { + previewText.textContent = 'Loading\u2026'; + previewText.classList.remove('d-none'); previewPlaceholder.classList.add('d-none'); + const currentRow = row; + fetch(previewUrl) + .then((r) => { + if (!r.ok) throw new Error(r.statusText); + const len = parseInt(r.headers.get('Content-Length') || '0', 10); + if (len > 512 * 1024) { + return r.text().then((t) => t.slice(0, 512 * 1024) + '\n\n--- Truncated (file too large for preview) ---'); + } + return r.text(); + }) + .then((text) => { + if (activeRow !== currentRow) return; + previewText.textContent = text; + }) + .catch(() => { + if (activeRow !== currentRow) return; + previewText.textContent = 'Failed to load preview'; + }); } const metadataUrl = row.dataset.metadataUrl; diff --git a/templates/bucket_detail.html b/templates/bucket_detail.html index 2190a11..182e5e7 100644 --- a/templates/bucket_detail.html +++ b/templates/bucket_detail.html @@ -321,7 +321,8 @@ Object preview - +

+