perf: shallow listing, os.scandir stats, server-side search for large buckets
This commit is contained in:
@@ -193,6 +193,9 @@ class EncryptedObjectStorage:
|
||||
def list_objects_shallow(self, bucket_name: str, **kwargs):
    """Forward a shallow (single-level) listing request, unchanged, to the
    wrapped storage backend and return its result as-is."""
    backend = self.storage
    return backend.list_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def search_objects(self, bucket_name: str, query: str, **kwargs):
    """Forward a server-side key search, unchanged, to the wrapped
    storage backend and return its result as-is."""
    backend = self.storage
    return backend.search_objects(bucket_name, query, **kwargs)
|
||||
|
||||
def list_objects_all(self, bucket_name: str):
    """Forward a full bucket listing request, unchanged, to the wrapped
    storage backend and return its result as-is."""
    backend = self.storage
    return backend.list_objects_all(bucket_name)
|
||||
|
||||
|
||||
@@ -692,6 +692,83 @@ class ObjectStorage:
|
||||
next_continuation_token=next_token,
|
||||
)
|
||||
|
||||
def search_objects(
    self,
    bucket_name: str,
    query: str,
    *,
    prefix: str = "",
    limit: int = 500,
) -> Dict[str, Any]:
    """Case-insensitive substring search over object keys in a bucket.

    Walks the bucket directory iteratively with ``os.scandir`` (explicit
    stack, no recursion) and matches *query* against each object's key.
    Collects up to ``limit * 4`` matches so the lexicographic sort at the
    end can return the first ``limit`` keys from a larger candidate pool.

    Args:
        bucket_name: Bucket to search.
        query: Substring matched case-insensitively against object keys.
        prefix: Optional key prefix restricting the search subtree.
        limit: Maximum number of results returned (after sorting).

    Returns:
        ``{"results": [...], "truncated": bool}`` where each result dict
        carries ``key``, ``size`` and ``last_modified``.

    Raises:
        BucketNotFoundError: If the bucket directory does not exist.
    """
    bucket_path = self._bucket_path(bucket_name)
    if not bucket_path.is_dir():
        raise BucketNotFoundError("Bucket does not exist")

    if prefix:
        search_root = bucket_path / prefix.replace("/", os.sep)
        if not search_root.is_dir():
            return {"results": [], "truncated": False}
        # Containment check. A plain startswith() on the string form would
        # accept sibling directories (e.g. /data/bucketX for /data/bucket),
        # so compare the resolved paths structurally instead.
        resolved = search_root.resolve()
        bucket_resolved = bucket_path.resolve()
        if resolved != bucket_resolved and bucket_resolved not in resolved.parents:
            return {"results": [], "truncated": False}
    else:
        search_root = bucket_path

    query_lower = query.lower()
    results: list[Dict[str, Any]] = []
    internal = self.INTERNAL_FOLDERS
    bucket_str = str(bucket_path)
    bucket_len = len(bucket_str) + 1  # +1 skips the path separator
    meta_root = self._bucket_meta_root(bucket_name)
    # Over-collect candidates so the sort below can pick the
    # lexicographically-first `limit` keys rather than scan order.
    scan_limit = limit * 4

    matched = 0
    stack = [str(search_root)]
    while stack:
        current = stack.pop()
        try:
            with os.scandir(current) as it:
                for entry in it:
                    # Internal bookkeeping folders live only at bucket root.
                    if current == bucket_str and entry.name in internal:
                        continue
                    if entry.is_dir(follow_symlinks=False):
                        stack.append(entry.path)
                    elif entry.is_file(follow_symlinks=False):
                        key = entry.path[bucket_len:].replace(os.sep, "/")
                        if query_lower in key.lower():
                            st = entry.stat(follow_symlinks=False)
                            meta_path = meta_root / (key + ".meta.json")
                            last_modified = ""
                            try:
                                # Prefer the recorded timestamp; fall back
                                # to the filesystem mtime below.
                                if meta_path.exists():
                                    md = json.loads(meta_path.read_text(encoding="utf-8"))
                                    last_modified = md.get("last_modified", "")
                            except (OSError, json.JSONDecodeError):
                                pass
                            if not last_modified:
                                last_modified = datetime.fromtimestamp(
                                    st.st_mtime, tz=timezone.utc
                                ).strftime("%Y-%m-%dT%H:%M:%S.000Z")
                            results.append({
                                "key": key,
                                "size": st.st_size,
                                "last_modified": last_modified,
                            })
                            matched += 1
                            if matched >= scan_limit:
                                break
        except PermissionError:
            # Unreadable directory: skip it, keep searching the rest.
            continue
        if matched >= scan_limit:
            break

    results.sort(key=lambda r: r["key"])
    truncated = len(results) > limit
    return {"results": results[:limit], "truncated": truncated}
|
||||
|
||||
def put_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
|
||||
27
app/ui.py
27
app/ui.py
@@ -641,6 +641,33 @@ def stream_bucket_objects(bucket_name: str):
|
||||
)
|
||||
|
||||
|
||||
@ui_bp.get("/buckets/<bucket_name>/objects/search")
@limiter.limit("30 per minute")
def search_bucket_objects(bucket_name: str):
    """Server-side key-search endpoint for the bucket browser UI.

    Requires "list" permission on the bucket; returns 403 on an IAM
    failure and 404 when the storage layer reports an error. An empty
    query short-circuits to an empty result set.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "list")
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403

    query = request.args.get("q", "").strip()
    if not query:
        return jsonify({"results": [], "truncated": False})

    # Clamp the requested page size to [1, 1000]; fall back to 500 on junk.
    try:
        limit = max(1, min(int(request.args.get("limit", 500)), 1000))
    except (ValueError, TypeError):
        limit = 500

    prefix = request.args.get("prefix", "").strip()

    storage = _storage()
    try:
        payload = storage.search_objects(bucket_name, query, prefix=prefix, limit=limit)
        return jsonify(payload)
    except StorageError as exc:
        return jsonify({"error": str(exc)}), 404
|
||||
|
||||
|
||||
@ui_bp.post("/buckets/<bucket_name>/upload")
|
||||
@limiter.limit("30 per minute")
|
||||
def upload_object(bucket_name: str):
|
||||
|
||||
@@ -374,19 +374,19 @@
|
||||
|
||||
const items = [];
|
||||
|
||||
if (useDelimiterMode && streamFolders.length > 0) {
|
||||
if (searchResults !== null) {
|
||||
searchResults.forEach(obj => {
|
||||
items.push({ type: 'file', data: obj, displayKey: obj.key });
|
||||
});
|
||||
} else if (useDelimiterMode && streamFolders.length > 0) {
|
||||
streamFolders.forEach(folderPath => {
|
||||
const folderName = folderPath.slice(currentPrefix.length).replace(/\/$/, '');
|
||||
if (!currentFilterTerm || folderName.toLowerCase().includes(currentFilterTerm)) {
|
||||
items.push({ type: 'folder', path: folderPath, displayKey: folderName });
|
||||
}
|
||||
});
|
||||
allObjects.forEach(obj => {
|
||||
const remainder = obj.key.slice(currentPrefix.length);
|
||||
if (!remainder) return;
|
||||
if (!currentFilterTerm || remainder.toLowerCase().includes(currentFilterTerm)) {
|
||||
items.push({ type: 'file', data: obj, displayKey: remainder });
|
||||
}
|
||||
});
|
||||
} else {
|
||||
const folders = new Set();
|
||||
@@ -402,9 +402,7 @@
|
||||
const slashIndex = remainder.indexOf('/');
|
||||
|
||||
if (slashIndex === -1 && !isFolderMarker) {
|
||||
if (!currentFilterTerm || remainder.toLowerCase().includes(currentFilterTerm)) {
|
||||
items.push({ type: 'file', data: obj, displayKey: remainder });
|
||||
}
|
||||
} else {
|
||||
const effectiveSlashIndex = isFolderMarker && slashIndex === remainder.length - 1
|
||||
? slashIndex
|
||||
@@ -413,11 +411,9 @@
|
||||
const folderPath = currentPrefix + folderName + '/';
|
||||
if (!folders.has(folderPath)) {
|
||||
folders.add(folderPath);
|
||||
if (!currentFilterTerm || folderName.toLowerCase().includes(currentFilterTerm)) {
|
||||
items.push({ type: 'folder', path: folderPath, displayKey: folderName });
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2094,8 +2090,63 @@
|
||||
}
|
||||
};
|
||||
|
||||
// Server-side search state: the debounce timer, the in-flight request's
// abort controller, and the active result set (null = no server search).
let searchDebounceTimer = null;
let searchAbortController = null;
let searchResults = null;

// Ask the /search endpoint for keys matching `term`, cancelling any
// in-flight request first, then swap the rendered list over to the results.
const performServerSearch = async (term) => {
  if (searchAbortController) searchAbortController.abort();
  searchAbortController = new AbortController();

  try {
    const params = new URLSearchParams({ q: term, limit: '500' });
    if (currentPrefix) params.set('prefix', currentPrefix);
    const searchUrl = objectsStreamUrl.replace('/stream', '/search');
    const response = await fetch(`${searchUrl}?${params}`, {
      signal: searchAbortController.signal,
    });
    if (!response.ok) throw new Error(`HTTP ${response.status}`);

    const payload = await response.json();
    searchResults = (payload.results || []).map(obj => processStreamObject(obj));

    // Invalidate the memoized view so the virtual list re-derives items.
    memoizedVisibleItems = null;
    memoizedInputs = { objectCount: -1, folderCount: -1, prefix: null, filterTerm: null };
    refreshVirtualList();

    if (loadMoreStatus) {
      const countText = searchResults.length.toLocaleString();
      const truncated = payload.truncated ? '+' : '';
      loadMoreStatus.textContent = `${countText}${truncated} result${searchResults.length !== 1 ? 's' : ''}`;
    }
  } catch (e) {
    // An abort just means a newer search superseded this one.
    if (e.name === 'AbortError') return;
    if (loadMoreStatus) {
      loadMoreStatus.textContent = 'Search failed';
    }
  }
};
|
||||
|
||||
// Search-box input: debounce a server-side search while typing; when the
// box is cleared, tear down the active search and restore the streamed view.
document.getElementById('object-search')?.addEventListener('input', (event) => {
  const newTerm = event.target.value.toLowerCase();
  // Capture the previous state BEFORE overwriting currentFilterTerm.
  // (Assigning currentFilterTerm first would make wasFiltering always equal
  // isFiltering, so the clear-search branch below could never run.)
  const wasFiltering = currentFilterTerm.length > 0;
  const isFiltering = newTerm.length > 0;
  currentFilterTerm = newTerm;

  clearTimeout(searchDebounceTimer);

  if (isFiltering) {
    searchDebounceTimer = setTimeout(() => performServerSearch(newTerm), 300);
    return;
  }

  if (!isFiltering && wasFiltering) {
    // Box just transitioned to empty: cancel any in-flight request and
    // drop the server results so the locally streamed list shows again.
    if (searchAbortController) searchAbortController.abort();
    searchResults = null;
    memoizedVisibleItems = null;
    memoizedInputs = { objectCount: -1, folderCount: -1, prefix: null, filterTerm: null };
    if (loadMoreStatus) {
      loadMoreStatus.textContent = buildBottomStatusText(streamingComplete);
    }
  }

  updateFilterWarning();
  refreshVirtualList();
});
|
||||
@@ -2136,7 +2187,18 @@
|
||||
var searchInput = document.getElementById('object-search');
|
||||
if (searchInput && document.activeElement === searchInput) {
|
||||
searchInput.value = '';
|
||||
const wasFiltering = currentFilterTerm.length > 0;
|
||||
currentFilterTerm = '';
|
||||
if (wasFiltering) {
|
||||
clearTimeout(searchDebounceTimer);
|
||||
if (searchAbortController) searchAbortController.abort();
|
||||
searchResults = null;
|
||||
memoizedVisibleItems = null;
|
||||
memoizedInputs = { objectCount: -1, folderCount: -1, prefix: null, filterTerm: null };
|
||||
if (loadMoreStatus) {
|
||||
loadMoreStatus.textContent = buildBottomStatusText(streamingComplete);
|
||||
}
|
||||
}
|
||||
refreshVirtualList();
|
||||
searchInput.blur();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user