From a779b002d7423508b43fd0627bfc9aaa90ba2025 Mon Sep 17 00:00:00 2001 From: kqjy Date: Mon, 2 Feb 2026 13:30:06 +0800 Subject: [PATCH] Optimize CPU usage via caching and reducing ThreadPoolExecutor workers to prevent CPU saturation --- app/bucket_policies.py | 10 ++++++++-- app/iam.py | 23 +++++++++++++++++++++-- app/s3_api.py | 35 ++++++++++++++++++++++++++++++++++- app/storage.py | 3 ++- app/version.py | 2 +- 5 files changed, 66 insertions(+), 7 deletions(-) diff --git a/app/bucket_policies.py b/app/bucket_policies.py index 95a7ee2..fcb1e41 100644 --- a/app/bucket_policies.py +++ b/app/bucket_policies.py @@ -6,6 +6,7 @@ import re import time from dataclasses import dataclass, field from fnmatch import fnmatch, translate +from functools import lru_cache from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple @@ -13,9 +14,14 @@ from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple RESOURCE_PREFIX = "arn:aws:s3:::" +@lru_cache(maxsize=256) +def _compile_pattern(pattern: str) -> Pattern[str]: + return re.compile(translate(pattern), re.IGNORECASE) + + def _match_string_like(value: str, pattern: str) -> bool: - regex = translate(pattern) - return bool(re.match(regex, value, re.IGNORECASE)) + compiled = _compile_pattern(pattern) + return bool(compiled.match(value)) def _ip_in_cidr(ip_str: str, cidr: str) -> bool: diff --git a/app/iam.py b/app/iam.py index caf6b07..6ab4f38 100644 --- a/app/iam.py +++ b/app/iam.py @@ -4,6 +4,7 @@ import hashlib import hmac import json import math +import os import secrets import threading import time @@ -121,7 +122,8 @@ class IamService: self._failed_attempts: Dict[str, Deque[datetime]] = {} self._last_load_time = 0.0 self._principal_cache: Dict[str, Tuple[Principal, float]] = {} - self._cache_ttl = 10.0 + self._secret_key_cache: Dict[str, Tuple[str, float]] = {} + self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0")) self._last_stat_check = 0.0 self._stat_check_interval = 1.0 self._sessions: Dict[str, Dict[str, Any]] = {} @@ -139,6 +141,7 @@ class IamService: if self.config_path.stat().st_mtime > self._last_load_time: self._load() self._principal_cache.clear() + self._secret_key_cache.clear() except OSError: pass @@ -367,6 +370,9 @@ class IamService: user["secret_key"] = new_secret self._save() self._principal_cache.pop(access_key, None) + self._secret_key_cache.pop(access_key, None) + from .s3_api import clear_signing_key_cache + clear_signing_key_cache() self._load() return new_secret @@ -385,6 +391,10 @@ class IamService: raise IamError("User not found") self._raw_config["users"] = remaining self._save() + self._principal_cache.pop(access_key, None) + self._secret_key_cache.pop(access_key, None) + from .s3_api import clear_signing_key_cache + clear_signing_key_cache() self._load() def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None: @@ -546,10 +556,19 @@ class IamService: raise IamError("User not found") def get_secret_key(self, access_key: str) -> str | None: + now = time.time() + cached = self._secret_key_cache.get(access_key) + if cached: + secret_key, cached_time = cached + if now - cached_time < self._cache_ttl: + return secret_key + self._maybe_reload() record = self._users.get(access_key) if record: - return record["secret_key"] + secret_key = record["secret_key"] + self._secret_key_cache[access_key] = (secret_key, now) + return secret_key return None def get_principal(self, access_key: str) -> Principal | None: diff --git a/app/s3_api.py b/app/s3_api.py index 822997b..f5f599e 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -6,9 +6,12 @@ import hmac import logging import mimetypes import re +import threading +import time import uuid +from collections import OrderedDict from datetime import datetime, timedelta, timezone -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple from urllib.parse import quote, urlencode, urlparse, unquote from xml.etree.ElementTree import Element, SubElement, tostring, ParseError from defusedxml.ElementTree import fromstring @@ -181,11 +184,41 @@ def _sign(key: bytes, msg: str) -> bytes: return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() +_SIGNING_KEY_CACHE: OrderedDict[Tuple[str, str, str, str], Tuple[bytes, float]] = OrderedDict() +_SIGNING_KEY_CACHE_LOCK = threading.Lock() +_SIGNING_KEY_CACHE_TTL = 60.0 +_SIGNING_KEY_CACHE_MAX_SIZE = 256 + + +def clear_signing_key_cache() -> None: + with _SIGNING_KEY_CACHE_LOCK: + _SIGNING_KEY_CACHE.clear() + + def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes: + cache_key = (key, date_stamp, region_name, service_name) + now = time.time() + + with _SIGNING_KEY_CACHE_LOCK: + cached = _SIGNING_KEY_CACHE.get(cache_key) + if cached: + signing_key, cached_time = cached + if now - cached_time < _SIGNING_KEY_CACHE_TTL: + _SIGNING_KEY_CACHE.move_to_end(cache_key) + return signing_key + else: + del _SIGNING_KEY_CACHE[cache_key] + k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp) k_region = _sign(k_date, region_name) k_service = _sign(k_region, service_name) k_signing = _sign(k_service, "aws4_request") + + with _SIGNING_KEY_CACHE_LOCK: + if len(_SIGNING_KEY_CACHE) >= _SIGNING_KEY_CACHE_MAX_SIZE: + _SIGNING_KEY_CACHE.popitem(last=False) + _SIGNING_KEY_CACHE[cache_key] = (k_signing, now) + return k_signing diff --git a/app/storage.py b/app/storage.py index 2a034d0..502a93c 100644 --- a/app/storage.py +++ b/app/storage.py @@ -1469,7 +1469,8 @@ class ObjectStorage: if meta_files: meta_cache = {} - with ThreadPoolExecutor(max_workers=min(64, len(meta_files))) as executor: + max_workers = min((os.cpu_count() or 4) * 2, len(meta_files), 16) + with ThreadPoolExecutor(max_workers=max_workers) as executor: for key, etag in executor.map(read_meta_file, meta_files): if etag: meta_cache[key] = etag diff --git a/app/version.py b/app/version.py index 54f0689..be22c33 100644 --- a/app/version.py +++ b/app/version.py @@ -1,6 +1,6 @@ from __future__ import annotations -APP_VERSION = "0.2.4" +APP_VERSION = "0.2.5" def get_version() -> str: