# MyFSIO/app/bucket_policies.py

from __future__ import annotations

import ipaddress
import json
import re
import time
from dataclasses import dataclass, field
from fnmatch import fnmatch, translate
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple


# ARN prefix expected at the start of every policy "Resource" string;
# `_parse_resource` rejects strings without it and strips it before splitting
# the remainder into a bucket name and an optional key pattern.
RESOURCE_PREFIX = "arn:aws:s3:::"


def _match_string_like(value: str, pattern: str) -> bool:
    regex = translate(pattern)
    return bool(re.match(regex, value, re.IGNORECASE))


def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
    try:
        ip = ipaddress.ip_address(ip_str)
        network = ipaddress.ip_network(cidr, strict=False)
        return ip in network
    except ValueError:
        return False


def _evaluate_condition_operator(
    operator: str,
    condition_key: str,
    condition_values: List[str],
    context: Dict[str, Any],
) -> bool:
    context_value = context.get(condition_key)
    op_lower = operator.lower()
    if_exists = op_lower.endswith("ifexists")
    if if_exists:
        op_lower = op_lower[:-8]

    if context_value is None:
        return if_exists

    context_value_str = str(context_value)
    context_value_lower = context_value_str.lower()

    if op_lower == "stringequals":
        return context_value_str in condition_values
    elif op_lower == "stringnotequals":
        return context_value_str not in condition_values
    elif op_lower == "stringequalsignorecase":
        return context_value_lower in [v.lower() for v in condition_values]
    elif op_lower == "stringnotequalsignorecase":
        return context_value_lower not in [v.lower() for v in condition_values]
    elif op_lower == "stringlike":
        return any(_match_string_like(context_value_str, p) for p in condition_values)
    elif op_lower == "stringnotlike":
        return not any(_match_string_like(context_value_str, p) for p in condition_values)
    elif op_lower == "ipaddress":
        return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
    elif op_lower == "notipaddress":
        return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
    elif op_lower == "bool":
        bool_val = context_value_lower in ("true", "1", "yes")
        return str(bool_val).lower() in [v.lower() for v in condition_values]
    elif op_lower == "null":
        is_null = context_value is None or context_value == ""
        expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
        return is_null == expected_null

    return True

# Maps lowercase S3 action names to the shorter category names stored on
# parsed statements ("list", "read", "write", "delete", "share", "policy",
# "replication", "lifecycle", "cors"). `_normalize_action` strips and
# lowercases an action before looking it up here and returns names that are
# not in the table unchanged.
ACTION_ALIASES = {
    "s3:listbucket": "list",
    "s3:listallmybuckets": "list",
    "s3:listbucketversions": "list",
    "s3:listmultipartuploads": "list",
    "s3:listparts": "list",
    "s3:getobject": "read",
    "s3:getobjectversion": "read",
    "s3:getobjecttagging": "read",
    "s3:getobjectversiontagging": "read",
    "s3:getobjectacl": "read",
    "s3:getbucketversioning": "read",
    "s3:headobject": "read",
    "s3:headbucket": "read",
    "s3:putobject": "write",
    "s3:createbucket": "write",
    "s3:putobjecttagging": "write",
    "s3:putbucketversioning": "write",
    "s3:createmultipartupload": "write",
    "s3:uploadpart": "write",
    "s3:completemultipartupload": "write",
    "s3:abortmultipartupload": "write",
    "s3:copyobject": "write",
    "s3:deleteobject": "delete",
    "s3:deleteobjectversion": "delete",
    "s3:deletebucket": "delete",
    "s3:deleteobjecttagging": "delete",
    "s3:putobjectacl": "share",
    "s3:putbucketacl": "share",
    "s3:getbucketacl": "share",
    "s3:putbucketpolicy": "policy",
    "s3:getbucketpolicy": "policy",
    "s3:deletebucketpolicy": "policy",
    "s3:getreplicationconfiguration": "replication",
    "s3:putreplicationconfiguration": "replication",
    "s3:deletereplicationconfiguration": "replication",
    "s3:replicateobject": "replication",
    "s3:replicatetags": "replication",
    "s3:replicatedelete": "replication",
    "s3:getlifecycleconfiguration": "lifecycle",
    "s3:putlifecycleconfiguration": "lifecycle",
    "s3:deletelifecycleconfiguration": "lifecycle",
    "s3:getbucketlifecycle": "lifecycle",
    "s3:putbucketlifecycle": "lifecycle",
    "s3:getbucketcors": "cors",
    "s3:putbucketcors": "cors",
    "s3:deletebucketcors": "cors",
}


def _normalize_action(action: str) -> str:
    action = action.strip().lower()
    if action == "*":
        return "*"
    return ACTION_ALIASES.get(action, action)


def _normalize_actions(actions: Iterable[str]) -> List[str]:
    values: List[str] = []
    for action in actions:
        canonical = _normalize_action(action)
        if canonical == "*" and "*" not in values:
            return ["*"]
        if canonical and canonical not in values:
            values.append(canonical)
    return values


def _normalize_principals(principal_field: Any) -> List[str] | str:
    if principal_field == "*":
        return "*"

    def _collect(values: Any) -> List[str]:
        if values is None:
            return []
        if values == "*":
            return ["*"]
        if isinstance(values, str):
            return [values]
        if isinstance(values, dict):
            aggregated: List[str] = []
            for nested in values.values():
                chunk = _collect(nested)
                if "*" in chunk:
                    return ["*"]
                aggregated.extend(chunk)
            return aggregated
        if isinstance(values, Iterable):
            aggregated = []
            for nested in values:
                chunk = _collect(nested)
                if "*" in chunk:
                    return ["*"]
                aggregated.extend(chunk)
            return aggregated
        return [str(values)]

    normalized: List[str] = []
    for entry in _collect(principal_field):
        token = str(entry).strip()
        if token == "*":
            return "*"
        if token and token not in normalized:
            normalized.append(token)
    return normalized or "*"


def _parse_resource(resource: str) -> tuple[str | None, str | None]:
    """Split a resource ARN into ``(bucket, key_pattern)``.

    Returns ``(None, None)`` when *resource* does not start with
    ``RESOURCE_PREFIX``. Otherwise the text after the prefix is split at the
    first ``/``: with no slash the key pattern is None (bucket-level
    resource); with a slash an empty key pattern becomes ``"*"``. An empty
    bucket name becomes ``"*"`` in both cases.
    """
    if not resource.startswith(RESOURCE_PREFIX):
        return None, None
    tail = resource[len(RESOURCE_PREFIX):]
    bucket_part, slash, key_part = tail.partition("/")
    bucket_name = bucket_part if bucket_part else "*"
    if not slash:
        return bucket_name, None
    return bucket_name, (key_part if key_part else "*")


@dataclass
class BucketPolicyStatement:
    """One normalized bucket-policy statement plus its matching helpers.

    Attributes are stored already normalized: ``effect`` is compared against
    ``"deny"`` by the store, ``principals`` is either ``"*"`` or a list of
    access keys, ``actions`` holds canonical action names, and ``resources``
    holds ``(bucket, key_pattern)`` pairs where a key pattern of None denotes
    a bucket-level resource.
    """

    sid: Optional[str]
    effect: str
    principals: List[str] | str
    actions: List[str]
    resources: List[Tuple[str | None, str | None]]
    conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
    # Lazily built cache of compiled key patterns. It is excluded from the
    # generated __repr__/__eq__ so that two otherwise identical statements
    # still compare equal after only one of them has been used for matching.
    _compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = field(
        default=None, repr=False, compare=False
    )

    def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
        """Return ``(bucket, compiled_key_regex_or_None)`` pairs, compiling once."""
        compiled = self._compiled_patterns
        if compiled is None:
            # Build the full list first and assign it in a single step so the
            # cached attribute never holds a partially filled list.
            compiled = [
                (
                    resource_bucket,
                    None if key_pattern is None else re.compile(translate(key_pattern)),
                )
                for resource_bucket, key_pattern in self.resources
            ]
            self._compiled_patterns = compiled
        return compiled

    def matches_principal(self, access_key: Optional[str]) -> bool:
        """Return True when the statement applies to *access_key*.

        A wildcard principal matches everyone, including callers without an
        access key; otherwise the key must be listed in ``principals``.
        """
        if self.principals == "*":
            return True
        if access_key is None:
            return False
        return access_key in self.principals

    def matches_action(self, action: str) -> bool:
        """Return True when *action* (after normalization) is covered."""
        action = _normalize_action(action)
        return "*" in self.actions or action in self.actions

    def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
        """Return True when any resource entry covers *bucket* / *object_key*.

        Bucket names are compared in lowercase and a resource bucket of
        ``"*"`` matches every bucket. A bucket-level resource (no key
        pattern) matches only when no object key is given; otherwise the key
        (or the empty string when absent) is matched against the compiled
        glob pattern.
        """
        bucket = (bucket or "*").lower()
        key = object_key or ""
        for resource_bucket, compiled_pattern in self._get_compiled_patterns():
            resource_bucket = (resource_bucket or "*").lower()
            if resource_bucket not in {"*", bucket}:
                continue
            if compiled_pattern is None:
                if not key:
                    return True
                continue
            if compiled_pattern.match(key):
                return True
        return False

    def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
        """Return True when every condition of the statement holds.

        A statement without conditions always matches. A missing *context*
        is treated as an empty mapping.
        """
        if not self.conditions:
            return True
        if context is None:
            context = {}
        for operator, key_values in self.conditions.items():
            for condition_key, condition_values in key_values.items():
                if not _evaluate_condition_operator(operator, condition_key, condition_values, context):
                    return False
        return True


class BucketPolicyStore:
    """Loads bucket policies from disk and evaluates statements.

    The backing file is a JSON object of the form ``{"policies": {bucket:
    policy}}``. Bucket names are handled in lowercase. ``_raw`` keeps the
    policy documents as stored, ``_policies`` the parsed statements used for
    evaluation.
    """

    def __init__(self, policy_path: Path) -> None:
        """Open (creating if necessary) the policy file and load it.

        Raises:
            ValueError: If the existing file cannot be read or parsed.
        """
        self.policy_path = Path(policy_path)
        self.policy_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.policy_path.exists():
            self.policy_path.write_text(json.dumps({"policies": {}}, indent=2), encoding="utf-8")
        self._raw: Dict[str, Any] = {}
        self._policies: Dict[str, List[BucketPolicyStatement]] = {}
        self._load()
        self._last_mtime = self._current_mtime()
        # Performance: avoid a stat() call on every request.
        self._last_stat_check = 0.0
        self._stat_check_interval = 1.0  # Only check mtime every 1 second

    def maybe_reload(self) -> None:
        """Reload the policy file if its mtime changed since the last load.

        The mtime is checked at most once per ``_stat_check_interval``
        seconds; a missing file leaves the in-memory policies untouched.
        """
        now = time.time()
        if now - self._last_stat_check < self._stat_check_interval:
            return
        self._last_stat_check = now
        current = self._current_mtime()
        if current is None or current == self._last_mtime:
            return
        self._load()
        self._last_mtime = current

    def _current_mtime(self) -> float | None:
        """Return the policy file's mtime, or None when the file is missing."""
        try:
            return self.policy_path.stat().st_mtime
        except FileNotFoundError:
            return None

    def evaluate(
        self,
        access_key: Optional[str],
        bucket: Optional[str],
        object_key: Optional[str],
        action: str,
        context: Optional[Dict[str, Any]] = None,
    ) -> str | None:
        """Evaluate the bucket's policy for one request.

        Returns:
            ``"deny"`` as soon as a matching deny statement is found,
            ``"allow"`` when at least one statement matched and none denied,
            or None when no statement matched (or the bucket has no policy).
        """
        bucket = (bucket or "").lower()
        statements = self._policies.get(bucket) or []
        decision: Optional[str] = None
        for statement in statements:
            if not statement.matches_principal(access_key):
                continue
            if not statement.matches_action(action):
                continue
            if not statement.matches_resource(bucket, object_key):
                continue
            if not statement.matches_condition(context):
                continue
            if statement.effect == "deny":
                # An explicit deny wins over any allow.
                return "deny"
            decision = "allow"
        return decision

    def get_policy(self, bucket: str) -> Dict[str, Any] | None:
        """Return the stored policy document for *bucket*, or None."""
        return self._raw.get(bucket.lower())

    def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None:
        """Validate, store and persist a policy for *bucket*.

        Raises:
            ValueError: If the payload yields no valid statement.
        """
        bucket = bucket.lower()
        statements = self._normalize_policy(policy_payload)
        if not statements:
            raise ValueError("Policy must include at least one valid statement")
        self._raw[bucket] = policy_payload
        self._policies[bucket] = statements
        self._persist()

    def delete_policy(self, bucket: str) -> None:
        """Remove the policy for *bucket* (if any) and persist the change."""
        bucket = bucket.lower()
        self._raw.pop(bucket, None)
        self._policies.pop(bucket, None)
        self._persist()

    def _load(self) -> None:
        """Read the policy file and rebuild ``_raw`` and ``_policies``.

        A missing file is treated as an empty policy set.

        Raises:
            ValueError: If the file cannot be read, is not valid JSON, or
                does not contain an object with a ``policies`` mapping.
        """
        try:
            content = self.policy_path.read_text(encoding="utf-8")
            raw_payload = json.loads(content)
        except FileNotFoundError:
            raw_payload = {"policies": {}}
        except json.JSONDecodeError as e:
            raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}") from e
        except PermissionError as e:
            raise ValueError(f"Cannot read bucket policy file (permission denied): {e}") from e
        except (OSError, ValueError) as e:
            raise ValueError(f"Failed to load bucket policies: {e}") from e

        # Valid JSON of the wrong shape (e.g. a list) is reported the same
        # way as other corruption instead of failing with AttributeError.
        policies = raw_payload.get("policies", {}) if isinstance(raw_payload, dict) else None
        if not isinstance(policies, dict):
            raise ValueError(
                "Corrupted bucket policy file (expected an object with a 'policies' mapping)"
            )

        raw: Dict[str, Any] = {}
        parsed: Dict[str, List[BucketPolicyStatement]] = {}
        for bucket, policy in policies.items():
            bucket_key = bucket.lower()
            raw[bucket_key] = policy
            parsed[bucket_key] = self._normalize_policy(policy)
        self._raw = raw
        self._policies = parsed

    def _persist(self) -> None:
        """Write the stored policy documents back to the policy file."""
        payload = {"policies": self._raw}
        # Written as UTF-8 to match the encoding used by _load.
        self.policy_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    @staticmethod
    def _as_list(value: Any) -> List[Any]:
        """Wrap a scalar policy field in a list; None becomes an empty list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]:
        """Parse a policy document into statements, skipping invalid ones.

        ``Statement``, ``Action`` and ``Resource`` may each be a single value
        or a list. A statement is skipped when it is not an object, when its
        ``Effect`` is neither ``Allow`` nor ``Deny`` (case-insensitive), or
        when none of its resources parses to a bucket. A statement without
        actions applies to every action.
        """
        if not isinstance(policy, dict):
            return []
        statements: List[BucketPolicyStatement] = []
        for statement in self._as_list(policy.get("Statement", [])):
            if not isinstance(statement, dict):
                continue
            # Reject unknown effects up front: evaluate() treats everything
            # that is not "deny" as an allow, so a misspelled effect must not
            # be allowed to turn into a grant.
            effect = str(statement.get("Effect", "Allow")).strip().lower()
            if effect not in ("allow", "deny"):
                continue
            resources: List[tuple[str | None, str | None]] = []
            for resource in self._as_list(statement.get("Resource", [])):
                bucket, pattern = _parse_resource(str(resource))
                if bucket:
                    resources.append((bucket, pattern))
            if not resources:
                continue
            # A single string "Action" is wrapped so it is not iterated
            # character by character.
            action_names = [str(a) for a in self._as_list(statement.get("Action", []))]
            actions = _normalize_actions(action_names)
            principals = _normalize_principals(statement.get("Principal", "*"))
            conditions = self._normalize_conditions(statement.get("Condition", {}))
            statements.append(
                BucketPolicyStatement(
                    sid=statement.get("Sid"),
                    effect=effect,
                    principals=principals,
                    actions=actions or ["*"],
                    resources=resources,
                    conditions=conditions,
                )
            )
        return statements

    def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
        """Normalize a ``Condition`` block to ``{operator: {key: [str, ...]}}``.

        Operators whose value is not a mapping are dropped; scalar condition
        values are wrapped in a list and every value is converted to ``str``.
        """
        if not condition_block or not isinstance(condition_block, dict):
            return {}
        normalized: Dict[str, Dict[str, List[str]]] = {}
        for operator, key_values in condition_block.items():
            if not isinstance(key_values, dict):
                continue
            per_key: Dict[str, List[str]] = {}
            for cond_key, cond_values in key_values.items():
                if isinstance(cond_values, str):
                    per_key[cond_key] = [cond_values]
                elif isinstance(cond_values, list):
                    per_key[cond_key] = [str(v) for v in cond_values]
                else:
                    per_key[cond_key] = [str(cond_values)]
            normalized[operator] = per_key
        return normalized