diff --git a/.dockerignore b/.dockerignore
index ba575da..713410f 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,13 +1,9 @@
 .git
 .gitignore
-.venv
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-.pytest_cache
-.coverage
-htmlcov
 logs
 data
 tmp
+target
+crates/*/tests
+Dockerfile
+.dockerignore
diff --git a/.gitignore b/.gitignore
index 210bd29..fa994a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,9 +26,8 @@
 dist/
 *.egg-info/
 .eggs/
-# Rust / maturin build artifacts
-myfsio_core/target/
-myfsio_core/Cargo.lock
+# Rust engine build artifacts
+target/
 
 # Local runtime artifacts
 logs/
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..d52b982
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,5253 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "aead"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0"
+dependencies = [
+ "crypto-common 0.1.7",
+ "generic-array",
+]
+
+[[package]]
+name = "aes"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures 0.2.17",
+]
+
+[[package]]
+name = "aes-gcm"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1"
+dependencies = [
+ "aead",
+ "aes",
+ "cipher",
+ "ctr",
+ "ghash",
+ "subtle",
+]
+
+[[package]]
+name = "ahash"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
+dependencies = [
+ "getrandom 0.2.17",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "const-random",
+ "getrandom 0.3.4",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anstream"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.14"
+source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num-traits", +] + +[[package]] +name = "arrow-array" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" +dependencies = [ + "ahash 0.8.12", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-buffer" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + +[[package]] +name = "arrow-cast" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-ord", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" 
+dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-ord" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" +dependencies = [ + "bitflags", +] + +[[package]] +name = "arrow-select" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" +dependencies = [ + "ahash 0.8.12", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num-traits", +] + +[[package]] +name = "arrow-string" +version = "58.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-config" +version = "1.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1 0.10.6", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.129.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d4e8410fadbc0ee453145dd77a4958227b18b05bf67c2795d0a8b8596c9aa0f" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "http-body 1.0.1", + "lru 0.16.4", + "percent-encoding", + "regex-lite", + "sha2 0.10.9", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.99.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc35b7a14cabdad13795fbbbd26d5ddec0882c01492ceedf2af575aad5f37dd" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + 
"aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint 0.5.5", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "ring", + "sha2 0.10.9", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.64.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10efbbcec1e044b81600e2fc562a391951d291152d95b482d5b7e7132299d762" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "md-5 0.11.0", + "pin-project-lite", + "sha1 0.11.0", + "sha2 0.11.0", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.9.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0504b1ab12debb5959e5165ee5fe97dd387e7aa7ea6a477bfd7635dfe769a4f5" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71a13df6ada0aafbf21a73bdfcdf9324cfa9df77d96b8446045be3cde61b42e" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.9.0", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", 
+ "tower-service", + "tracing", +] + +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "borsh-derive", + "bytes", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies 
= [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + +[[package]] +name = "cc" +version = "1.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common 0.1.7", + "inout", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "comfy-table" +version = "7.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "time", + "version_check", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc-fast" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d" +dependencies = [ + "crc", + "digest 0.10.7", + "rustversion", + "spin 0.10.0", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "typenum", +] + +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" 
+dependencies = [ + "const-oid 0.9.6", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = "digest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +dependencies = [ + "block-buffer 0.12.0", + "const-oid 0.10.2", + "crypto-common 0.2.1", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "duckdb" +version = "1.10501.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f13bc6d6487032fc2825a62ef8b4924b2378a2eb3166e132e5f3141ae9dd633f" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num-integer", + "rust_decimal", + "strum", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", + "zlib-rs", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", 
+] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags", + "ignore", + "walkdir", +] + +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name 
= "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "http-range-header" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + +[[package]] +name = "hybrid-array" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +dependencies = [ + "typenum", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = 
"0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper 1.9.0", + "hyper-util", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.9.0", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.3", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core 0.62.2", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.184" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" + +[[package]] +name = "libduckdb-sys" +version = "1.10501.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12096c1694924782b3fe21e790630b77bacb4fcb7ad9d7ee0fec626f985bf248" +dependencies = [ + "cc", + "flate2", + "pkg-config", + "reqwest", + "serde", + "serde_json", + "tar", + "vcpkg", + "zip", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" +dependencies = [ + "bitflags", + "libc", + "plain", + "redox_syscall 0.7.3", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest 0.10.7", +] + +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" +dependencies = [ + "cfg-if", + "digest 0.11.2", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "multer" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http 1.4.0", + "httparse", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + +[[package]] +name = "myfsio-auth" +version = "0.5.0" +dependencies = [ + "aes", + "base64", + "cbc", + "chrono", + "hex", + "hmac", + "lru 0.14.0", + "myfsio-common", + "parking_lot", + "pbkdf2", + "percent-encoding", + "rand 0.8.5", + "serde", + "serde_json", + "sha2 0.10.9", + "tempfile", + "thiserror", + "tracing", + "uuid", +] + +[[package]] +name = "myfsio-common" +version = 
"0.5.0" +dependencies = [ + "chrono", + "serde", + "serde_json", + "thiserror", + "uuid", +] + +[[package]] +name = "myfsio-crypto" +version = "0.5.0" +dependencies = [ + "aes-gcm", + "base64", + "chrono", + "hex", + "hkdf", + "md-5 0.10.6", + "myfsio-common", + "rand 0.8.5", + "serde", + "serde_json", + "sha2 0.10.9", + "tempfile", + "thiserror", + "tokio", + "uuid", +] + +[[package]] +name = "myfsio-server" +version = "0.5.0" +dependencies = [ + "aes-gcm", + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-s3", + "aws-smithy-types", + "axum", + "base64", + "bytes", + "chrono", + "clap", + "cookie", + "crc32fast", + "dotenvy", + "duckdb", + "futures", + "hex", + "http-body 1.0.1", + "http-body-util", + "hyper 1.9.0", + "md-5 0.10.6", + "mime_guess", + "multer", + "myfsio-auth", + "myfsio-common", + "myfsio-crypto", + "myfsio-storage", + "myfsio-xml", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand 0.8.5", + "regex", + "reqwest", + "roxmltree", + "serde", + "serde_json", + "serde_urlencoded", + "sha2 0.10.9", + "subtle", + "sysinfo", + "tempfile", + "tera", + "tokio", + "tokio-stream", + "tokio-util", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "myfsio-storage" +version = "0.5.0" +dependencies = [ + "chrono", + "dashmap", + "hex", + "md-5 0.10.6", + "myfsio-common", + "myfsio-crypto", + "parking_lot", + "regex", + "serde", + "serde_json", + "sha2 0.10.9", + "tempfile", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "unicode-normalization", + "uuid", +] + +[[package]] +name = "myfsio-xml" +version = "0.5.0" +dependencies = [ + "chrono", + "myfsio-common", + "percent-encoding", + "quick-xml", + "serde", +] + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2 0.10.9", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.18", + "smallvec", + "windows-link", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest 0.10.7", + "hmac", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2 0.10.9", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.37", + "socket2 0.6.3", + "thiserror", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls 0.23.37", + "rustls-pki-types", + "slab", + "thiserror", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.6.3", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.9.0", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + 
"rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots", +] + +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "roxmltree" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" + +[[package]] +name = "rust_decimal" +version = "1.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ce901f9a19d251159075a4c37af514c3b8ef99c22e02dd8c19161cf397ee94a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", + "wasm-bindgen", +] + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "aws-lc-rs", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.103.10", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest 0.10.7", + "rand_core 0.6.4", +] + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "sysinfo" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c33cd241af0f2e9e3b5c32163b873b29956890b5342e6745b917ce9d490f4af" +dependencies = [ + "core-foundation-sys", + "libc", + "memchr", + "ntapi", + "rayon", + "windows", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tar" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tera" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722" +dependencies = [ + "chrono", + "chrono-tz", + "globwalk", + "humansize", + "lazy_static", + "percent-encoding", + "pest", + "pest_derive", + "rand 0.8.5", + "regex", + "serde", + "serde_json", + "slug", + "unicode-segmentation", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + 
"deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.3", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.10+spec-1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a82418ca169e235e6c399a84e395ab6debeb3bc90edc959bf0f48647c6a32d1b" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "async-compression", + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "http-range-header", + "httpdate", + "iri-string", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "tokio", + "tokio-util", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.19.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "serde", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +dependencies = [ + "windows-core 0.57.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +dependencies = [ + "windows-implement 0.57.0", + "windows-interface 0.57.0", + "windows-result 0.1.2", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement 0.60.2", + "windows-interface 0.59.3", + "windows-link", + "windows-result 0.4.1", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 
0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + 
"synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a9c20a2 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,62 @@ +[workspace] +resolver = "2" +members = [ + "crates/myfsio-common", + "crates/myfsio-auth", + "crates/myfsio-crypto", + "crates/myfsio-storage", + "crates/myfsio-xml", + "crates/myfsio-server", +] + +[workspace.package] +version = "0.5.0" +edition = "2021" + +[workspace.dependencies] +tokio = { version = "1", features = ["full"] } +axum = { version = "0.8" } +tower = { version = "0.5" } +tower-http = { version = 
"0.6", features = ["cors", "trace", "fs", "compression-gzip", "timeout", "set-header"] } +hyper = { version = "1" } +bytes = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +quick-xml = { version = "0.37", features = ["serialize"] } +hmac = "0.12" +sha2 = "0.10" +md-5 = "0.10" +hex = "0.4" +aes = "0.8" +aes-gcm = "0.10" +cbc = { version = "0.1", features = ["alloc"] } +hkdf = "0.12" +uuid = { version = "1", features = ["v4"] } +parking_lot = "0.12" +lru = "0.14" +percent-encoding = "2" +regex = "1" +unicode-normalization = "0.1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +thiserror = "2" +chrono = { version = "0.4", features = ["serde"] } +base64 = "0.22" +tokio-util = { version = "0.7", features = ["io", "io-util"] } +tokio-stream = "0.1" +futures = "0.3" +dashmap = "6" +crc32fast = "1" +duckdb = { version = "1", features = ["bundled"] } +reqwest = { version = "0.12", default-features = false, features = ["stream", "rustls-tls", "json"] } +aws-sdk-s3 = { version = "1", features = ["behavior-version-latest", "rt-tokio"] } +aws-config = { version = "1", features = ["behavior-version-latest"] } +aws-credential-types = "1" +aws-smithy-runtime-api = "1" +aws-smithy-types = "1" +async-trait = "0.1" +tera = "1" +cookie = "0.18" +subtle = "2" +clap = { version = "4", features = ["derive"] } +dotenvy = "0.15" diff --git a/Dockerfile b/Dockerfile index a50ec8b..b1b96a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,45 +1,50 @@ -FROM python:3.14.3-slim +FROM rust:1-slim-bookworm AS builder -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 +WORKDIR /build + +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential pkg-config libssl-dev \ + && rm -rf /var/lib/apt/lists/* + +COPY Cargo.toml Cargo.lock ./ +COPY crates ./crates + +RUN cargo build --release --bin myfsio-server \ + && strip target/release/myfsio-server + + +FROM debian:bookworm-slim WORKDIR /app RUN apt-get update \ - && apt-get install -y --no-install-recommends build-essential curl \ - && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \ - && rm -rf /var/lib/apt/lists/* + && apt-get install -y --no-install-recommends ca-certificates curl \ + && rm -rf /var/lib/apt/lists/* \ + && mkdir -p /app/data \ + && useradd -m -u 1000 myfsio \ + && chown -R myfsio:myfsio /app -ENV PATH="/root/.cargo/bin:${PATH}" +COPY --from=builder /build/target/release/myfsio-server /usr/local/bin/myfsio-server +COPY --from=builder /build/crates/myfsio-server/templates /app/templates +COPY --from=builder /build/crates/myfsio-server/static /app/static +COPY docker-entrypoint.sh /app/docker-entrypoint.sh -COPY requirements.txt ./ -RUN pip install --no-cache-dir -r requirements.txt - -COPY . . - -RUN pip install --no-cache-dir maturin \ - && cd myfsio_core \ - && maturin build --release \ - && pip install target/wheels/*.whl \ - && cd .. 
\ - && rm -rf myfsio_core/target \ - && pip uninstall -y maturin \ - && rustup self uninstall -y - -RUN chmod +x docker-entrypoint.sh - -RUN mkdir -p /app/data \ - && useradd -m -u 1000 myfsio \ +RUN chmod +x /app/docker-entrypoint.sh \ && chown -R myfsio:myfsio /app USER myfsio -EXPOSE 5000 5100 -ENV APP_HOST=0.0.0.0 \ - FLASK_ENV=production \ - FLASK_DEBUG=0 +EXPOSE 5000 +EXPOSE 5100 +ENV HOST=0.0.0.0 \ + PORT=5000 \ + UI_PORT=5100 \ + STORAGE_ROOT=/app/data \ + TEMPLATES_DIR=/app/templates \ + STATIC_DIR=/app/static \ + RUST_LOG=info HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD python -c "import requests; requests.get('http://localhost:5000/myfsio/health', timeout=2)" + CMD curl -fsS "http://localhost:${PORT}/myfsio/health" || exit 1 -CMD ["./docker-entrypoint.sh"] +CMD ["/app/docker-entrypoint.sh"] diff --git a/README.md b/README.md index 772178d..0eba2df 100644 --- a/README.md +++ b/README.md @@ -1,255 +1,205 @@ # MyFSIO -A lightweight, S3-compatible object storage system built with Flask. MyFSIO implements core AWS S3 REST API operations with filesystem-backed storage, making it ideal for local development, testing, and self-hosted storage scenarios. +MyFSIO is an S3-compatible object storage server with a Rust runtime and a filesystem-backed storage engine. The repository root is the Cargo workspace; the server serves both the S3 API and the built-in web UI from a single process. ## Features -**Core Storage** -- S3-compatible REST API with AWS Signature Version 4 authentication -- Bucket and object CRUD operations -- Object versioning with version history -- Multipart uploads for large files -- Presigned URLs (1 second to 7 days validity) +- S3-compatible REST API with Signature Version 4 authentication +- Browser UI for buckets, objects, IAM users, policies, replication, metrics, and site administration +- Filesystem-backed storage rooted at `data/` +- Bucket versioning, multipart uploads, presigned URLs, CORS, object and bucket tagging +- Server-side encryption and built-in KMS support +- Optional background services for lifecycle, garbage collection, integrity scanning, operation metrics, and system metrics history +- Replication, site sync, and static website hosting support -**Security & Access Control** -- IAM users with access key management and rotation -- Bucket policies (AWS Policy Version 2012-10-17) -- Server-side encryption (SSE-S3 and SSE-KMS) -- Built-in Key Management Service (KMS) -- Rate limiting per endpoint +## Runtime Model -**Advanced Features** -- Cross-bucket replication to remote S3-compatible endpoints -- Hot-reload for bucket policies (no restart required) -- CORS configuration per bucket +MyFSIO now runs as one Rust process: -**Management UI** -- Web console for bucket and object management -- IAM dashboard for user administration -- Inline JSON policy editor with presets -- Object browser with folder navigation and bulk operations -- Dark mode support +- API listener on `HOST` + `PORT` (default `127.0.0.1:5000`) +- UI listener on `HOST` + `UI_PORT` (default `127.0.0.1:5100`) +- Shared state for storage, IAM, policies, sessions, metrics, and background workers -## Architecture - -``` -+------------------+ +------------------+ -| API Server | | UI Server | -| (port 5000) | | (port 5100) | -| | | | -| - S3 REST API |<------->| - Web Console | -| - SigV4 Auth | | - IAM Dashboard | -| - Presign URLs | | - Bucket Editor | -+--------+---------+ +------------------+ - | - v -+------------------+ +------------------+ -| Object 
Storage | | System Metadata | -| (filesystem) | | (.myfsio.sys/) | -| | | | -| data// | | - IAM config | -| | | - Bucket policies| -| | | - Encryption keys| -+------------------+ +------------------+ -``` +If you want API-only mode, set `UI_ENABLED=false`. There is no separate "UI-only" runtime anymore. ## Quick Start +From the repository root: + ```bash -# Clone and setup -git clone https://gitea.jzwsite.com/kqjy/MyFSIO -cd s3 -python -m venv .venv - -# Activate virtual environment -# Windows PowerShell: -.\.venv\Scripts\Activate.ps1 -# Windows CMD: -.venv\Scripts\activate.bat -# Linux/macOS: -source .venv/bin/activate - -# Install dependencies -pip install -r requirements.txt - -# (Optional) Build Rust native extension for better performance -# Requires Rust toolchain: https://rustup.rs -pip install maturin -cd myfsio_core && maturin develop --release && cd .. - -# Start both servers -python run.py - -# Or start individually -python run.py --mode api # API only (port 5000) -python run.py --mode ui # UI only (port 5100) +cargo run -p myfsio-server -- ``` -**Credentials:** Generated automatically on first run and printed to the console. If missed, check the IAM config file at `/.myfsio.sys/config/iam.json`. +Useful URLs: -- **Web Console:** http://127.0.0.1:5100/ui -- **API Endpoint:** http://127.0.0.1:5000 +- UI: `http://127.0.0.1:5100/ui` +- API: `http://127.0.0.1:5000/` +- Health: `http://127.0.0.1:5000/myfsio/health` + +On first boot, MyFSIO creates `data/.myfsio.sys/config/iam.json` and prints the generated admin access key and secret key to the console. + +### Common CLI commands + +```bash +# Show resolved configuration +cargo run -p myfsio-server -- --show-config + +# Validate configuration and exit non-zero on critical issues +cargo run -p myfsio-server -- --check-config + +# Reset admin credentials +cargo run -p myfsio-server -- --reset-cred + +# API only +UI_ENABLED=false cargo run -p myfsio-server -- +``` + +## Building a Binary + +```bash +cargo build --release -p myfsio-server +``` + +Binary locations: + +- Linux/macOS: `target/release/myfsio-server` +- Windows: `target/release/myfsio-server.exe` + +Run the built binary directly: + +```bash +./target/release/myfsio-server +``` ## Configuration +The server reads environment variables from the process environment and also loads, when present: + +- `/opt/myfsio/myfsio.env` +- `.env` +- `myfsio.env` + +Core settings: + | Variable | Default | Description | -|----------|---------|-------------| -| `STORAGE_ROOT` | `./data` | Filesystem root for bucket storage | -| `IAM_CONFIG` | `.myfsio.sys/config/iam.json` | IAM user and policy store | -| `BUCKET_POLICY_PATH` | `.myfsio.sys/config/bucket_policies.json` | Bucket policy store | -| `API_BASE_URL` | `http://127.0.0.1:5000` | API endpoint for UI calls | -| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size in bytes (1 GB) | -| `MULTIPART_MIN_PART_SIZE` | `5242880` | Minimum multipart part size (5 MB) | -| `UI_PAGE_SIZE` | `100` | Default page size for listings | -| `SECRET_KEY` | `dev-secret-key` | Flask session secret | -| `AWS_REGION` | `us-east-1` | Region for SigV4 signing | -| `AWS_SERVICE` | `s3` | Service name for SigV4 signing | -| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption | -| `KMS_ENABLED` | `false` | Enable Key Management Service | -| `LOG_LEVEL` | `INFO` | Logging verbosity | -| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests | -| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) | -| 
`REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout | -| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync | -| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object | +| --- | --- | --- | +| `HOST` | `127.0.0.1` | Bind address for API and UI listeners | +| `PORT` | `5000` | API port | +| `UI_PORT` | `5100` | UI port | +| `UI_ENABLED` | `true` | Disable to run API-only | +| `STORAGE_ROOT` | `./data` | Root directory for buckets and system metadata | +| `IAM_CONFIG` | `/.myfsio.sys/config/iam.json` | IAM config path | +| `API_BASE_URL` | unset | Public API base used by the UI and presigned URL generation | +| `AWS_REGION` | `us-east-1` | Region used in SigV4 scope | +| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Allowed request time skew | +| `PRESIGNED_URL_MIN_EXPIRY_SECONDS` | `1` | Minimum presigned URL expiry | +| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Maximum presigned URL expiry | +| `SECRET_KEY` | loaded from `.myfsio.sys/config/.secret` if present | Session signing key and IAM-at-rest encryption key | +| `ADMIN_ACCESS_KEY` | unset | Optional first-run or reset access key | +| `ADMIN_SECRET_KEY` | unset | Optional first-run or reset secret key | + +Feature toggles: + +| Variable | Default | +| --- | --- | +| `ENCRYPTION_ENABLED` | `false` | +| `KMS_ENABLED` | `false` | +| `GC_ENABLED` | `false` | +| `INTEGRITY_ENABLED` | `false` | +| `LIFECYCLE_ENABLED` | `false` | +| `METRICS_HISTORY_ENABLED` | `false` | +| `OPERATION_METRICS_ENABLED` | `false` | +| `WEBSITE_HOSTING_ENABLED` | `false` | +| `SITE_SYNC_ENABLED` | `false` | + +Metrics and replication tuning: + +| Variable | Default | +| --- | --- | +| `OPERATION_METRICS_INTERVAL_MINUTES` | `5` | +| `OPERATION_METRICS_RETENTION_HOURS` | `24` | +| `METRICS_HISTORY_INTERVAL_MINUTES` | `5` | +| `METRICS_HISTORY_RETENTION_HOURS` | `24` | +| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | +| `REPLICATION_READ_TIMEOUT_SECONDS` | `30` | +| `REPLICATION_MAX_RETRIES` | `2` | +| `REPLICATION_STREAMING_THRESHOLD_BYTES` | `10485760` | +| `REPLICATION_MAX_FAILURES_PER_BUCKET` | `50` | +| `SITE_SYNC_INTERVAL_SECONDS` | `60` | +| `SITE_SYNC_BATCH_SIZE` | `100` | +| `SITE_SYNC_CONNECT_TIMEOUT_SECONDS` | `10` | +| `SITE_SYNC_READ_TIMEOUT_SECONDS` | `120` | +| `SITE_SYNC_MAX_RETRIES` | `2` | +| `SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS` | `1.0` | + +UI asset overrides: + +| Variable | Default | +| --- | --- | +| `TEMPLATES_DIR` | built-in crate templates directory | +| `STATIC_DIR` | built-in crate static directory | + +See [docs.md](./docs.md) for the full Rust-side operations guide. ## Data Layout -``` +```text data/ -├── / # User buckets with objects -└── .myfsio.sys/ # System metadata - ├── config/ - │ ├── iam.json # IAM users and policies - │ ├── bucket_policies.json # Bucket policies - │ ├── replication_rules.json - │ └── connections.json # Remote S3 connections - ├── buckets// - │ ├── meta/ # Object metadata (.meta.json) - │ ├── versions/ # Archived object versions - │ └── .bucket.json # Bucket config (versioning, CORS) - ├── multipart/ # Active multipart uploads - └── keys/ # Encryption keys (SSE-S3/KMS) + / + .myfsio.sys/ + config/ + iam.json + bucket_policies.json + connections.json + operation_metrics.json + metrics_history.json + buckets// + meta/ + versions/ + multipart/ + keys/ ``` -## API Reference - -All endpoints require AWS Signature Version 4 authentication unless using presigned URLs or public bucket policies. 
- -### Bucket Operations - -| Method | Endpoint | Description | -|--------|----------|-------------| -| `GET` | `/` | List all buckets | -| `PUT` | `/` | Create bucket | -| `DELETE` | `/` | Delete bucket (must be empty) | -| `HEAD` | `/` | Check bucket exists | - -### Object Operations - -| Method | Endpoint | Description | -|--------|----------|-------------| -| `GET` | `/` | List objects (supports `list-type=2`) | -| `PUT` | `//` | Upload object | -| `GET` | `//` | Download object | -| `DELETE` | `//` | Delete object | -| `HEAD` | `//` | Get object metadata | -| `POST` | `//?uploads` | Initiate multipart upload | -| `PUT` | `//?partNumber=N&uploadId=X` | Upload part | -| `POST` | `//?uploadId=X` | Complete multipart upload | -| `DELETE` | `//?uploadId=X` | Abort multipart upload | - -### Bucket Policies (S3-compatible) - -| Method | Endpoint | Description | -|--------|----------|-------------| -| `GET` | `/?policy` | Get bucket policy | -| `PUT` | `/?policy` | Set bucket policy | -| `DELETE` | `/?policy` | Delete bucket policy | - -### Versioning - -| Method | Endpoint | Description | -|--------|----------|-------------| -| `GET` | `//?versionId=X` | Get specific version | -| `DELETE` | `//?versionId=X` | Delete specific version | -| `GET` | `/?versions` | List object versions | - -### Health Check - -| Method | Endpoint | Description | -|--------|----------|-------------| -| `GET` | `/myfsio/health` | Health check endpoint | - -## IAM & Access Control - -### Users and Access Keys - -On first run, MyFSIO creates a default admin user (`localadmin`/`localadmin`). Use the IAM dashboard to: - -- Create and delete users -- Generate and rotate access keys -- Attach inline policies to users -- Control IAM management permissions - -### Bucket Policies - -Bucket policies follow AWS policy grammar (Version `2012-10-17`) with support for: - -- Principal-based access (`*` for anonymous, specific users) -- Action-based permissions (`s3:GetObject`, `s3:PutObject`, etc.) -- Resource patterns (`arn:aws:s3:::bucket/*`) -- Condition keys - -**Policy Presets:** -- **Public:** Grants anonymous read access (`s3:GetObject`, `s3:ListBucket`) -- **Private:** Removes bucket policy (IAM-only access) -- **Custom:** Manual policy editing with draft preservation - -Policies hot-reload when the JSON file changes. - -## Server-Side Encryption - -MyFSIO supports two encryption modes: - -- **SSE-S3:** Server-managed keys with automatic key rotation -- **SSE-KMS:** Customer-managed keys via built-in KMS - -Enable encryption with: -```bash -ENCRYPTION_ENABLED=true python run.py -``` - -## Cross-Bucket Replication - -Replicate objects to remote S3-compatible endpoints: - -1. Configure remote connections in the UI -2. Create replication rules specifying source/destination -3. Objects are automatically replicated on upload - ## Docker +Build the Rust image from the repository root: + ```bash docker build -t myfsio . -docker run -p 5000:5000 -p 5100:5100 -v ./data:/app/data myfsio +docker run --rm -p 5000:5000 -p 5100:5100 -v "${PWD}/data:/app/data" myfsio ``` +If the instance sits behind a reverse proxy, set `API_BASE_URL` to the public S3 endpoint. + +## Linux Installation + +The repository includes `scripts/install.sh` for systemd-style Linux installs. 
Build the Rust binary first, then pass it to the installer: + +```bash +cargo build --release -p myfsio-server + +sudo ./scripts/install.sh --binary ./target/release/myfsio-server +``` + +The installer copies the binary into `/opt/myfsio/myfsio`, writes `/opt/myfsio/myfsio.env`, and can register a `myfsio.service` unit. + ## Testing +Run the Rust test suite from the workspace: + ```bash -# Run all tests -pytest tests/ -v - -# Run specific test file -pytest tests/test_api.py -v - -# Run with coverage -pytest tests/ --cov=app --cov-report=html +cargo test ``` -## References +## Health Check -- [Amazon S3 Documentation](https://docs.aws.amazon.com/s3/) -- [AWS Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) -- [S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html) +`GET /myfsio/health` returns: + +```json +{ + "status": "ok", + "version": "0.5.0" +} +``` + +The `version` field comes from the Rust crate version in `crates/myfsio-server/Cargo.toml`. diff --git a/app/__init__.py b/app/__init__.py deleted file mode 100644 index 25f0028..0000000 --- a/app/__init__.py +++ /dev/null @@ -1,763 +0,0 @@ -from __future__ import annotations - -import html as html_module -import itertools -import logging -import mimetypes -import os -import shutil -import sys -import time -from logging.handlers import RotatingFileHandler -from pathlib import Path -from datetime import timedelta -from typing import Any, Dict, List, Optional - -from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for -from flask_cors import CORS -from flask_wtf.csrf import CSRFError -from werkzeug.middleware.proxy_fix import ProxyFix - -import io - -from .access_logging import AccessLoggingService -from .operation_metrics import OperationMetricsCollector, classify_endpoint -from .compression import GzipMiddleware -from .acl import AclService -from .bucket_policies import BucketPolicyStore -from .config import AppConfig -from .connections import ConnectionStore -from .encryption import EncryptionManager -from .extensions import limiter, csrf -from .iam import IamService -from .kms import KMSManager -from .gc import GarbageCollector -from .integrity import IntegrityChecker -from .lifecycle import LifecycleManager -from .notifications import NotificationService -from .object_lock import ObjectLockService -from .replication import ReplicationManager -from .secret_store import EphemeralSecretStore -from .site_registry import SiteRegistry, SiteInfo -from .storage import ObjectStorage, StorageError -from .version import get_version -from .website_domains import WebsiteDomainStore - -_request_counter = itertools.count(1) - - -class _ChunkedTransferMiddleware: - - def __init__(self, app): - self.app = app - - def __call__(self, environ, start_response): - if environ.get("REQUEST_METHOD") not in ("PUT", "POST"): - return self.app(environ, start_response) - - transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "") - content_length = environ.get("CONTENT_LENGTH") - - if "chunked" in transfer_encoding.lower(): - if content_length: - del environ["HTTP_TRANSFER_ENCODING"] - else: - raw = environ.get("wsgi.input") - if raw: - try: - if hasattr(raw, "seek"): - raw.seek(0) - body = raw.read() - except Exception: - body = b"" - if body: - environ["wsgi.input"] = io.BytesIO(body) - environ["CONTENT_LENGTH"] = str(len(body)) - del environ["HTTP_TRANSFER_ENCODING"] - - content_length = 
environ.get("CONTENT_LENGTH") - if not content_length or content_length == "0": - sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "") - decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "") - content_encoding = environ.get("HTTP_CONTENT_ENCODING", "") - if ("STREAMING" in sha256.upper() or decoded_len - or "aws-chunked" in content_encoding.lower()): - raw = environ.get("wsgi.input") - if raw: - try: - if hasattr(raw, "seek"): - raw.seek(0) - body = raw.read() - except Exception: - body = b"" - if body: - environ["wsgi.input"] = io.BytesIO(body) - environ["CONTENT_LENGTH"] = str(len(body)) - - raw = environ.get("wsgi.input") - if raw and hasattr(raw, "seek"): - try: - raw.seek(0) - except Exception: - pass - - return self.app(environ, start_response) - - -def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path: - """Migrate config file from legacy locations to the active path. - - Checks each legacy path in order and moves the first one found to the active path. - This ensures backward compatibility for users upgrading from older versions. - """ - active_path.parent.mkdir(parents=True, exist_ok=True) - - if active_path.exists(): - return active_path - - for legacy_path in legacy_paths: - if legacy_path.exists(): - try: - shutil.move(str(legacy_path), str(active_path)) - except OSError: - shutil.copy2(legacy_path, active_path) - try: - legacy_path.unlink(missing_ok=True) - except OSError: - pass - break - - return active_path - - -def create_app( - test_config: Optional[Dict[str, Any]] = None, - *, - include_api: bool = True, - include_ui: bool = True, -) -> Flask: - """Create and configure the Flask application.""" - config = AppConfig.from_env(test_config) - - if getattr(sys, "frozen", False): - project_root = Path(sys._MEIPASS) - else: - project_root = Path(__file__).resolve().parent.parent - - app = Flask( - __name__, - static_folder=str(project_root / "static"), - template_folder=str(project_root / "templates"), - ) - app.config.update(config.to_flask_config()) - if test_config: - app.config.update(test_config) - app.config.setdefault("APP_VERSION", get_version()) - app.permanent_session_lifetime = timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30))) - if app.config.get("TESTING"): - app.config.setdefault("WTF_CSRF_ENABLED", False) - - # Trust X-Forwarded-* headers from proxies - num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1) - if num_proxies: - if "NUM_TRUSTED_PROXIES" not in os.environ: - logging.getLogger(__name__).warning( - "NUM_TRUSTED_PROXIES not set, defaulting to 1. " - "Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy." 
- ) - app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies) - - if app.config.get("ENABLE_GZIP", True): - app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6) - - app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app) - - _configure_cors(app) - _configure_logging(app) - - limiter.init_app(app) - csrf.init_app(app) - - storage = ObjectStorage( - Path(app.config["STORAGE_ROOT"]), - cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60), - object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100), - bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0), - object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024), - meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048), - ) - - if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"): - storage.warm_cache_async() - - iam = IamService( - Path(app.config["IAM_CONFIG"]), - auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5), - auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15), - encryption_key=app.config.get("SECRET_KEY"), - ) - bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"])) - secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300)) - - storage_root = Path(app.config["STORAGE_ROOT"]) - config_dir = storage_root / ".myfsio.sys" / "config" - config_dir.mkdir(parents=True, exist_ok=True) - - connections_path = _migrate_config_file( - active_path=config_dir / "connections.json", - legacy_paths=[ - storage_root / ".myfsio.sys" / "connections.json", - storage_root / ".connections.json", - ], - ) - replication_rules_path = _migrate_config_file( - active_path=config_dir / "replication_rules.json", - legacy_paths=[ - storage_root / ".myfsio.sys" / "replication_rules.json", - storage_root / ".replication_rules.json", - ], - ) - - connections = ConnectionStore(connections_path) - replication = ReplicationManager( - storage, - connections, - replication_rules_path, - storage_root, - connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5), - read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30), - max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2), - streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024), - max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50), - ) - - site_registry_path = config_dir / "site_registry.json" - site_registry = SiteRegistry(site_registry_path) - if app.config.get("SITE_ID") and not site_registry.get_local_site(): - site_registry.set_local_site(SiteInfo( - site_id=app.config["SITE_ID"], - endpoint=app.config.get("SITE_ENDPOINT") or "", - region=app.config.get("SITE_REGION", "us-east-1"), - priority=app.config.get("SITE_PRIORITY", 100), - )) - - encryption_config = { - "encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False), - "encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"), - "default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"), - "encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024), - } - encryption_manager = EncryptionManager(encryption_config) - - kms_manager = None - if app.config.get("KMS_ENABLED", False): - kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", "")) - kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", "")) - kms_manager = 
KMSManager( - kms_keys_path, - kms_master_key_path, - generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1), - generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024), - ) - encryption_manager.set_kms_provider(kms_manager) - - if app.config.get("ENCRYPTION_ENABLED", False): - from .encrypted_storage import EncryptedObjectStorage - storage = EncryptedObjectStorage(storage, encryption_manager) - - acl_service = AclService(storage_root) - object_lock_service = ObjectLockService(storage_root) - notification_service = NotificationService( - storage_root, - allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False), - ) - access_logging_service = AccessLoggingService(storage_root) - access_logging_service.set_storage(storage) - - lifecycle_manager = None - if app.config.get("LIFECYCLE_ENABLED", False): - base_storage = storage.storage if hasattr(storage, 'storage') else storage - lifecycle_manager = LifecycleManager( - base_storage, - interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600), - storage_root=storage_root, - max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50), - ) - lifecycle_manager.start() - - gc_collector = None - if app.config.get("GC_ENABLED", False): - gc_collector = GarbageCollector( - storage_root=storage_root, - interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0), - temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0), - multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7), - lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0), - dry_run=app.config.get("GC_DRY_RUN", False), - io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10), - ) - gc_collector.start() - - integrity_checker = None - if app.config.get("INTEGRITY_ENABLED", False): - integrity_checker = IntegrityChecker( - storage_root=storage_root, - interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0), - batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000), - auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False), - dry_run=app.config.get("INTEGRITY_DRY_RUN", False), - io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10), - ) - integrity_checker.start() - - app.extensions["object_storage"] = storage - app.extensions["iam"] = iam - app.extensions["bucket_policies"] = bucket_policies - app.extensions["secret_store"] = secret_store - app.extensions["limiter"] = limiter - app.extensions["connections"] = connections - app.extensions["replication"] = replication - app.extensions["encryption"] = encryption_manager - app.extensions["kms"] = kms_manager - app.extensions["acl"] = acl_service - app.extensions["lifecycle"] = lifecycle_manager - app.extensions["gc"] = gc_collector - app.extensions["integrity"] = integrity_checker - app.extensions["object_lock"] = object_lock_service - app.extensions["notifications"] = notification_service - app.extensions["access_logging"] = access_logging_service - app.extensions["site_registry"] = site_registry - - website_domains_store = None - if app.config.get("WEBSITE_HOSTING_ENABLED", False): - website_domains_path = config_dir / "website_domains.json" - website_domains_store = WebsiteDomainStore(website_domains_path) - app.extensions["website_domains"] = website_domains_store - - from .s3_client import S3ProxyClient - api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000" - app.extensions["s3_proxy"] = S3ProxyClient( - api_base_url=api_base, - region=app.config.get("AWS_REGION", 
"us-east-1"), - ) - - operation_metrics_collector = None - if app.config.get("OPERATION_METRICS_ENABLED", False): - operation_metrics_collector = OperationMetricsCollector( - storage_root, - interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5), - retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24), - ) - app.extensions["operation_metrics"] = operation_metrics_collector - - system_metrics_collector = None - if app.config.get("METRICS_HISTORY_ENABLED", False): - from .system_metrics import SystemMetricsCollector - system_metrics_collector = SystemMetricsCollector( - storage_root, - interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5), - retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24), - ) - system_metrics_collector.set_storage(storage) - app.extensions["system_metrics"] = system_metrics_collector - - site_sync_worker = None - if app.config.get("SITE_SYNC_ENABLED", False): - from .site_sync import SiteSyncWorker - site_sync_worker = SiteSyncWorker( - storage=storage, - connections=connections, - replication_manager=replication, - storage_root=storage_root, - interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60), - batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100), - connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10), - read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120), - max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2), - clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0), - ) - site_sync_worker.start() - app.extensions["site_sync"] = site_sync_worker - - @app.errorhandler(500) - def internal_error(error): - wants_html = request.accept_mimetypes.accept_html - path = request.path or "" - if include_ui and wants_html and (path.startswith("/ui") or path == "/"): - return render_template('500.html'), 500 - error_xml = ( - '' - '' - 'InternalError' - 'An internal server error occurred' - f'{path}' - f'{getattr(g, "request_id", "-")}' - '' - ) - return error_xml, 500, {'Content-Type': 'application/xml'} - - @app.errorhandler(CSRFError) - def handle_csrf_error(e): - wants_html = request.accept_mimetypes.accept_html - path = request.path or "" - if include_ui and wants_html and (path.startswith("/ui") or path == "/"): - return render_template('csrf_error.html', reason=e.description), 400 - error_xml = ( - '' - '' - 'CSRFError' - f'{e.description}' - f'{path}' - f'{getattr(g, "request_id", "-")}' - '' - ) - return error_xml, 400, {'Content-Type': 'application/xml'} - - @app.template_filter("filesizeformat") - def filesizeformat(value: int) -> str: - """Format bytes as human-readable file size.""" - for unit in ["B", "KB", "MB", "GB", "TB", "PB"]: - if abs(value) < 1024.0 or unit == "PB": - if unit == "B": - return f"{int(value)} {unit}" - return f"{value:.1f} {unit}" - value /= 1024.0 - return f"{value:.1f} PB" - - @app.template_filter("timestamp_to_datetime") - def timestamp_to_datetime(value: float) -> str: - """Format Unix timestamp as human-readable datetime in configured timezone.""" - from datetime import datetime, timezone as dt_timezone - from zoneinfo import ZoneInfo - if not value: - return "Never" - try: - dt_utc = datetime.fromtimestamp(value, dt_timezone.utc) - display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC") - if display_tz and display_tz != "UTC": - try: - tz = ZoneInfo(display_tz) - dt_local = dt_utc.astimezone(tz) - return dt_local.strftime("%Y-%m-%d %H:%M:%S") - except (KeyError, ValueError): - pass 
- return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC") - except (ValueError, OSError): - return "Unknown" - - @app.template_filter("format_datetime") - def format_datetime_filter(dt, include_tz: bool = True) -> str: - """Format datetime object as human-readable string in configured timezone.""" - from datetime import datetime, timezone as dt_timezone - from zoneinfo import ZoneInfo - if not dt: - return "" - try: - display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC") - if display_tz and display_tz != "UTC": - try: - tz = ZoneInfo(display_tz) - if dt.tzinfo is None: - dt = dt.replace(tzinfo=dt_timezone.utc) - dt = dt.astimezone(tz) - except (KeyError, ValueError): - pass - tz_abbr = dt.strftime("%Z") or "UTC" - if include_tz: - return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})" - return dt.strftime("%b %d, %Y %H:%M") - except (ValueError, AttributeError): - return str(dt) - - if include_api: - from .s3_api import s3_api_bp - from .kms_api import kms_api_bp - from .admin_api import admin_api_bp - - app.register_blueprint(s3_api_bp) - app.register_blueprint(kms_api_bp) - app.register_blueprint(admin_api_bp) - csrf.exempt(s3_api_bp) - csrf.exempt(kms_api_bp) - csrf.exempt(admin_api_bp) - - if include_ui: - from .ui import ui_bp - - app.register_blueprint(ui_bp) - if not include_api: - @app.get("/") - def ui_root_redirect(): - return redirect(url_for("ui.buckets_overview")) - - @app.errorhandler(404) - def handle_not_found(error): - wants_html = request.accept_mimetypes.accept_html - path = request.path or "" - if include_ui and wants_html: - if not include_api or path.startswith("/ui") or path == "/": - return render_template("404.html"), 404 - return error - - @app.get("/myfsio/health") - def healthcheck() -> Dict[str, str]: - return {"status": "ok"} - - return app - - -def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask: - return create_app(test_config, include_api=True, include_ui=False) - - -def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask: - return create_app(test_config, include_api=False, include_ui=True) - - -def _configure_cors(app: Flask) -> None: - origins = app.config.get("CORS_ORIGINS", ["*"]) - methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"]) - allow_headers = app.config.get("CORS_ALLOW_HEADERS", ["*"]) - expose_headers = app.config.get("CORS_EXPOSE_HEADERS", ["*"]) - CORS( - app, - resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers, "expose_headers": expose_headers}}, - supports_credentials=True, - ) - - -class _RequestContextFilter(logging.Filter): - """Inject request-specific attributes into log records.""" - - def filter(self, record: logging.LogRecord) -> bool: - if has_request_context(): - record.request_id = getattr(g, "request_id", "-") - record.path = request.path - record.method = request.method - record.remote_addr = request.remote_addr or "-" - else: - record.request_id = getattr(record, "request_id", "-") - record.path = getattr(record, "path", "-") - record.method = getattr(record, "method", "-") - record.remote_addr = getattr(record, "remote_addr", "-") - return True - - -def _configure_logging(app: Flask) -> None: - formatter = logging.Formatter( - "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s" - ) - - stream_handler = logging.StreamHandler(sys.stdout) - stream_handler.setFormatter(formatter) - stream_handler.addFilter(_RequestContextFilter()) - - logger = app.logger - for handler in 
logger.handlers[:]: - handler.close() - logger.handlers.clear() - logger.addHandler(stream_handler) - - if app.config.get("LOG_TO_FILE"): - log_file = Path(app.config["LOG_FILE"]) - log_file.parent.mkdir(parents=True, exist_ok=True) - file_handler = RotatingFileHandler( - log_file, - maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)), - backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)), - encoding="utf-8", - ) - file_handler.setFormatter(formatter) - file_handler.addFilter(_RequestContextFilter()) - logger.addHandler(file_handler) - - logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO)) - - @app.before_request - def _log_request_start() -> None: - g.request_id = f"{os.getpid():x}{next(_request_counter):012x}" - g.request_started_at = time.perf_counter() - g.request_bytes_in = request.content_length or 0 - - @app.before_request - def _maybe_serve_website(): - if not app.config.get("WEBSITE_HOSTING_ENABLED"): - return None - if request.method not in {"GET", "HEAD"}: - return None - host = request.host - if ":" in host: - host = host.rsplit(":", 1)[0] - host = host.lower() - store = app.extensions.get("website_domains") - if not store: - return None - bucket = store.get_bucket(host) - if not bucket: - return None - storage = app.extensions["object_storage"] - if not storage.bucket_exists(bucket): - return _website_error_response(404, "Not Found") - website_config = storage.get_bucket_website(bucket) - if not website_config: - return _website_error_response(404, "Not Found") - index_doc = website_config.get("index_document", "index.html") - error_doc = website_config.get("error_document") - req_path = request.path.lstrip("/") - if not req_path or req_path.endswith("/"): - object_key = req_path + index_doc - else: - object_key = req_path - try: - obj_path = storage.get_object_path(bucket, object_key) - except (StorageError, OSError): - if object_key == req_path: - try: - obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc) - object_key = req_path + "/" + index_doc - except (StorageError, OSError): - return _serve_website_error(storage, bucket, error_doc, 404) - else: - return _serve_website_error(storage, bucket, error_doc, 404) - content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream" - is_encrypted = False - try: - metadata = storage.get_object_metadata(bucket, object_key) - is_encrypted = "x-amz-server-side-encryption" in metadata - except (StorageError, OSError): - pass - if is_encrypted and hasattr(storage, "get_object_data"): - try: - data, _ = storage.get_object_data(bucket, object_key) - file_size = len(data) - except (StorageError, OSError): - return _website_error_response(500, "Internal Server Error") - else: - data = None - try: - stat = obj_path.stat() - file_size = stat.st_size - except OSError: - return _website_error_response(500, "Internal Server Error") - if request.method == "HEAD": - response = Response(status=200) - response.headers["Content-Length"] = file_size - response.headers["Content-Type"] = content_type - response.headers["Accept-Ranges"] = "bytes" - return response - from .s3_api import _parse_range_header - range_header = request.headers.get("Range") - if range_header: - ranges = _parse_range_header(range_header, file_size) - if ranges is None: - return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"}) - start, end = ranges[0] - length = end - start + 1 - if data is not None: - partial_data = data[start:end + 1] - response = Response(partial_data, 
status=206, mimetype=content_type) - else: - def _stream_range(file_path, start_pos, length_to_read): - with file_path.open("rb") as f: - f.seek(start_pos) - remaining = length_to_read - while remaining > 0: - chunk = f.read(min(262144, remaining)) - if not chunk: - break - remaining -= len(chunk) - yield chunk - response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True) - response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}" - response.headers["Content-Length"] = length - response.headers["Accept-Ranges"] = "bytes" - return response - if data is not None: - response = Response(data, mimetype=content_type) - response.headers["Content-Length"] = file_size - response.headers["Accept-Ranges"] = "bytes" - return response - def _stream(file_path): - with file_path.open("rb") as f: - while True: - chunk = f.read(65536) - if not chunk: - break - yield chunk - response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True) - response.headers["Content-Length"] = file_size - response.headers["Accept-Ranges"] = "bytes" - return response - - def _serve_website_error(storage, bucket, error_doc_key, status_code): - if not error_doc_key: - return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error") - try: - obj_path = storage.get_object_path(bucket, error_doc_key) - except (StorageError, OSError): - return _website_error_response(status_code, "Not Found") - content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html" - is_encrypted = False - try: - metadata = storage.get_object_metadata(bucket, error_doc_key) - is_encrypted = "x-amz-server-side-encryption" in metadata - except (StorageError, OSError): - pass - if is_encrypted and hasattr(storage, "get_object_data"): - try: - data, _ = storage.get_object_data(bucket, error_doc_key) - response = Response(data, status=status_code, mimetype=content_type) - response.headers["Content-Length"] = len(data) - return response - except (StorageError, OSError): - return _website_error_response(status_code, "Not Found") - try: - data = obj_path.read_bytes() - response = Response(data, status=status_code, mimetype=content_type) - response.headers["Content-Length"] = len(data) - return response - except OSError: - return _website_error_response(status_code, "Not Found") - - def _website_error_response(status_code, message): - safe_msg = html_module.escape(str(message)) - safe_code = html_module.escape(str(status_code)) - body = f"{safe_code} {safe_msg}
{safe_code} {safe_msg}
" - return Response(body, status=status_code, mimetype="text/html") - - @app.after_request - def _log_request_end(response): - duration_ms = 0.0 - if hasattr(g, "request_started_at"): - duration_ms = (time.perf_counter() - g.request_started_at) * 1000 - request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}") - response.headers.setdefault("X-Request-ID", request_id) - if app.logger.isEnabledFor(logging.INFO): - app.logger.info( - "Request completed", - extra={ - "path": request.path, - "method": request.method, - "remote_addr": request.remote_addr, - }, - ) - response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" - response.headers["Server"] = "MyFSIO" - - operation_metrics = app.extensions.get("operation_metrics") - if operation_metrics: - bytes_in = getattr(g, "request_bytes_in", 0) - bytes_out = response.content_length or 0 - error_code = getattr(g, "s3_error_code", None) - endpoint_type = classify_endpoint(request.path) - operation_metrics.record_request( - method=request.method, - endpoint_type=endpoint_type, - status_code=response.status_code, - latency_ms=duration_ms, - bytes_in=bytes_in, - bytes_out=bytes_out, - error_code=error_code, - ) - - return response diff --git a/app/access_logging.py b/app/access_logging.py deleted file mode 100644 index f07ac99..0000000 --- a/app/access_logging.py +++ /dev/null @@ -1,265 +0,0 @@ -from __future__ import annotations - -import io -import json -import logging -import queue -import threading -import time -import uuid -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -logger = logging.getLogger(__name__) - - -@dataclass -class AccessLogEntry: - bucket_owner: str = "-" - bucket: str = "-" - timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) - remote_ip: str = "-" - requester: str = "-" - request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16].upper()) - operation: str = "-" - key: str = "-" - request_uri: str = "-" - http_status: int = 200 - error_code: str = "-" - bytes_sent: int = 0 - object_size: int = 0 - total_time_ms: int = 0 - turn_around_time_ms: int = 0 - referrer: str = "-" - user_agent: str = "-" - version_id: str = "-" - host_id: str = "-" - signature_version: str = "SigV4" - cipher_suite: str = "-" - authentication_type: str = "AuthHeader" - host_header: str = "-" - tls_version: str = "-" - - def to_log_line(self) -> str: - time_str = self.timestamp.strftime("[%d/%b/%Y:%H:%M:%S %z]") - return ( - f'{self.bucket_owner} {self.bucket} {time_str} {self.remote_ip} ' - f'{self.requester} {self.request_id} {self.operation} {self.key} ' - f'"{self.request_uri}" {self.http_status} {self.error_code or "-"} ' - f'{self.bytes_sent or "-"} {self.object_size or "-"} {self.total_time_ms or "-"} ' - f'{self.turn_around_time_ms or "-"} "{self.referrer}" "{self.user_agent}" {self.version_id}' - ) - - def to_dict(self) -> Dict[str, Any]: - return { - "bucket_owner": self.bucket_owner, - "bucket": self.bucket, - "timestamp": self.timestamp.isoformat(), - "remote_ip": self.remote_ip, - "requester": self.requester, - "request_id": self.request_id, - "operation": self.operation, - "key": self.key, - "request_uri": self.request_uri, - "http_status": self.http_status, - "error_code": self.error_code, - "bytes_sent": self.bytes_sent, - "object_size": self.object_size, - "total_time_ms": self.total_time_ms, - "referrer": self.referrer, - "user_agent": self.user_agent, - 
"version_id": self.version_id, - } - - -@dataclass -class LoggingConfiguration: - target_bucket: str - target_prefix: str = "" - enabled: bool = True - - def to_dict(self) -> Dict[str, Any]: - return { - "LoggingEnabled": { - "TargetBucket": self.target_bucket, - "TargetPrefix": self.target_prefix, - } - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> Optional["LoggingConfiguration"]: - logging_enabled = data.get("LoggingEnabled") - if not logging_enabled: - return None - return cls( - target_bucket=logging_enabled.get("TargetBucket", ""), - target_prefix=logging_enabled.get("TargetPrefix", ""), - enabled=True, - ) - - -class AccessLoggingService: - def __init__(self, storage_root: Path, flush_interval: int = 60, max_buffer_size: int = 1000): - self.storage_root = storage_root - self.flush_interval = flush_interval - self.max_buffer_size = max_buffer_size - self._configs: Dict[str, LoggingConfiguration] = {} - self._buffer: Dict[str, List[AccessLogEntry]] = {} - self._buffer_lock = threading.Lock() - self._shutdown = threading.Event() - self._storage = None - - self._flush_thread = threading.Thread(target=self._flush_loop, name="access-log-flush", daemon=True) - self._flush_thread.start() - - def set_storage(self, storage: Any) -> None: - self._storage = storage - - def _config_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "logging.json" - - def get_bucket_logging(self, bucket_name: str) -> Optional[LoggingConfiguration]: - if bucket_name in self._configs: - return self._configs[bucket_name] - - config_path = self._config_path(bucket_name) - if not config_path.exists(): - return None - - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - config = LoggingConfiguration.from_dict(data) - if config: - self._configs[bucket_name] = config - return config - except (json.JSONDecodeError, OSError) as e: - logger.warning(f"Failed to load logging config for {bucket_name}: {e}") - return None - - def set_bucket_logging(self, bucket_name: str, config: LoggingConfiguration) -> None: - config_path = self._config_path(bucket_name) - config_path.parent.mkdir(parents=True, exist_ok=True) - config_path.write_text(json.dumps(config.to_dict(), indent=2), encoding="utf-8") - self._configs[bucket_name] = config - - def delete_bucket_logging(self, bucket_name: str) -> None: - config_path = self._config_path(bucket_name) - try: - if config_path.exists(): - config_path.unlink() - except OSError: - pass - self._configs.pop(bucket_name, None) - - def log_request( - self, - bucket_name: str, - *, - operation: str, - key: str = "-", - remote_ip: str = "-", - requester: str = "-", - request_uri: str = "-", - http_status: int = 200, - error_code: str = "", - bytes_sent: int = 0, - object_size: int = 0, - total_time_ms: int = 0, - referrer: str = "-", - user_agent: str = "-", - version_id: str = "-", - request_id: str = "", - ) -> None: - config = self.get_bucket_logging(bucket_name) - if not config or not config.enabled: - return - - entry = AccessLogEntry( - bucket_owner="local-owner", - bucket=bucket_name, - remote_ip=remote_ip, - requester=requester, - request_id=request_id or uuid.uuid4().hex[:16].upper(), - operation=operation, - key=key, - request_uri=request_uri, - http_status=http_status, - error_code=error_code, - bytes_sent=bytes_sent, - object_size=object_size, - total_time_ms=total_time_ms, - referrer=referrer, - user_agent=user_agent, - version_id=version_id, - ) - - target_key = 
f"{config.target_bucket}:{config.target_prefix}" - should_flush = False - with self._buffer_lock: - if target_key not in self._buffer: - self._buffer[target_key] = [] - self._buffer[target_key].append(entry) - should_flush = len(self._buffer[target_key]) >= self.max_buffer_size - - if should_flush: - self._flush_buffer(target_key) - - def _flush_loop(self) -> None: - while not self._shutdown.is_set(): - self._shutdown.wait(timeout=self.flush_interval) - if not self._shutdown.is_set(): - self._flush_all() - - def _flush_all(self) -> None: - with self._buffer_lock: - targets = list(self._buffer.keys()) - - for target_key in targets: - self._flush_buffer(target_key) - - def _flush_buffer(self, target_key: str) -> None: - with self._buffer_lock: - entries = self._buffer.pop(target_key, []) - - if not entries or not self._storage: - return - - try: - bucket_name, prefix = target_key.split(":", 1) - except ValueError: - logger.error(f"Invalid target key: {target_key}") - return - - now = datetime.now(timezone.utc) - log_key = f"{prefix}{now.strftime('%Y-%m-%d-%H-%M-%S')}-{uuid.uuid4().hex[:8]}" - - log_content = "\n".join(entry.to_log_line() for entry in entries) + "\n" - - try: - stream = io.BytesIO(log_content.encode("utf-8")) - self._storage.put_object(bucket_name, log_key, stream, enforce_quota=False) - logger.info(f"Flushed {len(entries)} access log entries to {bucket_name}/{log_key}") - except Exception as e: - logger.error(f"Failed to write access log to {bucket_name}/{log_key}: {e}") - with self._buffer_lock: - if target_key not in self._buffer: - self._buffer[target_key] = [] - self._buffer[target_key] = entries + self._buffer[target_key] - - def flush(self) -> None: - self._flush_all() - - def shutdown(self) -> None: - self._shutdown.set() - self._flush_all() - self._flush_thread.join(timeout=5.0) - - def get_stats(self) -> Dict[str, Any]: - with self._buffer_lock: - buffered = sum(len(entries) for entries in self._buffer.values()) - return { - "buffered_entries": buffered, - "target_buckets": len(self._buffer), - } diff --git a/app/acl.py b/app/acl.py deleted file mode 100644 index 6ee2be0..0000000 --- a/app/acl.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional, Set - - -ACL_PERMISSION_FULL_CONTROL = "FULL_CONTROL" -ACL_PERMISSION_WRITE = "WRITE" -ACL_PERMISSION_WRITE_ACP = "WRITE_ACP" -ACL_PERMISSION_READ = "READ" -ACL_PERMISSION_READ_ACP = "READ_ACP" - -ALL_PERMISSIONS = { - ACL_PERMISSION_FULL_CONTROL, - ACL_PERMISSION_WRITE, - ACL_PERMISSION_WRITE_ACP, - ACL_PERMISSION_READ, - ACL_PERMISSION_READ_ACP, -} - -PERMISSION_TO_ACTIONS = { - ACL_PERMISSION_FULL_CONTROL: {"read", "write", "delete", "list", "share"}, - ACL_PERMISSION_WRITE: {"write", "delete"}, - ACL_PERMISSION_WRITE_ACP: {"share"}, - ACL_PERMISSION_READ: {"read", "list"}, - ACL_PERMISSION_READ_ACP: {"share"}, -} - -GRANTEE_ALL_USERS = "*" -GRANTEE_AUTHENTICATED_USERS = "authenticated" - - -@dataclass -class AclGrant: - grantee: str - permission: str - - def to_dict(self) -> Dict[str, str]: - return {"grantee": self.grantee, "permission": self.permission} - - @classmethod - def from_dict(cls, data: Dict[str, str]) -> "AclGrant": - return cls(grantee=data["grantee"], permission=data["permission"]) - - -@dataclass -class Acl: - owner: str - grants: List[AclGrant] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - return { - "owner": self.owner, - "grants": 
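The buffering scheme above flushes a target either on a timer or once its buffer reaches max_buffer_size; a compact sketch of that size-triggered path (class and names are illustrative, and the real service also re-buffers entries when a write fails):

import threading
from collections import defaultdict
from typing import Callable, Dict, List

class BufferedLogSink:
    """Collect log lines per target and hand them to write_fn once a target's buffer is full."""

    def __init__(self, write_fn: Callable[[str, List[str]], None], max_buffer_size: int = 1000) -> None:
        self._write_fn = write_fn
        self._max = max_buffer_size
        self._buffers: Dict[str, List[str]] = defaultdict(list)
        self._lock = threading.Lock()

    def add(self, target: str, line: str) -> None:
        with self._lock:
            self._buffers[target].append(line)
            should_flush = len(self._buffers[target]) >= self._max
        if should_flush:
            self.flush(target)

    def flush(self, target: str) -> None:
        with self._lock:
            entries = self._buffers.pop(target, [])
        if entries:
            self._write_fn(target, entries)  # the real service writes these as one object in the target bucket

sink = BufferedLogSink(lambda t, lines: print(f"{t}: {len(lines)} entries"), max_buffer_size=2)
sink.add("logs:prefix/", "GET /a 200")
sink.add("logs:prefix/", "GET /b 404")  # second entry reaches the threshold and triggers a flush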
[g.to_dict() for g in self.grants], - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Acl": - return cls( - owner=data.get("owner", ""), - grants=[AclGrant.from_dict(g) for g in data.get("grants", [])], - ) - - def get_allowed_actions(self, principal_id: Optional[str], is_authenticated: bool = True) -> Set[str]: - actions: Set[str] = set() - if principal_id and principal_id == self.owner: - actions.update(PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL]) - for grant in self.grants: - if grant.grantee == GRANTEE_ALL_USERS: - actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set())) - elif grant.grantee == GRANTEE_AUTHENTICATED_USERS and is_authenticated: - actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set())) - elif principal_id and grant.grantee == principal_id: - actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set())) - return actions - - -CANNED_ACLS = { - "private": lambda owner: Acl( - owner=owner, - grants=[AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL)], - ), - "public-read": lambda owner: Acl( - owner=owner, - grants=[ - AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL), - AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ), - ], - ), - "public-read-write": lambda owner: Acl( - owner=owner, - grants=[ - AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL), - AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ), - AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_WRITE), - ], - ), - "authenticated-read": lambda owner: Acl( - owner=owner, - grants=[ - AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL), - AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_READ), - ], - ), - "bucket-owner-read": lambda owner: Acl( - owner=owner, - grants=[ - AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL), - ], - ), - "bucket-owner-full-control": lambda owner: Acl( - owner=owner, - grants=[ - AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL), - ], - ), -} - - -def create_canned_acl(canned_acl: str, owner: str) -> Acl: - factory = CANNED_ACLS.get(canned_acl) - if not factory: - return CANNED_ACLS["private"](owner) - return factory(owner) - - -class AclService: - def __init__(self, storage_root: Path): - self.storage_root = storage_root - self._bucket_acl_cache: Dict[str, Acl] = {} - - def _bucket_acl_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".acl.json" - - def get_bucket_acl(self, bucket_name: str) -> Optional[Acl]: - if bucket_name in self._bucket_acl_cache: - return self._bucket_acl_cache[bucket_name] - acl_path = self._bucket_acl_path(bucket_name) - if not acl_path.exists(): - return None - try: - data = json.loads(acl_path.read_text(encoding="utf-8")) - acl = Acl.from_dict(data) - self._bucket_acl_cache[bucket_name] = acl - return acl - except (OSError, json.JSONDecodeError): - return None - - def set_bucket_acl(self, bucket_name: str, acl: Acl) -> None: - acl_path = self._bucket_acl_path(bucket_name) - acl_path.parent.mkdir(parents=True, exist_ok=True) - acl_path.write_text(json.dumps(acl.to_dict(), indent=2), encoding="utf-8") - self._bucket_acl_cache[bucket_name] = acl - - def set_bucket_canned_acl(self, bucket_name: str, canned_acl: str, owner: str) -> Acl: - acl = create_canned_acl(canned_acl, owner) - self.set_bucket_acl(bucket_name, acl) - return acl - - def delete_bucket_acl(self, bucket_name: str) -> None: - acl_path = 
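A compressed sketch of how canned ACL grants expand into allowed actions, mirroring PERMISSION_TO_ACTIONS and get_allowed_actions above (the tuple-based grant representation is a simplification, not the module's data model):

from typing import Dict, List, Optional, Set, Tuple

PERMISSION_TO_ACTIONS: Dict[str, Set[str]] = {
    "FULL_CONTROL": {"read", "write", "delete", "list", "share"},
    "READ": {"read", "list"},
    "WRITE": {"write", "delete"},
}

def allowed_actions(owner: str, grants: List[Tuple[str, str]],
                    principal: Optional[str], authenticated: bool) -> Set[str]:
    """Union the actions granted to a principal by (grantee, permission) pairs."""
    actions: Set[str] = set()
    if principal == owner:
        actions |= PERMISSION_TO_ACTIONS["FULL_CONTROL"]
    for grantee, permission in grants:
        if (grantee == "*"
                or (grantee == "authenticated" and authenticated)
                or grantee == principal):
            actions |= PERMISSION_TO_ACTIONS.get(permission, set())
    return actions

# public-read: anyone can read/list, only the owner can write.
grants = [("owner-1", "FULL_CONTROL"), ("*", "READ")]
assert "read" in allowed_actions("owner-1", grants, None, authenticated=False)
assert "write" not in allowed_actions("owner-1", grants, "someone-else", authenticated=True)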
self._bucket_acl_path(bucket_name) - if acl_path.exists(): - acl_path.unlink() - self._bucket_acl_cache.pop(bucket_name, None) - - def evaluate_bucket_acl( - self, - bucket_name: str, - principal_id: Optional[str], - action: str, - is_authenticated: bool = True, - ) -> bool: - acl = self.get_bucket_acl(bucket_name) - if not acl: - return False - allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated) - return action in allowed_actions - - def get_object_acl(self, bucket_name: str, object_key: str, object_metadata: Dict[str, Any]) -> Optional[Acl]: - acl_data = object_metadata.get("__acl__") - if not acl_data: - return None - try: - return Acl.from_dict(acl_data) - except (TypeError, KeyError): - return None - - def create_object_acl_metadata(self, acl: Acl) -> Dict[str, Any]: - return {"__acl__": acl.to_dict()} - - def evaluate_object_acl( - self, - object_metadata: Dict[str, Any], - principal_id: Optional[str], - action: str, - is_authenticated: bool = True, - ) -> bool: - acl = self.get_object_acl("", "", object_metadata) - if not acl: - return False - allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated) - return action in allowed_actions diff --git a/app/admin_api.py b/app/admin_api.py deleted file mode 100644 index 89c87f0..0000000 --- a/app/admin_api.py +++ /dev/null @@ -1,984 +0,0 @@ -from __future__ import annotations - -import ipaddress -import json -import logging -import re -import socket -import time -from typing import Any, Dict, Optional, Tuple -from urllib.parse import urlparse - -import requests -from flask import Blueprint, Response, current_app, jsonify, request - -from .connections import ConnectionStore -from .extensions import limiter -from .gc import GarbageCollector -from .integrity import IntegrityChecker -from .iam import IamError, Principal -from .replication import ReplicationManager -from .site_registry import PeerSite, SiteInfo, SiteRegistry -from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain - - -def _is_safe_url(url: str, allow_internal: bool = False) -> bool: - """Check if a URL is safe to make requests to (not internal/private). - - Args: - url: The URL to check. - allow_internal: If True, allows internal/private IP addresses. - Use for self-hosted deployments on internal networks. - """ - try: - parsed = urlparse(url) - hostname = parsed.hostname - if not hostname: - return False - cloud_metadata_hosts = { - "metadata.google.internal", - "169.254.169.254", - } - if hostname.lower() in cloud_metadata_hosts: - return False - if allow_internal: - return True - blocked_hosts = { - "localhost", - "127.0.0.1", - "0.0.0.0", - "::1", - "[::1]", - } - if hostname.lower() in blocked_hosts: - return False - try: - resolved_ip = socket.gethostbyname(hostname) - ip = ipaddress.ip_address(resolved_ip) - if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: - return False - except (socket.gaierror, ValueError): - return False - return True - except Exception: - return False - - -def _validate_endpoint(endpoint: str) -> Optional[str]: - """Validate endpoint URL format. Returns error message or None.""" - try: - parsed = urlparse(endpoint) - if not parsed.scheme or parsed.scheme not in ("http", "https"): - return "Endpoint must be http or https URL" - if not parsed.netloc: - return "Endpoint must have a host" - return None - except Exception: - return "Invalid endpoint URL" - - -def _validate_priority(priority: Any) -> Optional[str]: - """Validate priority value. 
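A self-contained sketch of the SSRF guard implemented by _is_safe_url: reject known metadata and loopback hosts, then resolve the name and reject private ranges (the allow_internal escape hatch is omitted here):

import ipaddress
import socket
from urllib.parse import urlparse

BLOCKED_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1",
                 "metadata.google.internal", "169.254.169.254"}

def is_safe_url(url: str) -> bool:
    """Return False for URLs whose host is a blocked name or resolves to a non-public address."""
    hostname = urlparse(url).hostname
    if not hostname or hostname.lower() in BLOCKED_HOSTS:
        return False
    try:
        ip = ipaddress.ip_address(socket.gethostbyname(hostname))
    except (socket.gaierror, ValueError):
        return False
    return not (ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved)

# The cloud metadata endpoint is always rejected before any DNS resolution happens.
assert is_safe_url("http://169.254.169.254/latest/meta-data/") is False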
Returns error message or None.""" - try: - p = int(priority) - if p < 0 or p > 1000: - return "Priority must be between 0 and 1000" - return None - except (TypeError, ValueError): - return "Priority must be an integer" - - -def _validate_region(region: str) -> Optional[str]: - """Validate region format. Returns error message or None.""" - if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region): - return "Region must match format like us-east-1" - return None - - -def _validate_site_id(site_id: str) -> Optional[str]: - """Validate site_id format. Returns error message or None.""" - if not site_id or len(site_id) > 63: - return "site_id must be 1-63 characters" - if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id): - return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores" - return None - - -logger = logging.getLogger(__name__) - -admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin") - - -def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]: - from .s3_api import _require_principal as s3_require_principal - return s3_require_principal() - - -def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]: - principal, error = _require_principal() - if error: - return None, error - - try: - _iam().authorize(principal, None, "iam:*") - return principal, None - except IamError: - return None, _json_error("AccessDenied", "Admin access required", 403) - - -def _site_registry() -> SiteRegistry: - return current_app.extensions["site_registry"] - - -def _connections() -> ConnectionStore: - return current_app.extensions["connections"] - - -def _replication() -> ReplicationManager: - return current_app.extensions["replication"] - - -def _iam(): - return current_app.extensions["iam"] - - -def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]: - return {"error": {"code": code, "message": message}}, status - - -def _get_admin_rate_limit() -> str: - return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute") - - -@admin_api_bp.route("/site", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def get_local_site(): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - local_site = registry.get_local_site() - - if local_site: - return jsonify(local_site.to_dict()) - - config_site_id = current_app.config.get("SITE_ID") - config_endpoint = current_app.config.get("SITE_ENDPOINT") - - if config_site_id: - return jsonify({ - "site_id": config_site_id, - "endpoint": config_endpoint or "", - "region": current_app.config.get("SITE_REGION", "us-east-1"), - "priority": current_app.config.get("SITE_PRIORITY", 100), - "display_name": config_site_id, - "source": "environment", - }) - - return _json_error("NotFound", "Local site not configured", 404) - - -@admin_api_bp.route("/site", methods=["PUT"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def update_local_site(): - principal, error = _require_admin() - if error: - return error - - payload = request.get_json(silent=True) or {} - - site_id = payload.get("site_id") - endpoint = payload.get("endpoint") - - if not site_id: - return _json_error("ValidationError", "site_id is required", 400) - - site_id_error = _validate_site_id(site_id) - if site_id_error: - return _json_error("ValidationError", site_id_error, 400) - - if endpoint: - endpoint_error = _validate_endpoint(endpoint) - if endpoint_error: - return _json_error("ValidationError", 
endpoint_error, 400) - - if "priority" in payload: - priority_error = _validate_priority(payload["priority"]) - if priority_error: - return _json_error("ValidationError", priority_error, 400) - - if "region" in payload: - region_error = _validate_region(payload["region"]) - if region_error: - return _json_error("ValidationError", region_error, 400) - - registry = _site_registry() - existing = registry.get_local_site() - - site = SiteInfo( - site_id=site_id, - endpoint=endpoint or "", - region=payload.get("region", "us-east-1"), - priority=payload.get("priority", 100), - display_name=payload.get("display_name", site_id), - created_at=existing.created_at if existing else None, - ) - - registry.set_local_site(site) - - logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key}) - return jsonify(site.to_dict()) - - -@admin_api_bp.route("/sites", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def list_all_sites(): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - local = registry.get_local_site() - peers = registry.list_peers() - - result = { - "local": local.to_dict() if local else None, - "peers": [peer.to_dict() for peer in peers], - "total_peers": len(peers), - } - - return jsonify(result) - - -@admin_api_bp.route("/sites", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def register_peer_site(): - principal, error = _require_admin() - if error: - return error - - payload = request.get_json(silent=True) or {} - - site_id = payload.get("site_id") - endpoint = payload.get("endpoint") - - if not site_id: - return _json_error("ValidationError", "site_id is required", 400) - - site_id_error = _validate_site_id(site_id) - if site_id_error: - return _json_error("ValidationError", site_id_error, 400) - - if not endpoint: - return _json_error("ValidationError", "endpoint is required", 400) - - endpoint_error = _validate_endpoint(endpoint) - if endpoint_error: - return _json_error("ValidationError", endpoint_error, 400) - - region = payload.get("region", "us-east-1") - region_error = _validate_region(region) - if region_error: - return _json_error("ValidationError", region_error, 400) - - priority = payload.get("priority", 100) - priority_error = _validate_priority(priority) - if priority_error: - return _json_error("ValidationError", priority_error, 400) - - registry = _site_registry() - - if registry.get_peer(site_id): - return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409) - - connection_id = payload.get("connection_id") - if connection_id: - if not _connections().get(connection_id): - return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400) - - peer = PeerSite( - site_id=site_id, - endpoint=endpoint, - region=region, - priority=int(priority), - display_name=payload.get("display_name", site_id), - connection_id=connection_id, - ) - - registry.add_peer(peer) - - logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key}) - return jsonify(peer.to_dict()), 201 - - -@admin_api_bp.route("/sites/", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def get_peer_site(site_id: str): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - peer = registry.get_peer(site_id) - - if not peer: - return _json_error("NotFound", f"Peer site '{site_id}' not found", 404) - - return jsonify(peer.to_dict()) - - -@admin_api_bp.route("/sites/", 
methods=["PUT"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def update_peer_site(site_id: str): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - existing = registry.get_peer(site_id) - - if not existing: - return _json_error("NotFound", f"Peer site '{site_id}' not found", 404) - - payload = request.get_json(silent=True) or {} - - if "endpoint" in payload: - endpoint_error = _validate_endpoint(payload["endpoint"]) - if endpoint_error: - return _json_error("ValidationError", endpoint_error, 400) - - if "priority" in payload: - priority_error = _validate_priority(payload["priority"]) - if priority_error: - return _json_error("ValidationError", priority_error, 400) - - if "region" in payload: - region_error = _validate_region(payload["region"]) - if region_error: - return _json_error("ValidationError", region_error, 400) - - if "connection_id" in payload: - if payload["connection_id"] and not _connections().get(payload["connection_id"]): - return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400) - - peer = PeerSite( - site_id=site_id, - endpoint=payload.get("endpoint", existing.endpoint), - region=payload.get("region", existing.region), - priority=payload.get("priority", existing.priority), - display_name=payload.get("display_name", existing.display_name), - connection_id=payload.get("connection_id", existing.connection_id), - created_at=existing.created_at, - is_healthy=existing.is_healthy, - last_health_check=existing.last_health_check, - ) - - registry.update_peer(peer) - - logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key}) - return jsonify(peer.to_dict()) - - -@admin_api_bp.route("/sites/", methods=["DELETE"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def delete_peer_site(site_id: str): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - - if not registry.delete_peer(site_id): - return _json_error("NotFound", f"Peer site '{site_id}' not found", 404) - - logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key}) - return Response(status=204) - - -@admin_api_bp.route("/sites//health", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def check_peer_health(site_id: str): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - peer = registry.get_peer(site_id) - - if not peer: - return _json_error("NotFound", f"Peer site '{site_id}' not found", 404) - - is_healthy = False - error_message = None - - if peer.connection_id: - connection = _connections().get(peer.connection_id) - if connection: - is_healthy = _replication().check_endpoint_health(connection) - else: - error_message = f"Connection '{peer.connection_id}' not found" - else: - error_message = "No connection configured for this peer" - - registry.update_health(site_id, is_healthy) - - result = { - "site_id": site_id, - "is_healthy": is_healthy, - "checked_at": time.time(), - } - if error_message: - result["error"] = error_message - - return jsonify(result) - - -@admin_api_bp.route("/topology", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def get_topology(): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - local = registry.get_local_site() - peers = registry.list_peers() - - sites = [] - - if local: - sites.append({ - **local.to_dict(), - "is_local": True, - "is_healthy": True, 
- }) - - for peer in peers: - sites.append({ - **peer.to_dict(), - "is_local": False, - }) - - sites.sort(key=lambda s: s.get("priority", 100)) - - return jsonify({ - "sites": sites, - "total": len(sites), - "healthy_count": sum(1 for s in sites if s.get("is_healthy")), - }) - - -@admin_api_bp.route("/sites//bidirectional-status", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def check_bidirectional_status(site_id: str): - principal, error = _require_admin() - if error: - return error - - registry = _site_registry() - peer = registry.get_peer(site_id) - - if not peer: - return _json_error("NotFound", f"Peer site '{site_id}' not found", 404) - - local_site = registry.get_local_site() - replication = _replication() - local_rules = replication.list_rules() - - local_bidir_rules = [] - for rule in local_rules: - if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional": - local_bidir_rules.append({ - "bucket_name": rule.bucket_name, - "target_bucket": rule.target_bucket, - "enabled": rule.enabled, - }) - - result = { - "site_id": site_id, - "local_site_id": local_site.site_id if local_site else None, - "local_endpoint": local_site.endpoint if local_site else None, - "local_bidirectional_rules": local_bidir_rules, - "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False), - "remote_status": None, - "issues": [], - "is_fully_configured": False, - } - - if not local_site or not local_site.site_id: - result["issues"].append({ - "code": "NO_LOCAL_SITE_ID", - "message": "Local site identity not configured", - "severity": "error", - }) - - if not local_site or not local_site.endpoint: - result["issues"].append({ - "code": "NO_LOCAL_ENDPOINT", - "message": "Local site endpoint not configured (remote site cannot reach back)", - "severity": "error", - }) - - if not peer.connection_id: - result["issues"].append({ - "code": "NO_CONNECTION", - "message": "No connection configured for this peer", - "severity": "error", - }) - return jsonify(result) - - connection = _connections().get(peer.connection_id) - if not connection: - result["issues"].append({ - "code": "CONNECTION_NOT_FOUND", - "message": f"Connection '{peer.connection_id}' not found", - "severity": "error", - }) - return jsonify(result) - - if not local_bidir_rules: - result["issues"].append({ - "code": "NO_LOCAL_BIDIRECTIONAL_RULES", - "message": "No bidirectional replication rules configured on this site", - "severity": "warning", - }) - - if not result["local_site_sync_enabled"]: - result["issues"].append({ - "code": "SITE_SYNC_DISABLED", - "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). 
Pull operations will not work.", - "severity": "warning", - }) - - if not replication.check_endpoint_health(connection): - result["issues"].append({ - "code": "REMOTE_UNREACHABLE", - "message": "Remote endpoint is not reachable", - "severity": "error", - }) - return jsonify(result) - - allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False) - if not _is_safe_url(peer.endpoint, allow_internal=allow_internal): - result["issues"].append({ - "code": "ENDPOINT_NOT_ALLOWED", - "message": "Peer endpoint points to cloud metadata service (SSRF protection)", - "severity": "error", - }) - return jsonify(result) - - try: - admin_url = peer.endpoint.rstrip("/") + "/admin/sites" - resp = requests.get( - admin_url, - timeout=10, - headers={ - "Accept": "application/json", - "X-Access-Key": connection.access_key, - "X-Secret-Key": connection.secret_key, - }, - ) - - if resp.status_code == 200: - try: - remote_data = resp.json() - if not isinstance(remote_data, dict): - raise ValueError("Expected JSON object") - remote_local = remote_data.get("local") - if remote_local is not None and not isinstance(remote_local, dict): - raise ValueError("Expected 'local' to be an object") - remote_peers = remote_data.get("peers", []) - if not isinstance(remote_peers, list): - raise ValueError("Expected 'peers' to be a list") - except (ValueError, json.JSONDecodeError) as e: - logger.warning("Invalid JSON from remote admin API: %s", e) - result["remote_status"] = {"reachable": True, "invalid_response": True} - result["issues"].append({ - "code": "REMOTE_INVALID_RESPONSE", - "message": "Remote admin API returned invalid JSON", - "severity": "warning", - }) - return jsonify(result) - - result["remote_status"] = { - "reachable": True, - "local_site": remote_local, - "site_sync_enabled": None, - "has_peer_for_us": False, - "peer_connection_configured": False, - "has_bidirectional_rules_for_us": False, - } - - for rp in remote_peers: - if not isinstance(rp, dict): - continue - if local_site and ( - rp.get("site_id") == local_site.site_id or - rp.get("endpoint") == local_site.endpoint - ): - result["remote_status"]["has_peer_for_us"] = True - result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id")) - break - - if not result["remote_status"]["has_peer_for_us"]: - result["issues"].append({ - "code": "REMOTE_NO_PEER_FOR_US", - "message": "Remote site does not have this site registered as a peer", - "severity": "error", - }) - elif not result["remote_status"]["peer_connection_configured"]: - result["issues"].append({ - "code": "REMOTE_NO_CONNECTION_FOR_US", - "message": "Remote site has us as peer but no connection configured (cannot push back)", - "severity": "error", - }) - elif resp.status_code == 401 or resp.status_code == 403: - result["remote_status"] = { - "reachable": True, - "admin_access_denied": True, - } - result["issues"].append({ - "code": "REMOTE_ADMIN_ACCESS_DENIED", - "message": "Cannot verify remote configuration (admin access denied)", - "severity": "warning", - }) - else: - result["remote_status"] = { - "reachable": True, - "admin_api_error": resp.status_code, - } - result["issues"].append({ - "code": "REMOTE_ADMIN_API_ERROR", - "message": f"Remote admin API returned status {resp.status_code}", - "severity": "warning", - }) - except requests.RequestException as e: - logger.warning("Remote admin API unreachable: %s", e) - result["remote_status"] = { - "reachable": False, - "error": "Connection failed", - } - result["issues"].append({ - "code": 
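A condensed sketch of the remote admin-API probe above: call /admin/sites on the peer with the connection credentials and validate the response shape before trusting it (the function name and error handling are simplified; requests is assumed to be installed):

import requests

def fetch_remote_sites(endpoint: str, access_key: str, secret_key: str, timeout: float = 10.0) -> dict:
    """GET <endpoint>/admin/sites and reject payloads that do not have the expected structure."""
    resp = requests.get(
        endpoint.rstrip("/") + "/admin/sites",
        timeout=timeout,
        headers={
            "Accept": "application/json",
            "X-Access-Key": access_key,
            "X-Secret-Key": secret_key,
        },
    )
    resp.raise_for_status()
    data = resp.json()
    if not isinstance(data, dict) or not isinstance(data.get("peers", []), list):
        raise ValueError("unexpected response shape from remote admin API")
    return data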
"REMOTE_ADMIN_UNREACHABLE", - "message": "Could not reach remote admin API", - "severity": "warning", - }) - except Exception as e: - logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True) - result["issues"].append({ - "code": "VERIFICATION_ERROR", - "message": "Internal error during verification", - "severity": "warning", - }) - - error_issues = [i for i in result["issues"] if i["severity"] == "error"] - result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0 - - return jsonify(result) - - -def _website_domains() -> WebsiteDomainStore: - return current_app.extensions["website_domains"] - - -def _storage(): - return current_app.extensions["object_storage"] - - -def _require_iam_action(action: str): - principal, error = _require_principal() - if error: - return None, error - try: - _iam().authorize(principal, None, action) - return principal, None - except IamError: - return None, _json_error("AccessDenied", f"Requires {action} permission", 403) - - -@admin_api_bp.route("/iam/users", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_list_users(): - principal, error = _require_iam_action("iam:list_users") - if error: - return error - return jsonify({"users": _iam().list_users()}) - - -@admin_api_bp.route("/iam/users/", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_get_user(identifier): - principal, error = _require_iam_action("iam:get_user") - if error: - return error - try: - user_id = _iam().resolve_user_id(identifier) - return jsonify(_iam().get_user_by_id(user_id)) - except IamError as exc: - return _json_error("NotFound", str(exc), 404) - - -@admin_api_bp.route("/iam/users//policies", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_get_user_policies(identifier): - principal, error = _require_iam_action("iam:get_policy") - if error: - return error - try: - return jsonify({"policies": _iam().get_user_policies(identifier)}) - except IamError as exc: - return _json_error("NotFound", str(exc), 404) - - -@admin_api_bp.route("/iam/users//keys", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_create_access_key(identifier): - principal, error = _require_iam_action("iam:create_key") - if error: - return error - try: - result = _iam().create_access_key(identifier) - logger.info("Access key created for %s by %s", identifier, principal.access_key) - return jsonify(result), 201 - except IamError as exc: - return _json_error("InvalidRequest", str(exc), 400) - - -@admin_api_bp.route("/iam/users//keys/", methods=["DELETE"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_delete_access_key(identifier, access_key): - principal, error = _require_iam_action("iam:delete_key") - if error: - return error - try: - _iam().delete_access_key(access_key) - logger.info("Access key %s deleted by %s", access_key, principal.access_key) - return "", 204 - except IamError as exc: - return _json_error("InvalidRequest", str(exc), 400) - - -@admin_api_bp.route("/iam/users//disable", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_disable_user(identifier): - principal, error = _require_iam_action("iam:disable_user") - if error: - return error - try: - _iam().disable_user(identifier) - logger.info("User %s disabled by %s", identifier, principal.access_key) - return jsonify({"status": "disabled"}) - except IamError as exc: - return _json_error("InvalidRequest", str(exc), 400) - - -@admin_api_bp.route("/iam/users//enable", 
methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def iam_enable_user(identifier): - principal, error = _require_iam_action("iam:disable_user") - if error: - return error - try: - _iam().enable_user(identifier) - logger.info("User %s enabled by %s", identifier, principal.access_key) - return jsonify({"status": "enabled"}) - except IamError as exc: - return _json_error("InvalidRequest", str(exc), 400) - - -@admin_api_bp.route("/website-domains", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def list_website_domains(): - principal, error = _require_admin() - if error: - return error - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _json_error("InvalidRequest", "Website hosting is not enabled", 400) - return jsonify(_website_domains().list_all()) - - -@admin_api_bp.route("/website-domains", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def create_website_domain(): - principal, error = _require_admin() - if error: - return error - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _json_error("InvalidRequest", "Website hosting is not enabled", 400) - payload = request.get_json(silent=True) or {} - domain = normalize_domain(payload.get("domain") or "") - bucket = (payload.get("bucket") or "").strip() - if not domain: - return _json_error("ValidationError", "domain is required", 400) - if not is_valid_domain(domain): - return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400) - if not bucket: - return _json_error("ValidationError", "bucket is required", 400) - storage = _storage() - if not storage.bucket_exists(bucket): - return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404) - store = _website_domains() - existing = store.get_bucket(domain) - if existing: - return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409) - store.set_mapping(domain, bucket) - logger.info("Website domain mapping created: %s -> %s", domain, bucket) - return jsonify({"domain": domain, "bucket": bucket}), 201 - - -@admin_api_bp.route("/website-domains/", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def get_website_domain(domain: str): - principal, error = _require_admin() - if error: - return error - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _json_error("InvalidRequest", "Website hosting is not enabled", 400) - domain = normalize_domain(domain) - bucket = _website_domains().get_bucket(domain) - if not bucket: - return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404) - return jsonify({"domain": domain, "bucket": bucket}) - - -@admin_api_bp.route("/website-domains/", methods=["PUT"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def update_website_domain(domain: str): - principal, error = _require_admin() - if error: - return error - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _json_error("InvalidRequest", "Website hosting is not enabled", 400) - domain = normalize_domain(domain) - payload = request.get_json(silent=True) or {} - bucket = (payload.get("bucket") or "").strip() - if not bucket: - return _json_error("ValidationError", "bucket is required", 400) - storage = _storage() - if not storage.bucket_exists(bucket): - return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404) - store = _website_domains() - if not store.get_bucket(domain): - return _json_error("NotFound", f"No mapping found for domain 
'{domain}'", 404) - store.set_mapping(domain, bucket) - logger.info("Website domain mapping updated: %s -> %s", domain, bucket) - return jsonify({"domain": domain, "bucket": bucket}) - - -@admin_api_bp.route("/website-domains/", methods=["DELETE"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def delete_website_domain(domain: str): - principal, error = _require_admin() - if error: - return error - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _json_error("InvalidRequest", "Website hosting is not enabled", 400) - domain = normalize_domain(domain) - if not _website_domains().delete_mapping(domain): - return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404) - logger.info("Website domain mapping deleted: %s", domain) - return Response(status=204) - - -def _gc() -> Optional[GarbageCollector]: - return current_app.extensions.get("gc") - - -@admin_api_bp.route("/gc/status", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def gc_status(): - principal, error = _require_admin() - if error: - return error - gc = _gc() - if not gc: - return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."}) - return jsonify(gc.get_status()) - - -@admin_api_bp.route("/gc/run", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def gc_run_now(): - principal, error = _require_admin() - if error: - return error - gc = _gc() - if not gc: - return _json_error("InvalidRequest", "GC is not enabled", 400) - payload = request.get_json(silent=True) or {} - started = gc.run_async(dry_run=payload.get("dry_run")) - logger.info("GC manual run by %s", principal.access_key) - if not started: - return _json_error("Conflict", "GC is already in progress", 409) - return jsonify({"status": "started"}) - - -@admin_api_bp.route("/gc/history", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def gc_history(): - principal, error = _require_admin() - if error: - return error - gc = _gc() - if not gc: - return jsonify({"executions": []}) - limit = min(int(request.args.get("limit", 50)), 200) - offset = int(request.args.get("offset", 0)) - records = gc.get_history(limit=limit, offset=offset) - return jsonify({"executions": records}) - - -def _integrity() -> Optional[IntegrityChecker]: - return current_app.extensions.get("integrity") - - -@admin_api_bp.route("/integrity/status", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def integrity_status(): - principal, error = _require_admin() - if error: - return error - checker = _integrity() - if not checker: - return jsonify({"enabled": False, "message": "Integrity checker is not enabled. 
Set INTEGRITY_ENABLED=true to enable."}) - return jsonify(checker.get_status()) - - -@admin_api_bp.route("/integrity/run", methods=["POST"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def integrity_run_now(): - principal, error = _require_admin() - if error: - return error - checker = _integrity() - if not checker: - return _json_error("InvalidRequest", "Integrity checker is not enabled", 400) - payload = request.get_json(silent=True) or {} - override_dry_run = payload.get("dry_run") - override_auto_heal = payload.get("auto_heal") - started = checker.run_async( - auto_heal=override_auto_heal if override_auto_heal is not None else None, - dry_run=override_dry_run if override_dry_run is not None else None, - ) - logger.info("Integrity manual run by %s", principal.access_key) - if not started: - return _json_error("Conflict", "A scan is already in progress", 409) - return jsonify({"status": "started"}) - - -@admin_api_bp.route("/integrity/history", methods=["GET"]) -@limiter.limit(lambda: _get_admin_rate_limit()) -def integrity_history(): - principal, error = _require_admin() - if error: - return error - checker = _integrity() - if not checker: - return jsonify({"executions": []}) - limit = min(int(request.args.get("limit", 50)), 200) - offset = int(request.args.get("offset", 0)) - records = checker.get_history(limit=limit, offset=offset) - return jsonify({"executions": records}) - - diff --git a/app/bucket_policies.py b/app/bucket_policies.py deleted file mode 100644 index 61a9337..0000000 --- a/app/bucket_policies.py +++ /dev/null @@ -1,404 +0,0 @@ -from __future__ import annotations - -import ipaddress -import json -import os -import re -import time -from dataclasses import dataclass, field -from fnmatch import fnmatch, translate -from functools import lru_cache -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple - - -RESOURCE_PREFIX = "arn:aws:s3:::" - - -@lru_cache(maxsize=256) -def _compile_pattern(pattern: str) -> Pattern[str]: - return re.compile(translate(pattern), re.IGNORECASE) - - -def _match_string_like(value: str, pattern: str) -> bool: - compiled = _compile_pattern(pattern) - return bool(compiled.match(value)) - - -def _ip_in_cidr(ip_str: str, cidr: str) -> bool: - try: - ip = ipaddress.ip_address(ip_str) - network = ipaddress.ip_network(cidr, strict=False) - return ip in network - except ValueError: - return False - - -def _evaluate_condition_operator( - operator: str, - condition_key: str, - condition_values: List[str], - context: Dict[str, Any], -) -> bool: - context_value = context.get(condition_key) - op_lower = operator.lower() - if_exists = op_lower.endswith("ifexists") - if if_exists: - op_lower = op_lower[:-8] - - if context_value is None: - return if_exists - - context_value_str = str(context_value) - context_value_lower = context_value_str.lower() - - if op_lower == "stringequals": - return context_value_str in condition_values - elif op_lower == "stringnotequals": - return context_value_str not in condition_values - elif op_lower == "stringequalsignorecase": - return context_value_lower in [v.lower() for v in condition_values] - elif op_lower == "stringnotequalsignorecase": - return context_value_lower not in [v.lower() for v in condition_values] - elif op_lower == "stringlike": - return any(_match_string_like(context_value_str, p) for p in condition_values) - elif op_lower == "stringnotlike": - return not any(_match_string_like(context_value_str, p) for p in condition_values) - elif op_lower == 
"ipaddress": - return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values) - elif op_lower == "notipaddress": - return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values) - elif op_lower == "bool": - bool_val = context_value_lower in ("true", "1", "yes") - return str(bool_val).lower() in [v.lower() for v in condition_values] - elif op_lower == "null": - is_null = context_value is None or context_value == "" - expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True - return is_null == expected_null - - return False - -ACTION_ALIASES = { - "s3:listbucket": "list", - "s3:listallmybuckets": "list", - "s3:listbucketversions": "list", - "s3:listmultipartuploads": "list", - "s3:listparts": "list", - "s3:getobject": "read", - "s3:getobjectversion": "read", - "s3:getobjecttagging": "read", - "s3:getobjectversiontagging": "read", - "s3:getobjectacl": "read", - "s3:getbucketversioning": "read", - "s3:headobject": "read", - "s3:headbucket": "read", - "s3:putobject": "write", - "s3:createbucket": "write", - "s3:putobjecttagging": "write", - "s3:putbucketversioning": "write", - "s3:createmultipartupload": "write", - "s3:uploadpart": "write", - "s3:completemultipartupload": "write", - "s3:abortmultipartupload": "write", - "s3:copyobject": "write", - "s3:deleteobject": "delete", - "s3:deleteobjectversion": "delete", - "s3:deletebucket": "delete", - "s3:deleteobjecttagging": "delete", - "s3:putobjectacl": "share", - "s3:putbucketacl": "share", - "s3:getbucketacl": "share", - "s3:putbucketpolicy": "policy", - "s3:getbucketpolicy": "policy", - "s3:deletebucketpolicy": "policy", - "s3:getreplicationconfiguration": "replication", - "s3:putreplicationconfiguration": "replication", - "s3:deletereplicationconfiguration": "replication", - "s3:replicateobject": "replication", - "s3:replicatetags": "replication", - "s3:replicatedelete": "replication", - "s3:getlifecycleconfiguration": "lifecycle", - "s3:putlifecycleconfiguration": "lifecycle", - "s3:deletelifecycleconfiguration": "lifecycle", - "s3:getbucketlifecycle": "lifecycle", - "s3:putbucketlifecycle": "lifecycle", - "s3:getbucketcors": "cors", - "s3:putbucketcors": "cors", - "s3:deletebucketcors": "cors", -} - - -def _normalize_action(action: str) -> str: - action = action.strip().lower() - if action == "*": - return "*" - return ACTION_ALIASES.get(action, action) - - -def _normalize_actions(actions: Iterable[str]) -> List[str]: - values: List[str] = [] - for action in actions: - canonical = _normalize_action(action) - if canonical == "*" and "*" not in values: - return ["*"] - if canonical and canonical not in values: - values.append(canonical) - return values - - -def _normalize_principals(principal_field: Any) -> List[str] | str: - if principal_field == "*": - return "*" - - def _collect(values: Any) -> List[str]: - if values is None: - return [] - if values == "*": - return ["*"] - if isinstance(values, str): - return [values] - if isinstance(values, dict): - aggregated: List[str] = [] - for nested in values.values(): - chunk = _collect(nested) - if "*" in chunk: - return ["*"] - aggregated.extend(chunk) - return aggregated - if isinstance(values, Iterable): - aggregated = [] - for nested in values: - chunk = _collect(nested) - if "*" in chunk: - return ["*"] - aggregated.extend(chunk) - return aggregated - return [str(values)] - - normalized: List[str] = [] - for entry in _collect(principal_field): - token = str(entry).strip() - if token == "*": - return "*" - if token and 
token not in normalized: - normalized.append(token) - return normalized or "*" - - -def _parse_resource(resource: str) -> tuple[str | None, str | None]: - if not resource.startswith(RESOURCE_PREFIX): - return None, None - remainder = resource[len(RESOURCE_PREFIX) :] - if "/" not in remainder: - bucket = remainder or "*" - return bucket, None - bucket, _, key_pattern = remainder.partition("/") - return bucket or "*", key_pattern or "*" - - -@dataclass -class BucketPolicyStatement: - sid: Optional[str] - effect: str - principals: List[str] | str - actions: List[str] - resources: List[Tuple[str | None, str | None]] - conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict) - _compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None - - def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]: - if self._compiled_patterns is None: - self._compiled_patterns = [] - for resource_bucket, key_pattern in self.resources: - if key_pattern is None: - self._compiled_patterns.append((resource_bucket, None)) - else: - regex_pattern = translate(key_pattern) - self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern))) - return self._compiled_patterns - - def matches_principal(self, access_key: Optional[str]) -> bool: - if self.principals == "*": - return True - if access_key is None: - return False - return access_key in self.principals - - def matches_action(self, action: str) -> bool: - action = _normalize_action(action) - return "*" in self.actions or action in self.actions - - def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool: - bucket = (bucket or "*").lower() - key = object_key or "" - for resource_bucket, compiled_pattern in self._get_compiled_patterns(): - resource_bucket = (resource_bucket or "*").lower() - if resource_bucket not in {"*", bucket}: - continue - if compiled_pattern is None: - if not key: - return True - continue - if compiled_pattern.match(key): - return True - return False - - def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool: - if not self.conditions: - return True - if context is None: - context = {} - for operator, key_values in self.conditions.items(): - for condition_key, condition_values in key_values.items(): - if not _evaluate_condition_operator(operator, condition_key, condition_values, context): - return False - return True - - -class BucketPolicyStore: - """Loads bucket policies from disk and evaluates statements.""" - - def __init__(self, policy_path: Path) -> None: - self.policy_path = Path(policy_path) - self.policy_path.parent.mkdir(parents=True, exist_ok=True) - if not self.policy_path.exists(): - self.policy_path.write_text(json.dumps({"policies": {}}, indent=2)) - self._raw: Dict[str, Any] = {} - self._policies: Dict[str, List[BucketPolicyStatement]] = {} - self._load() - self._last_mtime = self._current_mtime() - # Performance: Avoid stat() on every request - self._last_stat_check = 0.0 - self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0")) - - def maybe_reload(self) -> None: - # Performance: Skip stat check if we checked recently - now = time.time() - if now - self._last_stat_check < self._stat_check_interval: - return - self._last_stat_check = now - current = self._current_mtime() - if current is None or current == self._last_mtime: - return - self._load() - self._last_mtime = current - - def _current_mtime(self) -> float | None: - try: - return 
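A standalone sketch of the ARN-to-(bucket, key pattern) split and the glob-based resource matching done by BucketPolicyStatement above (simplified: no pattern caching and no case folding):

import re
from fnmatch import translate
from typing import Optional, Tuple

def parse_resource(resource: str) -> Tuple[Optional[str], Optional[str]]:
    """'arn:aws:s3:::bucket/prefix/*' -> ('bucket', 'prefix/*'); bucket-only ARNs have no key pattern."""
    prefix = "arn:aws:s3:::"
    if not resource.startswith(prefix):
        return None, None
    bucket, _, key_pattern = resource[len(prefix):].partition("/")
    return bucket or "*", key_pattern or None

def resource_matches(resource: str, bucket: str, key: str = "") -> bool:
    """A statement resource matches when the bucket agrees and the key fits the glob (if any)."""
    res_bucket, key_pattern = parse_resource(resource)
    if res_bucket not in ("*", bucket):
        return False
    if key_pattern is None:
        return not key  # bucket-level resource only matches bucket-level requests
    return bool(re.match(translate(key_pattern), key))

assert resource_matches("arn:aws:s3:::docs", "docs")
assert resource_matches("arn:aws:s3:::docs/public/*", "docs", "public/readme.md")
assert not resource_matches("arn:aws:s3:::docs/public/*", "docs", "private/key.pem")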
self.policy_path.stat().st_mtime - except FileNotFoundError: - return None - - def evaluate( - self, - access_key: Optional[str], - bucket: Optional[str], - object_key: Optional[str], - action: str, - context: Optional[Dict[str, Any]] = None, - ) -> str | None: - bucket = (bucket or "").lower() - statements = self._policies.get(bucket) or [] - decision: Optional[str] = None - for statement in statements: - if not statement.matches_principal(access_key): - continue - if not statement.matches_action(action): - continue - if not statement.matches_resource(bucket, object_key): - continue - if not statement.matches_condition(context): - continue - if statement.effect == "deny": - return "deny" - decision = "allow" - return decision - - def get_policy(self, bucket: str) -> Dict[str, Any] | None: - return self._raw.get(bucket.lower()) - - def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None: - bucket = bucket.lower() - statements = self._normalize_policy(policy_payload) - if not statements: - raise ValueError("Policy must include at least one valid statement") - self._raw[bucket] = policy_payload - self._policies[bucket] = statements - self._persist() - - def delete_policy(self, bucket: str) -> None: - bucket = bucket.lower() - self._raw.pop(bucket, None) - self._policies.pop(bucket, None) - self._persist() - - def _load(self) -> None: - try: - content = self.policy_path.read_text(encoding='utf-8') - raw_payload = json.loads(content) - except FileNotFoundError: - raw_payload = {"policies": {}} - except json.JSONDecodeError as e: - raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}") - except PermissionError as e: - raise ValueError(f"Cannot read bucket policy file (permission denied): {e}") - except (OSError, ValueError) as e: - raise ValueError(f"Failed to load bucket policies: {e}") - - policies: Dict[str, Any] = raw_payload.get("policies", {}) - parsed: Dict[str, List[BucketPolicyStatement]] = {} - for bucket, policy in policies.items(): - parsed[bucket.lower()] = self._normalize_policy(policy) - self._raw = {bucket.lower(): policy for bucket, policy in policies.items()} - self._policies = parsed - - def _persist(self) -> None: - payload = {"policies": self._raw} - self.policy_path.write_text(json.dumps(payload, indent=2)) - - def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]: - statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", []) - statements: List[BucketPolicyStatement] = [] - for statement in statements_raw: - actions = _normalize_actions(statement.get("Action", [])) - principals = _normalize_principals(statement.get("Principal", "*")) - resources_field = statement.get("Resource", []) - if isinstance(resources_field, str): - resources_field = [resources_field] - resources: List[tuple[str | None, str | None]] = [] - for resource in resources_field: - bucket, pattern = _parse_resource(str(resource)) - if bucket: - resources.append((bucket, pattern)) - if not resources: - continue - effect = statement.get("Effect", "Allow").lower() - conditions = self._normalize_conditions(statement.get("Condition", {})) - statements.append( - BucketPolicyStatement( - sid=statement.get("Sid"), - effect=effect, - principals=principals, - actions=actions or ["*"], - resources=resources, - conditions=conditions, - ) - ) - return statements - - def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]: - if not condition_block or not isinstance(condition_block, dict): - return {} - 
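A minimal sketch of the evaluate() decision order above, where an explicit Deny short-circuits and any matching Allow otherwise wins (the statement tuples and the AKIAEXAMPLE key are invented for illustration):

from typing import List, Optional, Set, Tuple

# Each statement is (effect, principals, actions); '*' means "any".
Statement = Tuple[str, Set[str], Set[str]]

def evaluate(statements: List[Statement], principal: str, action: str) -> Optional[str]:
    """Return 'deny' on the first matching Deny, 'allow' if any Allow matched, else None (no decision)."""
    decision: Optional[str] = None
    for effect, principals, actions in statements:
        if "*" not in principals and principal not in principals:
            continue
        if "*" not in actions and action not in actions:
            continue
        if effect == "deny":
            return "deny"  # an explicit deny always overrides earlier allows
        decision = "allow"
    return decision

policy = [("allow", {"*"}, {"read"}), ("deny", {"AKIAEXAMPLE"}, {"*"})]
assert evaluate(policy, "someone-else", "read") == "allow"
assert evaluate(policy, "AKIAEXAMPLE", "read") == "deny"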
normalized: Dict[str, Dict[str, List[str]]] = {} - for operator, key_values in condition_block.items(): - if not isinstance(key_values, dict): - continue - normalized[operator] = {} - for cond_key, cond_values in key_values.items(): - if isinstance(cond_values, str): - normalized[operator][cond_key] = [cond_values] - elif isinstance(cond_values, list): - normalized[operator][cond_key] = [str(v) for v in cond_values] - else: - normalized[operator][cond_key] = [str(cond_values)] - return normalized \ No newline at end of file diff --git a/app/compression.py b/app/compression.py deleted file mode 100644 index bf32504..0000000 --- a/app/compression.py +++ /dev/null @@ -1,109 +0,0 @@ -from __future__ import annotations - -import gzip -import io -from typing import Callable, Iterable, List, Tuple - -COMPRESSIBLE_MIMES = frozenset([ - 'application/json', - 'application/javascript', - 'application/xml', - 'text/html', - 'text/css', - 'text/plain', - 'text/xml', - 'text/javascript', - 'application/x-ndjson', -]) - -MIN_SIZE_FOR_COMPRESSION = 500 - - -class GzipMiddleware: - def __init__(self, app: Callable, compression_level: int = 6, min_size: int = MIN_SIZE_FOR_COMPRESSION): - self.app = app - self.compression_level = compression_level - self.min_size = min_size - - def __call__(self, environ: dict, start_response: Callable) -> Iterable[bytes]: - accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '') - if 'gzip' not in accept_encoding.lower(): - return self.app(environ, start_response) - - response_started = False - status_code = None - response_headers: List[Tuple[str, str]] = [] - content_type = None - content_length = None - should_compress = False - passthrough = False - exc_info_holder = [None] - - def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None): - nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough - response_started = True - status_code = int(status.split(' ', 1)[0]) - response_headers = list(headers) - exc_info_holder[0] = exc_info - - for name, value in headers: - name_lower = name.lower() - if name_lower == 'content-type': - content_type = value.split(';')[0].strip().lower() - elif name_lower == 'content-length': - try: - content_length = int(value) - except (ValueError, TypeError): - pass - elif name_lower == 'content-encoding': - passthrough = True - return start_response(status, headers, exc_info) - elif name_lower == 'x-stream-response': - passthrough = True - return start_response(status, headers, exc_info) - - if content_type and content_type in COMPRESSIBLE_MIMES: - if content_length is None or content_length >= self.min_size: - should_compress = True - else: - passthrough = True - return start_response(status, headers, exc_info) - - return None - - app_iter = self.app(environ, custom_start_response) - - if passthrough: - return app_iter - - response_body = b''.join(app_iter) - - if not response_started: - return [response_body] - - if should_compress and len(response_body) >= self.min_size: - buf = io.BytesIO() - with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=self.compression_level) as gz: - gz.write(response_body) - compressed = buf.getvalue() - - if len(compressed) < len(response_body): - response_body = compressed - new_headers = [] - for name, value in response_headers: - if name.lower() not in ('content-length', 'content-encoding'): - new_headers.append((name, value)) - new_headers.append(('Content-Encoding', 'gzip')) - new_headers.append(('Content-Length', 
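The header inspection above decides between three outcomes: pass the response through untouched (already encoded, streamed, or too small), buffer and gzip it, or leave it alone when the content type is not compressible. A minimal wiring sketch for the removed middleware, assuming the pre-deletion app.compression module and a Flask-backed WSGI app; the flask_app object is illustrative:

import io  # only needed if you exercise the middleware directly
from flask import Flask
from app.compression import GzipMiddleware  # module removed by this change

flask_app = Flask(__name__)

# Wrap the WSGI callable; level 6 trades speed for ratio, and responses below
# min_size bytes stay uncompressed, matching MIN_SIZE_FOR_COMPRESSION above.
flask_app.wsgi_app = GzipMiddleware(flask_app.wsgi_app, compression_level=6, min_size=500)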
str(len(response_body)))) - new_headers.append(('Vary', 'Accept-Encoding')) - response_headers = new_headers - - status_str = f"{status_code} " + { - 200: "OK", 201: "Created", 204: "No Content", 206: "Partial Content", - 301: "Moved Permanently", 302: "Found", 304: "Not Modified", - 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found", - 405: "Method Not Allowed", 409: "Conflict", 500: "Internal Server Error", - }.get(status_code, "Unknown") - - start_response(status_str, response_headers, exc_info_holder[0]) - return [response_body] diff --git a/app/config.py b/app/config.py deleted file mode 100644 index 38906bc..0000000 --- a/app/config.py +++ /dev/null @@ -1,683 +0,0 @@ -from __future__ import annotations - -import os -import re -import secrets -import shutil -import sys -import warnings -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional - -import psutil - - -def _calculate_auto_threads() -> int: - cpu_count = psutil.cpu_count(logical=True) or 4 - return max(1, min(cpu_count * 2, 64)) - - -def _calculate_auto_connection_limit() -> int: - available_mb = psutil.virtual_memory().available / (1024 * 1024) - calculated = int(available_mb / 5) - return max(20, min(calculated, 1000)) - - -def _calculate_auto_backlog(connection_limit: int) -> int: - return max(128, min(connection_limit * 2, 4096)) - - -def _validate_rate_limit(value: str) -> str: - pattern = r"^\d+\s+per\s+(second|minute|hour|day)$" - if not re.match(pattern, value): - raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'") - return value - -if getattr(sys, "frozen", False): - # Running in a PyInstaller bundle - PROJECT_ROOT = Path(sys._MEIPASS) -else: - # Running in a normal Python environment - PROJECT_ROOT = Path(__file__).resolve().parent.parent - - -def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path: - """Ensure config directories exist and migrate legacy files when possible.""" - active_path = Path(active_path) - active_path.parent.mkdir(parents=True, exist_ok=True) - if legacy_path: - legacy_path = Path(legacy_path) - if not active_path.exists() and legacy_path.exists(): - legacy_path.parent.mkdir(parents=True, exist_ok=True) - try: - shutil.move(str(legacy_path), str(active_path)) - except OSError: - shutil.copy2(legacy_path, active_path) - try: - legacy_path.unlink(missing_ok=True) - except OSError: - pass - return active_path - - -@dataclass -class AppConfig: - storage_root: Path - max_upload_size: int - ui_page_size: int - secret_key: str - iam_config_path: Path - bucket_policy_path: Path - api_base_url: Optional[str] - aws_region: str - aws_service: str - ui_enforce_bucket_policies: bool - log_level: str - log_to_file: bool - log_path: Path - log_max_bytes: int - log_backup_count: int - ratelimit_default: str - ratelimit_storage_uri: str - ratelimit_list_buckets: str - ratelimit_bucket_ops: str - ratelimit_object_ops: str - ratelimit_head_ops: str - cors_origins: list[str] - cors_methods: list[str] - cors_allow_headers: list[str] - cors_expose_headers: list[str] - session_lifetime_days: int - auth_max_attempts: int - auth_lockout_minutes: int - bulk_delete_max_keys: int - secret_ttl_seconds: int - stream_chunk_size: int - multipart_min_part_size: int - bucket_stats_cache_ttl: int - object_cache_ttl: int - encryption_enabled: bool - encryption_master_key_path: Path - kms_enabled: bool - kms_keys_path: Path - default_encryption_algorithm: str - display_timezone: str - 
lifecycle_enabled: bool - lifecycle_interval_seconds: int - metrics_history_enabled: bool - metrics_history_retention_hours: int - metrics_history_interval_minutes: int - operation_metrics_enabled: bool - operation_metrics_interval_minutes: int - operation_metrics_retention_hours: int - server_threads: int - server_connection_limit: int - server_backlog: int - server_channel_timeout: int - server_max_buffer_size: int - server_threads_auto: bool - server_connection_limit_auto: bool - server_backlog_auto: bool - site_sync_enabled: bool - site_sync_interval_seconds: int - site_sync_batch_size: int - sigv4_timestamp_tolerance_seconds: int - presigned_url_min_expiry_seconds: int - presigned_url_max_expiry_seconds: int - replication_connect_timeout_seconds: int - replication_read_timeout_seconds: int - replication_max_retries: int - replication_streaming_threshold_bytes: int - replication_max_failures_per_bucket: int - site_sync_connect_timeout_seconds: int - site_sync_read_timeout_seconds: int - site_sync_max_retries: int - site_sync_clock_skew_tolerance_seconds: float - object_key_max_length_bytes: int - object_cache_max_size: int - meta_read_cache_max: int - bucket_config_cache_ttl_seconds: float - object_tag_limit: int - encryption_chunk_size_bytes: int - kms_generate_data_key_min_bytes: int - kms_generate_data_key_max_bytes: int - lifecycle_max_history_per_bucket: int - site_id: Optional[str] - site_endpoint: Optional[str] - site_region: str - site_priority: int - ratelimit_admin: str - num_trusted_proxies: int - allowed_redirect_hosts: list[str] - allow_internal_endpoints: bool - website_hosting_enabled: bool - gc_enabled: bool - gc_interval_hours: float - gc_temp_file_max_age_hours: float - gc_multipart_max_age_days: int - gc_lock_file_max_age_hours: float - gc_dry_run: bool - gc_io_throttle_ms: int - integrity_enabled: bool - integrity_interval_hours: float - integrity_batch_size: int - integrity_auto_heal: bool - integrity_dry_run: bool - integrity_io_throttle_ms: int - - @classmethod - def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig": - overrides = overrides or {} - - def _get(name: str, default: Any) -> Any: - return overrides.get(name, os.getenv(name, default)) - - storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve() - max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) - ui_page_size = int(_get("UI_PAGE_SIZE", 100)) - auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5)) - auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15)) - bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500)) - secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300)) - stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024)) - multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024)) - lifecycle_enabled = _get("LIFECYCLE_ENABLED", "false").lower() in ("true", "1", "yes") - lifecycle_interval_seconds = int(_get("LIFECYCLE_INTERVAL_SECONDS", 3600)) - default_secret = "dev-secret-key" - secret_key = str(_get("SECRET_KEY", default_secret)) - - if not secret_key or secret_key == default_secret: - secret_file = storage_root / ".myfsio.sys" / "config" / ".secret" - if secret_file.exists(): - secret_key = secret_file.read_text().strip() - else: - generated = secrets.token_urlsafe(32) - if secret_key == default_secret: - warnings.warn("Using insecure default SECRET_KEY. 
A random value has been generated and persisted; set SECRET_KEY for production", RuntimeWarning) - try: - secret_file.parent.mkdir(parents=True, exist_ok=True) - secret_file.write_text(generated) - try: - os.chmod(secret_file, 0o600) - except OSError: - pass - secret_key = generated - except OSError: - secret_key = generated - - iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ - bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ - - default_iam_path = storage_root / ".myfsio.sys" / "config" / "iam.json" - default_bucket_policy_path = storage_root / ".myfsio.sys" / "config" / "bucket_policies.json" - - iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve() - bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve() - - iam_config_path = _prepare_config_file( - iam_config_path, - legacy_path=None if iam_env_override else storage_root / "iam.json", - ) - bucket_policy_path = _prepare_config_file( - bucket_policy_path, - legacy_path=None if bucket_policy_override else storage_root / "bucket_policies.json", - ) - api_base_url = _get("API_BASE_URL", None) - if api_base_url: - api_base_url = str(api_base_url) - - aws_region = str(_get("AWS_REGION", "us-east-1")) - aws_service = str(_get("AWS_SERVICE", "s3")) - enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"} - log_level = str(_get("LOG_LEVEL", "INFO")).upper() - log_to_file = str(_get("LOG_TO_FILE", "1")).lower() in {"1", "true", "yes", "on"} - log_dir = Path(_get("LOG_DIR", storage_root.parent / "logs")).resolve() - log_dir.mkdir(parents=True, exist_ok=True) - log_path = log_dir / str(_get("LOG_FILE", "app.log")) - log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024)) - log_backup_count = int(_get("LOG_BACKUP_COUNT", 3)) - ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))) - ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://")) - ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute"))) - ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute"))) - ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute"))) - ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute"))) - - def _csv(value: str, default: list[str]) -> list[str]: - if not value: - return default - parts = [segment.strip() for segment in value.split(",") if segment.strip()] - return parts or default - - cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"]) - cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS,HEAD")), ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"]) - cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"]) - cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"]) - session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30)) - bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60)) - object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60)) - - encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - encryption_keys_dir = storage_root / ".myfsio.sys" / "keys" - encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve() - kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - 
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve() - default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256")) - display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC")) - metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24)) - metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5)) - operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5)) - operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24)) - - _raw_threads = int(_get("SERVER_THREADS", 0)) - if _raw_threads == 0: - server_threads = _calculate_auto_threads() - server_threads_auto = True - else: - server_threads = _raw_threads - server_threads_auto = False - - _raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0)) - if _raw_conn_limit == 0: - server_connection_limit = _calculate_auto_connection_limit() - server_connection_limit_auto = True - else: - server_connection_limit = _raw_conn_limit - server_connection_limit_auto = False - - _raw_backlog = int(_get("SERVER_BACKLOG", 0)) - if _raw_backlog == 0: - server_backlog = _calculate_auto_backlog(server_connection_limit) - server_backlog_auto = True - else: - server_backlog = _raw_backlog - server_backlog_auto = False - - server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120)) - server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128)) - site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60)) - site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100)) - - sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900)) - presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)) - presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)) - replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5)) - replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30)) - replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2)) - replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024)) - replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50)) - site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10)) - site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120)) - site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2)) - site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0)) - object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024)) - object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100)) - meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048)) - bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0)) - object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50)) - encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024)) - kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1)) - kms_generate_data_key_max_bytes = 
int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024)) - lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50)) - - site_id_raw = _get("SITE_ID", None) - site_id = str(site_id_raw).strip() if site_id_raw else None - site_endpoint_raw = _get("SITE_ENDPOINT", None) - site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None - site_region = str(_get("SITE_REGION", "us-east-1")) - site_priority = int(_get("SITE_PRIORITY", 100)) - ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute"))) - num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1)) - allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "") - allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()] - allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"} - website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0)) - gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0)) - gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7)) - gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0)) - gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"} - gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10)) - integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} - integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0)) - integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000)) - integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"} - integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"} - integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10)) - - return cls(storage_root=storage_root, - max_upload_size=max_upload_size, - ui_page_size=ui_page_size, - secret_key=secret_key, - iam_config_path=iam_config_path, - bucket_policy_path=bucket_policy_path, - api_base_url=api_base_url, - aws_region=aws_region, - aws_service=aws_service, - ui_enforce_bucket_policies=enforce_ui_policies, - log_level=log_level, - log_to_file=log_to_file, - log_path=log_path, - log_max_bytes=log_max_bytes, - log_backup_count=log_backup_count, - ratelimit_default=ratelimit_default, - ratelimit_storage_uri=ratelimit_storage_uri, - ratelimit_list_buckets=ratelimit_list_buckets, - ratelimit_bucket_ops=ratelimit_bucket_ops, - ratelimit_object_ops=ratelimit_object_ops, - ratelimit_head_ops=ratelimit_head_ops, - cors_origins=cors_origins, - cors_methods=cors_methods, - cors_allow_headers=cors_allow_headers, - cors_expose_headers=cors_expose_headers, - session_lifetime_days=session_lifetime_days, - auth_max_attempts=auth_max_attempts, - auth_lockout_minutes=auth_lockout_minutes, - bulk_delete_max_keys=bulk_delete_max_keys, - secret_ttl_seconds=secret_ttl_seconds, - stream_chunk_size=stream_chunk_size, - multipart_min_part_size=multipart_min_part_size, - bucket_stats_cache_ttl=bucket_stats_cache_ttl, - object_cache_ttl=object_cache_ttl, - encryption_enabled=encryption_enabled, - encryption_master_key_path=encryption_master_key_path, - kms_enabled=kms_enabled, - kms_keys_path=kms_keys_path, - default_encryption_algorithm=default_encryption_algorithm, - 
display_timezone=display_timezone, - lifecycle_enabled=lifecycle_enabled, - lifecycle_interval_seconds=lifecycle_interval_seconds, - metrics_history_enabled=metrics_history_enabled, - metrics_history_retention_hours=metrics_history_retention_hours, - metrics_history_interval_minutes=metrics_history_interval_minutes, - operation_metrics_enabled=operation_metrics_enabled, - operation_metrics_interval_minutes=operation_metrics_interval_minutes, - operation_metrics_retention_hours=operation_metrics_retention_hours, - server_threads=server_threads, - server_connection_limit=server_connection_limit, - server_backlog=server_backlog, - server_channel_timeout=server_channel_timeout, - server_max_buffer_size=server_max_buffer_size, - server_threads_auto=server_threads_auto, - server_connection_limit_auto=server_connection_limit_auto, - server_backlog_auto=server_backlog_auto, - site_sync_enabled=site_sync_enabled, - site_sync_interval_seconds=site_sync_interval_seconds, - site_sync_batch_size=site_sync_batch_size, - sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds, - presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds, - presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds, - replication_connect_timeout_seconds=replication_connect_timeout_seconds, - replication_read_timeout_seconds=replication_read_timeout_seconds, - replication_max_retries=replication_max_retries, - replication_streaming_threshold_bytes=replication_streaming_threshold_bytes, - replication_max_failures_per_bucket=replication_max_failures_per_bucket, - site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds, - site_sync_read_timeout_seconds=site_sync_read_timeout_seconds, - site_sync_max_retries=site_sync_max_retries, - site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds, - object_key_max_length_bytes=object_key_max_length_bytes, - object_cache_max_size=object_cache_max_size, - meta_read_cache_max=meta_read_cache_max, - bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds, - object_tag_limit=object_tag_limit, - encryption_chunk_size_bytes=encryption_chunk_size_bytes, - kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes, - kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes, - lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket, - site_id=site_id, - site_endpoint=site_endpoint, - site_region=site_region, - site_priority=site_priority, - ratelimit_admin=ratelimit_admin, - num_trusted_proxies=num_trusted_proxies, - allowed_redirect_hosts=allowed_redirect_hosts, - allow_internal_endpoints=allow_internal_endpoints, - website_hosting_enabled=website_hosting_enabled, - gc_enabled=gc_enabled, - gc_interval_hours=gc_interval_hours, - gc_temp_file_max_age_hours=gc_temp_file_max_age_hours, - gc_multipart_max_age_days=gc_multipart_max_age_days, - gc_lock_file_max_age_hours=gc_lock_file_max_age_hours, - gc_dry_run=gc_dry_run, - gc_io_throttle_ms=gc_io_throttle_ms, - integrity_enabled=integrity_enabled, - integrity_interval_hours=integrity_interval_hours, - integrity_batch_size=integrity_batch_size, - integrity_auto_heal=integrity_auto_heal, - integrity_dry_run=integrity_dry_run, - integrity_io_throttle_ms=integrity_io_throttle_ms) - - def validate_and_report(self) -> list[str]: - """Validate configuration and return a list of warnings/issues. - - Call this at startup to detect potential misconfigurations before - the application fully commits to running. 
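A minimal startup sketch using the pieces defined above, assuming the pre-deletion app.config module; from_env, validate_and_report and print_startup_summary are the methods shown here, while the abort-on-CRITICAL policy is only an illustration:

import sys
from app.config import AppConfig  # module removed by this change

config = AppConfig.from_env()           # environment variables / overrides -> dataclass
issues = config.validate_and_report()   # list of "CRITICAL:"/"WARNING:"/"INFO:" strings

# Illustrative policy: refuse to boot on CRITICAL findings, report everything else.
if any(issue.startswith("CRITICAL") for issue in issues):
    for issue in issues:
        print(issue, file=sys.stderr)
    sys.exit(1)

config.print_startup_summary()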
- """ - issues = [] - - try: - test_file = self.storage_root / ".write_test" - test_file.touch() - test_file.unlink() - except (OSError, PermissionError) as e: - issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}") - - storage_str = str(self.storage_root).lower() - if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str: - issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!") - - try: - self.iam_config_path.relative_to(self.storage_root) - except ValueError: - issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.") - - try: - self.bucket_policy_path.relative_to(self.storage_root) - except ValueError: - issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.") - - try: - self.log_path.parent.mkdir(parents=True, exist_ok=True) - test_log = self.log_path.parent / ".write_test" - test_log.touch() - test_log.unlink() - except (OSError, PermissionError) as e: - issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}") - - log_str = str(self.log_path).lower() - if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str: - issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!") - - if self.encryption_enabled: - try: - self.encryption_master_key_path.relative_to(self.storage_root) - except ValueError: - issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.") - - if self.kms_enabled: - try: - self.kms_keys_path.relative_to(self.storage_root) - except ValueError: - issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.") - - if self.secret_key == "dev-secret-key": - issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.") - - if "*" in self.cors_origins: - issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.") - - if not (1 <= self.server_threads <= 64): - issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.") - if not (10 <= self.server_connection_limit <= 1000): - issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.") - if not (128 <= self.server_backlog <= 4096): - issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.") - if not (10 <= self.server_channel_timeout <= 300): - issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.") - if self.server_max_buffer_size < 1024 * 1024: - issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. 
Large uploads will fail.") - - if sys.platform != "win32": - try: - import resource - soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE) - threshold = int(soft_limit * 0.8) - if self.server_connection_limit > threshold: - issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.") - except (ImportError, OSError): - pass - - try: - import psutil - available_mb = psutil.virtual_memory().available / (1024 * 1024) - estimated_mb = self.server_threads * 50 - if estimated_mb > available_mb * 0.5: - issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).") - except ImportError: - pass - - return issues - - def print_startup_summary(self) -> None: - """Print a summary of the configuration at startup.""" - print("\n" + "=" * 60) - print("MyFSIO Configuration Summary") - print("=" * 60) - print(f" STORAGE_ROOT: {self.storage_root}") - print(f" IAM_CONFIG: {self.iam_config_path}") - print(f" BUCKET_POLICY: {self.bucket_policy_path}") - print(f" LOG_PATH: {self.log_path}") - if self.api_base_url: - print(f" API_BASE_URL: {self.api_base_url}") - if self.encryption_enabled: - print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})") - if self.kms_enabled: - print(f" KMS: Enabled (Keys: {self.kms_keys_path})") - if self.website_hosting_enabled: - print(f" WEBSITE_HOSTING: Enabled") - def _auto(flag: bool) -> str: - return " (auto)" if flag else "" - print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}") - print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}") - print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}") - print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s") - print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB") - print("=" * 60) - - issues = self.validate_and_report() - if issues: - print("\nConfiguration Issues Detected:") - for issue in issues: - print(f" • {issue}") - print() - else: - print(" ✓ Configuration validated successfully\n") - - def to_flask_config(self) -> Dict[str, Any]: - return { - "STORAGE_ROOT": str(self.storage_root), - "MAX_CONTENT_LENGTH": self.max_upload_size, - "UI_PAGE_SIZE": self.ui_page_size, - "SECRET_KEY": self.secret_key, - "IAM_CONFIG": str(self.iam_config_path), - "BUCKET_POLICY_PATH": str(self.bucket_policy_path), - "API_BASE_URL": self.api_base_url, - "AWS_REGION": self.aws_region, - "AWS_SERVICE": self.aws_service, - "UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies, - "AUTH_MAX_ATTEMPTS": self.auth_max_attempts, - "AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes, - "BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys, - "SECRET_TTL_SECONDS": self.secret_ttl_seconds, - "STREAM_CHUNK_SIZE": self.stream_chunk_size, - "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size, - "BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl, - "OBJECT_CACHE_TTL": self.object_cache_ttl, - "LOG_LEVEL": self.log_level, - "LOG_TO_FILE": self.log_to_file, - "LOG_FILE": str(self.log_path), - "LOG_MAX_BYTES": self.log_max_bytes, - "LOG_BACKUP_COUNT": self.log_backup_count, - "RATELIMIT_DEFAULT": self.ratelimit_default, - "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri, - "RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets, - "RATELIMIT_BUCKET_OPS": 
self.ratelimit_bucket_ops, - "RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops, - "RATELIMIT_HEAD_OPS": self.ratelimit_head_ops, - "CORS_ORIGINS": self.cors_origins, - "CORS_METHODS": self.cors_methods, - "CORS_ALLOW_HEADERS": self.cors_allow_headers, - "CORS_EXPOSE_HEADERS": self.cors_expose_headers, - "SESSION_LIFETIME_DAYS": self.session_lifetime_days, - "ENCRYPTION_ENABLED": self.encryption_enabled, - "ENCRYPTION_MASTER_KEY_PATH": str(self.encryption_master_key_path), - "KMS_ENABLED": self.kms_enabled, - "KMS_KEYS_PATH": str(self.kms_keys_path), - "DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm, - "DISPLAY_TIMEZONE": self.display_timezone, - "LIFECYCLE_ENABLED": self.lifecycle_enabled, - "LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds, - "METRICS_HISTORY_ENABLED": self.metrics_history_enabled, - "METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours, - "METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes, - "OPERATION_METRICS_ENABLED": self.operation_metrics_enabled, - "OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes, - "OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours, - "SERVER_THREADS": self.server_threads, - "SERVER_CONNECTION_LIMIT": self.server_connection_limit, - "SERVER_BACKLOG": self.server_backlog, - "SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout, - "SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size, - "SITE_SYNC_ENABLED": self.site_sync_enabled, - "SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds, - "SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size, - "SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds, - "PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds, - "PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds, - "REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds, - "REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds, - "REPLICATION_MAX_RETRIES": self.replication_max_retries, - "REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes, - "REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket, - "SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds, - "SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds, - "SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries, - "SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds, - "OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes, - "OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size, - "META_READ_CACHE_MAX": self.meta_read_cache_max, - "BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds, - "OBJECT_TAG_LIMIT": self.object_tag_limit, - "ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes, - "KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes, - "KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes, - "LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket, - "SITE_ID": self.site_id, - "SITE_ENDPOINT": self.site_endpoint, - "SITE_REGION": self.site_region, - "SITE_PRIORITY": self.site_priority, - "RATE_LIMIT_ADMIN": self.ratelimit_admin, - "NUM_TRUSTED_PROXIES": self.num_trusted_proxies, - "ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts, - "ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints, - "WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled, - 
"GC_ENABLED": self.gc_enabled, - "GC_INTERVAL_HOURS": self.gc_interval_hours, - "GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours, - "GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days, - "GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours, - "GC_DRY_RUN": self.gc_dry_run, - "GC_IO_THROTTLE_MS": self.gc_io_throttle_ms, - "INTEGRITY_ENABLED": self.integrity_enabled, - "INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours, - "INTEGRITY_BATCH_SIZE": self.integrity_batch_size, - "INTEGRITY_AUTO_HEAL": self.integrity_auto_heal, - "INTEGRITY_DRY_RUN": self.integrity_dry_run, - "INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms, - } diff --git a/app/connections.py b/app/connections.py deleted file mode 100644 index b694af9..0000000 --- a/app/connections.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import asdict, dataclass -from pathlib import Path -from typing import Dict, List, Optional - -from .config import AppConfig - - -@dataclass -class RemoteConnection: - id: str - name: str - endpoint_url: str - access_key: str - secret_key: str - region: str = "us-east-1" - - -class ConnectionStore: - def __init__(self, config_path: Path) -> None: - self.config_path = config_path - self._connections: Dict[str, RemoteConnection] = {} - self.reload() - - def reload(self) -> None: - if not self.config_path.exists(): - self._connections = {} - return - - try: - with open(self.config_path, "r") as f: - data = json.load(f) - for item in data: - conn = RemoteConnection(**item) - self._connections[conn.id] = conn - except (OSError, json.JSONDecodeError): - self._connections = {} - - def save(self) -> None: - self.config_path.parent.mkdir(parents=True, exist_ok=True) - data = [asdict(conn) for conn in self._connections.values()] - with open(self.config_path, "w") as f: - json.dump(data, f, indent=2) - - def list(self) -> List[RemoteConnection]: - return list(self._connections.values()) - - def get(self, connection_id: str) -> Optional[RemoteConnection]: - return self._connections.get(connection_id) - - def add(self, connection: RemoteConnection) -> None: - self._connections[connection.id] = connection - self.save() - - def delete(self, connection_id: str) -> None: - if connection_id in self._connections: - del self._connections[connection_id] - self.save() diff --git a/app/encrypted_storage.py b/app/encrypted_storage.py deleted file mode 100644 index b64e1d1..0000000 --- a/app/encrypted_storage.py +++ /dev/null @@ -1,293 +0,0 @@ -from __future__ import annotations - -import io -from pathlib import Path -from typing import Any, BinaryIO, Dict, Optional - -from .encryption import EncryptionManager, EncryptionMetadata, EncryptionError -from .storage import ObjectStorage, ObjectMeta, StorageError - - -class EncryptedObjectStorage: - """Object storage with transparent server-side encryption. - - This class wraps ObjectStorage and provides transparent encryption/decryption - of objects based on bucket encryption configuration. - - Encryption is applied when: - 1. Bucket has default encryption configured (SSE-S3 or SSE-KMS) - 2. Client explicitly requests encryption via headers - - The encryption metadata is stored alongside object metadata. 
- """ - - STREAMING_THRESHOLD = 64 * 1024 - - def __init__(self, storage: ObjectStorage, encryption_manager: EncryptionManager): - self.storage = storage - self.encryption = encryption_manager - - @property - def root(self) -> Path: - return self.storage.root - - def _should_encrypt(self, bucket_name: str, - server_side_encryption: str | None = None) -> tuple[bool, str, str | None]: - """Determine if object should be encrypted. - - Returns: - Tuple of (should_encrypt, algorithm, kms_key_id) - """ - if not self.encryption.enabled: - return False, "", None - - if server_side_encryption: - if server_side_encryption == "AES256": - return True, "AES256", None - elif server_side_encryption.startswith("aws:kms"): - parts = server_side_encryption.split(":") - kms_key_id = parts[2] if len(parts) > 2 else None - return True, "aws:kms", kms_key_id - - try: - encryption_config = self.storage.get_bucket_encryption(bucket_name) - if encryption_config and encryption_config.get("Rules"): - rule = encryption_config["Rules"][0] - # AWS format: Rules[].ApplyServerSideEncryptionByDefault.SSEAlgorithm - sse_default = rule.get("ApplyServerSideEncryptionByDefault", {}) - algorithm = sse_default.get("SSEAlgorithm", "AES256") - kms_key_id = sse_default.get("KMSMasterKeyID") - return True, algorithm, kms_key_id - except StorageError: - pass - - return False, "", None - - def _is_encrypted(self, metadata: Dict[str, str]) -> bool: - """Check if object is encrypted based on its metadata.""" - return "x-amz-server-side-encryption" in metadata - - def put_object( - self, - bucket_name: str, - object_key: str, - stream: BinaryIO, - *, - metadata: Optional[Dict[str, str]] = None, - server_side_encryption: Optional[str] = None, - kms_key_id: Optional[str] = None, - ) -> ObjectMeta: - """Store an object, optionally with encryption. - - Args: - bucket_name: Name of the bucket - object_key: Key for the object - stream: Binary stream of object data - metadata: Optional user metadata - server_side_encryption: Encryption algorithm ("AES256" or "aws:kms") - kms_key_id: KMS key ID (for aws:kms encryption) - - Returns: - ObjectMeta with object information - - Performance: Uses streaming encryption for large files to reduce memory usage. - """ - should_encrypt, algorithm, detected_kms_key = self._should_encrypt( - bucket_name, server_side_encryption - ) - - if kms_key_id is None: - kms_key_id = detected_kms_key - - if should_encrypt: - try: - # Performance: Use streaming encryption to avoid loading entire file into memory - encrypted_stream, enc_metadata = self.encryption.encrypt_stream( - stream, - algorithm=algorithm, - context={"bucket": bucket_name, "key": object_key}, - ) - - combined_metadata = metadata.copy() if metadata else {} - combined_metadata.update(enc_metadata.to_dict()) - - result = self.storage.put_object( - bucket_name, - object_key, - encrypted_stream, - metadata=combined_metadata, - ) - - result.metadata = combined_metadata - return result - - except EncryptionError as exc: - raise StorageError(f"Encryption failed: {exc}") from exc - else: - return self.storage.put_object( - bucket_name, - object_key, - stream, - metadata=metadata, - ) - - def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]: - """Get object data, decrypting if necessary. - - Returns: - Tuple of (data, metadata) - - Performance: Uses streaming decryption to reduce memory usage. 
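Putting the pieces above together, a minimal usage sketch assuming the pre-deletion modules; the ObjectStorage constructor call, storage root, bucket and key names are placeholders rather than the project's exact wiring:

import io
from pathlib import Path
from app.storage import ObjectStorage                     # removed by this change
from app.encryption import EncryptionManager              # removed by this change
from app.encrypted_storage import EncryptedObjectStorage  # removed by this change

manager = EncryptionManager({
    "encryption_enabled": True,
    "encryption_master_key_path": "data/.myfsio.sys/keys/master.key",
})
store = EncryptedObjectStorage(ObjectStorage(Path("data")), manager)  # constructor call assumed

# Explicit SSE-S3; a bucket default under Rules[].ApplyServerSideEncryptionByDefault
# would trigger the same path via _should_encrypt().
store.put_object("demo-bucket", "docs/report.txt",
                 io.BytesIO(b"hello world"), server_side_encryption="AES256")
data, metadata = store.get_object_data("demo-bucket", "docs/report.txt")
assert data == b"hello world"  # decrypted transparently; x-amz-encryption* keys are scrubbed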
- """ - path = self.storage.get_object_path(bucket_name, object_key) - metadata = self.storage.get_object_metadata(bucket_name, object_key) - - enc_metadata = EncryptionMetadata.from_dict(metadata) - if enc_metadata: - try: - # Performance: Use streaming decryption to avoid loading entire file into memory - with path.open("rb") as f: - decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata) - data = decrypted_stream.read() - except EncryptionError as exc: - raise StorageError(f"Decryption failed: {exc}") from exc - else: - with path.open("rb") as f: - data = f.read() - - clean_metadata = { - k: v for k, v in metadata.items() - if not k.startswith("x-amz-encryption") - and k != "x-amz-encrypted-data-key" - } - - return data, clean_metadata - - def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]: - """Get object as a stream, decrypting if necessary. - - Returns: - Tuple of (stream, metadata, original_size) - """ - data, metadata = self.get_object_data(bucket_name, object_key) - return io.BytesIO(data), metadata, len(data) - - def list_buckets(self): - return self.storage.list_buckets() - - def bucket_exists(self, bucket_name: str) -> bool: - return self.storage.bucket_exists(bucket_name) - - def create_bucket(self, bucket_name: str) -> None: - return self.storage.create_bucket(bucket_name) - - def delete_bucket(self, bucket_name: str) -> None: - return self.storage.delete_bucket(bucket_name) - - def bucket_stats(self, bucket_name: str, cache_ttl: int = 60): - return self.storage.bucket_stats(bucket_name, cache_ttl) - - def list_objects(self, bucket_name: str, **kwargs): - return self.storage.list_objects(bucket_name, **kwargs) - - def list_objects_shallow(self, bucket_name: str, **kwargs): - return self.storage.list_objects_shallow(bucket_name, **kwargs) - - def iter_objects_shallow(self, bucket_name: str, **kwargs): - return self.storage.iter_objects_shallow(bucket_name, **kwargs) - - def search_objects(self, bucket_name: str, query: str, **kwargs): - return self.storage.search_objects(bucket_name, query, **kwargs) - - def list_objects_all(self, bucket_name: str): - return self.storage.list_objects_all(bucket_name) - - def get_object_path(self, bucket_name: str, object_key: str): - return self.storage.get_object_path(bucket_name, object_key) - - def get_object_metadata(self, bucket_name: str, object_key: str): - return self.storage.get_object_metadata(bucket_name, object_key) - - def delete_object(self, bucket_name: str, object_key: str) -> None: - return self.storage.delete_object(bucket_name, object_key) - - def purge_object(self, bucket_name: str, object_key: str) -> None: - return self.storage.purge_object(bucket_name, object_key) - - def is_versioning_enabled(self, bucket_name: str) -> bool: - return self.storage.is_versioning_enabled(bucket_name) - - def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None: - return self.storage.set_bucket_versioning(bucket_name, enabled) - - def get_bucket_tags(self, bucket_name: str): - return self.storage.get_bucket_tags(bucket_name) - - def set_bucket_tags(self, bucket_name: str, tags): - return self.storage.set_bucket_tags(bucket_name, tags) - - def get_bucket_cors(self, bucket_name: str): - return self.storage.get_bucket_cors(bucket_name) - - def set_bucket_cors(self, bucket_name: str, rules): - return self.storage.set_bucket_cors(bucket_name, rules) - - def get_bucket_encryption(self, bucket_name: str): - return self.storage.get_bucket_encryption(bucket_name) - - def 
set_bucket_encryption(self, bucket_name: str, config_payload): - return self.storage.set_bucket_encryption(bucket_name, config_payload) - - def get_bucket_lifecycle(self, bucket_name: str): - return self.storage.get_bucket_lifecycle(bucket_name) - - def set_bucket_lifecycle(self, bucket_name: str, rules): - return self.storage.set_bucket_lifecycle(bucket_name, rules) - - def get_object_tags(self, bucket_name: str, object_key: str): - return self.storage.get_object_tags(bucket_name, object_key) - - def set_object_tags(self, bucket_name: str, object_key: str, tags): - return self.storage.set_object_tags(bucket_name, object_key, tags) - - def delete_object_tags(self, bucket_name: str, object_key: str): - return self.storage.delete_object_tags(bucket_name, object_key) - - def list_object_versions(self, bucket_name: str, object_key: str): - return self.storage.list_object_versions(bucket_name, object_key) - - def restore_object_version(self, bucket_name: str, object_key: str, version_id: str): - return self.storage.restore_object_version(bucket_name, object_key, version_id) - - def list_orphaned_objects(self, bucket_name: str): - return self.storage.list_orphaned_objects(bucket_name) - - def initiate_multipart_upload(self, bucket_name: str, object_key: str, *, metadata=None) -> str: - return self.storage.initiate_multipart_upload(bucket_name, object_key, metadata=metadata) - - def upload_multipart_part(self, bucket_name: str, upload_id: str, part_number: int, stream: BinaryIO) -> str: - return self.storage.upload_multipart_part(bucket_name, upload_id, part_number, stream) - - def complete_multipart_upload(self, bucket_name: str, upload_id: str, ordered_parts): - return self.storage.complete_multipart_upload(bucket_name, upload_id, ordered_parts) - - def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None: - return self.storage.abort_multipart_upload(bucket_name, upload_id) - - def list_multipart_parts(self, bucket_name: str, upload_id: str): - return self.storage.list_multipart_parts(bucket_name, upload_id) - - def get_bucket_quota(self, bucket_name: str): - return self.storage.get_bucket_quota(bucket_name) - - def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None): - return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects) - - def get_bucket_website(self, bucket_name: str): - return self.storage.get_bucket_website(bucket_name) - - def set_bucket_website(self, bucket_name: str, website_config): - return self.storage.set_bucket_website(bucket_name, website_config) - - def _compute_etag(self, path: Path) -> str: - return self.storage._compute_etag(path) diff --git a/app/encryption.py b/app/encryption.py deleted file mode 100644 index f000176..0000000 --- a/app/encryption.py +++ /dev/null @@ -1,653 +0,0 @@ -from __future__ import annotations - -import base64 -import io -import json -import logging -import os -import secrets -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Any, BinaryIO, Dict, Generator, Optional - -from cryptography.hazmat.primitives.ciphers.aead import AESGCM -from cryptography.hazmat.primitives.kdf.hkdf import HKDF -from cryptography.hazmat.primitives import hashes - -if sys.platform != "win32": - import fcntl - -try: - import myfsio_core as _rc - if not all(hasattr(_rc, f) for f in ( - "encrypt_stream_chunked", "decrypt_stream_chunked", - )): - raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin 
develop --release") - _HAS_RUST = True -except ImportError: - _rc = None - _HAS_RUST = False - -logger = logging.getLogger(__name__) - - -def _set_secure_file_permissions(file_path: Path) -> None: - """Set restrictive file permissions (owner read/write only).""" - if sys.platform == "win32": - try: - username = os.environ.get("USERNAME", "") - if username: - subprocess.run( - ["icacls", str(file_path), "/inheritance:r", - "/grant:r", f"{username}:F"], - check=True, capture_output=True - ) - else: - logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path) - except (subprocess.SubprocessError, OSError) as exc: - logger.warning("Failed to set secure permissions on %s: %s", file_path, exc) - else: - os.chmod(file_path, 0o600) - - -class EncryptionError(Exception): - """Raised when encryption/decryption fails.""" - - -@dataclass -class EncryptionResult: - """Result of encrypting data.""" - ciphertext: bytes - nonce: bytes - key_id: str - encrypted_data_key: bytes - - -@dataclass -class EncryptionMetadata: - """Metadata stored with encrypted objects.""" - algorithm: str - key_id: str - nonce: bytes - encrypted_data_key: bytes - - def to_dict(self) -> Dict[str, str]: - return { - "x-amz-server-side-encryption": self.algorithm, - "x-amz-encryption-key-id": self.key_id, - "x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(), - "x-amz-encrypted-data-key": base64.b64encode(self.encrypted_data_key).decode(), - } - - @classmethod - def from_dict(cls, data: Dict[str, str]) -> Optional["EncryptionMetadata"]: - algorithm = data.get("x-amz-server-side-encryption") - if not algorithm: - return None - try: - return cls( - algorithm=algorithm, - key_id=data.get("x-amz-encryption-key-id", "local"), - nonce=base64.b64decode(data.get("x-amz-encryption-nonce", "")), - encrypted_data_key=base64.b64decode(data.get("x-amz-encrypted-data-key", "")), - ) - except Exception: - return None - - -class EncryptionProvider: - """Base class for encryption providers.""" - - def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: - raise NotImplementedError - - def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, - key_id: str, context: Dict[str, str] | None = None) -> bytes: - raise NotImplementedError - - def generate_data_key(self) -> tuple[bytes, bytes]: - """Generate a data key and its encrypted form. - - Returns: - Tuple of (plaintext_key, encrypted_key) - """ - raise NotImplementedError - - def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes: - """Decrypt an encrypted data key. - - Args: - encrypted_data_key: The encrypted data key bytes - key_id: Optional key identifier (used by KMS providers) - - Returns: - The decrypted data key - """ - raise NotImplementedError - - -class LocalKeyEncryption(EncryptionProvider): - """SSE-S3 style encryption using a local master key. - - Uses envelope encryption: - 1. Generate a unique data key for each object - 2. Encrypt the data with the data key (AES-256-GCM) - 3. Encrypt the data key with the master key - 4. 
Store the encrypted data key alongside the ciphertext - """ - - KEY_ID = "local" - - def __init__(self, master_key_path: Path): - self.master_key_path = master_key_path - self._master_key: bytes | None = None - - @property - def master_key(self) -> bytes: - if self._master_key is None: - self._master_key = self._load_or_create_master_key() - return self._master_key - - def _load_or_create_master_key(self) -> bytes: - """Load master key from file or generate a new one (with file locking).""" - lock_path = self.master_key_path.with_suffix(".lock") - lock_path.parent.mkdir(parents=True, exist_ok=True) - - try: - with open(lock_path, "w") as lock_file: - if sys.platform == "win32": - import msvcrt - msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1) - else: - fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) - try: - if self.master_key_path.exists(): - try: - return base64.b64decode(self.master_key_path.read_text().strip()) - except Exception as exc: - raise EncryptionError(f"Failed to load master key: {exc}") from exc - key = secrets.token_bytes(32) - try: - self.master_key_path.write_text(base64.b64encode(key).decode()) - _set_secure_file_permissions(self.master_key_path) - except OSError as exc: - raise EncryptionError(f"Failed to save master key: {exc}") from exc - return key - finally: - if sys.platform == "win32": - import msvcrt - msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1) - else: - fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) - except OSError as exc: - raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc - - DATA_KEY_AAD = b'{"purpose":"data_key","version":1}' - - def _encrypt_data_key(self, data_key: bytes) -> bytes: - """Encrypt the data key with the master key.""" - aesgcm = AESGCM(self.master_key) - nonce = secrets.token_bytes(12) - encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD) - return nonce + encrypted - - def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes: - """Decrypt the data key using the master key.""" - if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag - raise EncryptionError("Invalid encrypted data key") - aesgcm = AESGCM(self.master_key) - nonce = encrypted_data_key[:12] - ciphertext = encrypted_data_key[12:] - try: - return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD) - except Exception: - try: - return aesgcm.decrypt(nonce, ciphertext, None) - except Exception as exc: - raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc - - def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes: - """Decrypt an encrypted data key (key_id ignored for local encryption).""" - return self._decrypt_data_key(encrypted_data_key) - - def generate_data_key(self) -> tuple[bytes, bytes]: - """Generate a data key and its encrypted form.""" - plaintext_key = secrets.token_bytes(32) - encrypted_key = self._encrypt_data_key(plaintext_key) - return plaintext_key, encrypted_key - - def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: - """Encrypt data using envelope encryption.""" - data_key, encrypted_data_key = self.generate_data_key() - - aesgcm = AESGCM(data_key) - nonce = secrets.token_bytes(12) - aad = json.dumps(context, sort_keys=True).encode() if context else None - ciphertext = aesgcm.encrypt(nonce, plaintext, aad) - - return EncryptionResult( - ciphertext=ciphertext, - nonce=nonce, - key_id=self.KEY_ID, - encrypted_data_key=encrypted_data_key, - ) - - def decrypt(self, ciphertext: bytes, nonce: bytes, 
encrypted_data_key: bytes, - key_id: str, context: Dict[str, str] | None = None) -> bytes: - """Decrypt data using envelope encryption.""" - data_key = self._decrypt_data_key(encrypted_data_key) - aesgcm = AESGCM(data_key) - aad = json.dumps(context, sort_keys=True).encode() if context else None - try: - return aesgcm.decrypt(nonce, ciphertext, aad) - except Exception as exc: - raise EncryptionError("Failed to decrypt data") from exc - - -class StreamingEncryptor: - """Encrypts/decrypts data in streaming fashion for large files. - - For large files, we encrypt in chunks. Each chunk is encrypted with the - same data key but a unique nonce derived from the base nonce + chunk index. - """ - - CHUNK_SIZE = 64 * 1024 - HEADER_SIZE = 4 - - def __init__(self, provider: EncryptionProvider, chunk_size: int = CHUNK_SIZE): - self.provider = provider - self.chunk_size = chunk_size - - def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes: - """Derive a unique nonce for each chunk using HKDF.""" - hkdf = HKDF( - algorithm=hashes.SHA256(), - length=12, - salt=base_nonce, - info=chunk_index.to_bytes(4, "big"), - ) - return hkdf.derive(b"chunk_nonce") - - def encrypt_stream(self, stream: BinaryIO, - context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]: - """Encrypt a stream and return encrypted stream + metadata. - - Performance: Writes chunks directly to output buffer instead of accumulating in list. - """ - data_key, encrypted_data_key = self.provider.generate_data_key() - base_nonce = secrets.token_bytes(12) - - aesgcm = AESGCM(data_key) - # Performance: Write directly to BytesIO instead of accumulating chunks - output = io.BytesIO() - output.write(b"\x00\x00\x00\x00") # Placeholder for chunk count - chunk_index = 0 - - while True: - chunk = stream.read(self.chunk_size) - if not chunk: - break - - chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) - encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None) - - # Write size prefix + encrypted chunk directly - output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")) - output.write(encrypted_chunk) - chunk_index += 1 - - # Write actual chunk count to header - output.seek(0) - output.write(chunk_index.to_bytes(4, "big")) - output.seek(0) - - metadata = EncryptionMetadata( - algorithm="AES256", - key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local", - nonce=base_nonce, - encrypted_data_key=encrypted_data_key, - ) - - return output, metadata - - def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO: - """Decrypt a stream using the provided metadata. - - Performance: Writes chunks directly to output buffer instead of accumulating in list. 
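A minimal round-trip sketch of the chunked format used by encrypt_stream and decrypt_stream (4-byte chunk-count header, per-chunk length prefixes, HKDF-derived per-chunk nonces), assuming the pre-deletion app.encryption module; the master key path is a placeholder:

import io
from pathlib import Path
from app.encryption import LocalKeyEncryption, StreamingEncryptor  # removed by this change

provider = LocalKeyEncryption(Path("data/.myfsio.sys/keys/master.key"))
encryptor = StreamingEncryptor(provider, chunk_size=64 * 1024)

plaintext = b"x" * (200 * 1024)  # spans several 64 KiB chunks
encrypted_stream, metadata = encryptor.encrypt_stream(io.BytesIO(plaintext))

# metadata carries the base nonce and the wrapped data key; decrypt_stream
# unwraps the key and re-derives each chunk nonce from the chunk index.
restored = encryptor.decrypt_stream(encrypted_stream, metadata).read()
assert restored == plaintext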
- """ - data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id) - - aesgcm = AESGCM(data_key) - base_nonce = metadata.nonce - - chunk_count_bytes = stream.read(4) - if len(chunk_count_bytes) < 4: - raise EncryptionError("Invalid encrypted stream: missing header") - chunk_count = int.from_bytes(chunk_count_bytes, "big") - - # Performance: Write directly to BytesIO instead of accumulating chunks - output = io.BytesIO() - for chunk_index in range(chunk_count): - size_bytes = stream.read(self.HEADER_SIZE) - if len(size_bytes) < self.HEADER_SIZE: - raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}") - chunk_size = int.from_bytes(size_bytes, "big") - - encrypted_chunk = stream.read(chunk_size) - if len(encrypted_chunk) < chunk_size: - raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}") - - chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) - try: - decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None) - output.write(decrypted_chunk) # Write directly instead of appending to list - except Exception as exc: - raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc - - output.seek(0) - return output - - def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata: - data_key, encrypted_data_key = self.provider.generate_data_key() - base_nonce = secrets.token_bytes(12) - - if _HAS_RUST: - _rc.encrypt_stream_chunked( - input_path, output_path, data_key, base_nonce, self.chunk_size - ) - else: - with open(input_path, "rb") as stream: - aesgcm = AESGCM(data_key) - with open(output_path, "wb") as out: - out.write(b"\x00\x00\x00\x00") - chunk_index = 0 - while True: - chunk = stream.read(self.chunk_size) - if not chunk: - break - chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) - encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None) - out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")) - out.write(encrypted_chunk) - chunk_index += 1 - out.seek(0) - out.write(chunk_index.to_bytes(4, "big")) - - return EncryptionMetadata( - algorithm="AES256", - key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local", - nonce=base_nonce, - encrypted_data_key=encrypted_data_key, - ) - - def decrypt_file(self, input_path: str, output_path: str, - metadata: EncryptionMetadata) -> None: - data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id) - base_nonce = metadata.nonce - - if _HAS_RUST: - _rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce) - else: - with open(input_path, "rb") as stream: - chunk_count_bytes = stream.read(4) - if len(chunk_count_bytes) < 4: - raise EncryptionError("Invalid encrypted stream: missing header") - chunk_count = int.from_bytes(chunk_count_bytes, "big") - aesgcm = AESGCM(data_key) - with open(output_path, "wb") as out: - for chunk_index in range(chunk_count): - size_bytes = stream.read(self.HEADER_SIZE) - if len(size_bytes) < self.HEADER_SIZE: - raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}") - chunk_size = int.from_bytes(size_bytes, "big") - encrypted_chunk = stream.read(chunk_size) - if len(encrypted_chunk) < chunk_size: - raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}") - chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) - try: - decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None) - out.write(decrypted_chunk) - except 
Exception as exc: - raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc - - -class EncryptionManager: - """Manages encryption providers and operations.""" - - def __init__(self, config: Dict[str, Any]): - self.config = config - self._local_provider: LocalKeyEncryption | None = None - self._kms_provider: Any = None # Set by KMS module - self._streaming_encryptor: StreamingEncryptor | None = None - - @property - def enabled(self) -> bool: - return self.config.get("encryption_enabled", False) - - @property - def default_algorithm(self) -> str: - return self.config.get("default_encryption_algorithm", "AES256") - - def get_local_provider(self) -> LocalKeyEncryption: - if self._local_provider is None: - key_path = Path(self.config.get("encryption_master_key_path", "data/.myfsio.sys/keys/master.key")) - self._local_provider = LocalKeyEncryption(key_path) - return self._local_provider - - def set_kms_provider(self, kms_provider: Any) -> None: - """Set the KMS provider (injected from kms module).""" - self._kms_provider = kms_provider - - def get_provider(self, algorithm: str, kms_key_id: str | None = None) -> EncryptionProvider: - """Get the appropriate encryption provider for the algorithm.""" - if algorithm == "AES256": - return self.get_local_provider() - elif algorithm == "aws:kms": - if self._kms_provider is None: - raise EncryptionError("KMS is not configured") - return self._kms_provider.get_provider(kms_key_id) - else: - raise EncryptionError(f"Unsupported encryption algorithm: {algorithm}") - - def get_streaming_encryptor(self) -> StreamingEncryptor: - if self._streaming_encryptor is None: - chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024) - self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size) - return self._streaming_encryptor - - def encrypt_object(self, data: bytes, algorithm: str = "AES256", - kms_key_id: str | None = None, - context: Dict[str, str] | None = None) -> tuple[bytes, EncryptionMetadata]: - """Encrypt object data.""" - provider = self.get_provider(algorithm, kms_key_id) - result = provider.encrypt(data, context) - - metadata = EncryptionMetadata( - algorithm=algorithm, - key_id=result.key_id, - nonce=result.nonce, - encrypted_data_key=result.encrypted_data_key, - ) - - return result.ciphertext, metadata - - def decrypt_object(self, ciphertext: bytes, metadata: EncryptionMetadata, - context: Dict[str, str] | None = None) -> bytes: - """Decrypt object data.""" - provider = self.get_provider(metadata.algorithm, metadata.key_id) - return provider.decrypt( - ciphertext, - metadata.nonce, - metadata.encrypted_data_key, - metadata.key_id, - context, - ) - - def encrypt_stream(self, stream: BinaryIO, algorithm: str = "AES256", - context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]: - """Encrypt a stream for large files.""" - encryptor = self.get_streaming_encryptor() - return encryptor.encrypt_stream(stream, context) - - def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO: - """Decrypt a stream.""" - encryptor = self.get_streaming_encryptor() - return encryptor.decrypt_stream(stream, metadata) - - -class SSECEncryption(EncryptionProvider): - """SSE-C: Server-Side Encryption with Customer-Provided Keys. - - The client provides the encryption key with each request. - Server encrypts/decrypts but never stores the key. 
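    Only the nonce and the key's MD5 digest are persisted as metadata (see
    SSECMetadata below); the object cannot be decrypted unless the caller
    supplies the same 256-bit key again.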
- - Required headers for PUT: - - x-amz-server-side-encryption-customer-algorithm: AES256 - - x-amz-server-side-encryption-customer-key: Base64-encoded 256-bit key - - x-amz-server-side-encryption-customer-key-MD5: Base64-encoded MD5 of key - """ - - KEY_ID = "customer-provided" - - def __init__(self, customer_key: bytes): - if len(customer_key) != 32: - raise EncryptionError("Customer key must be exactly 256 bits (32 bytes)") - self.customer_key = customer_key - - @classmethod - def from_headers(cls, headers: Dict[str, str]) -> "SSECEncryption": - algorithm = headers.get("x-amz-server-side-encryption-customer-algorithm", "") - if algorithm.upper() != "AES256": - raise EncryptionError(f"Unsupported SSE-C algorithm: {algorithm}. Only AES256 is supported.") - - key_b64 = headers.get("x-amz-server-side-encryption-customer-key", "") - if not key_b64: - raise EncryptionError("Missing x-amz-server-side-encryption-customer-key header") - - key_md5_b64 = headers.get("x-amz-server-side-encryption-customer-key-md5", "") - - try: - customer_key = base64.b64decode(key_b64) - except Exception as e: - raise EncryptionError(f"Invalid base64 in customer key: {e}") from e - - if len(customer_key) != 32: - raise EncryptionError(f"Customer key must be 256 bits, got {len(customer_key) * 8} bits") - - if key_md5_b64: - import hashlib - expected_md5 = base64.b64encode(hashlib.md5(customer_key).digest()).decode() - if key_md5_b64 != expected_md5: - raise EncryptionError("Customer key MD5 mismatch") - - return cls(customer_key) - - def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: - aesgcm = AESGCM(self.customer_key) - nonce = secrets.token_bytes(12) - aad = json.dumps(context, sort_keys=True).encode() if context else None - ciphertext = aesgcm.encrypt(nonce, plaintext, aad) - - return EncryptionResult( - ciphertext=ciphertext, - nonce=nonce, - key_id=self.KEY_ID, - encrypted_data_key=b"", - ) - - def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, - key_id: str, context: Dict[str, str] | None = None) -> bytes: - aesgcm = AESGCM(self.customer_key) - aad = json.dumps(context, sort_keys=True).encode() if context else None - try: - return aesgcm.decrypt(nonce, ciphertext, aad) - except Exception as exc: - raise EncryptionError("SSE-C decryption failed") from exc - - def generate_data_key(self) -> tuple[bytes, bytes]: - return self.customer_key, b"" - - -@dataclass -class SSECMetadata: - algorithm: str = "AES256" - nonce: bytes = b"" - key_md5: str = "" - - def to_dict(self) -> Dict[str, str]: - return { - "x-amz-server-side-encryption-customer-algorithm": self.algorithm, - "x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(), - "x-amz-server-side-encryption-customer-key-MD5": self.key_md5, - } - - @classmethod - def from_dict(cls, data: Dict[str, str]) -> Optional["SSECMetadata"]: - algorithm = data.get("x-amz-server-side-encryption-customer-algorithm") - if not algorithm: - return None - try: - nonce = base64.b64decode(data.get("x-amz-encryption-nonce", "")) - return cls( - algorithm=algorithm, - nonce=nonce, - key_md5=data.get("x-amz-server-side-encryption-customer-key-MD5", ""), - ) - except Exception: - return None - - -class ClientEncryptionHelper: - """Helpers for client-side encryption. - - Client-side encryption is performed by the client, but this helper - provides key generation and materials for clients that need them. 
- """ - - @staticmethod - def generate_client_key() -> Dict[str, str]: - """Generate a new client encryption key.""" - from datetime import datetime, timezone - key = secrets.token_bytes(32) - return { - "key": base64.b64encode(key).decode(), - "algorithm": "AES-256-GCM", - "created_at": datetime.now(timezone.utc).isoformat(), - } - - @staticmethod - def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]: - """Encrypt data with a client-provided key.""" - key = base64.b64decode(key_b64) - if len(key) != 32: - raise EncryptionError("Key must be 256 bits (32 bytes)") - - aesgcm = AESGCM(key) - nonce = secrets.token_bytes(12) - aad = json.dumps(context, sort_keys=True).encode() if context else None - ciphertext = aesgcm.encrypt(nonce, plaintext, aad) - - return { - "ciphertext": base64.b64encode(ciphertext).decode(), - "nonce": base64.b64encode(nonce).decode(), - "algorithm": "AES-256-GCM", - } - - @staticmethod - def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes: - """Decrypt data with a client-provided key.""" - key = base64.b64decode(key_b64) - nonce = base64.b64decode(nonce_b64) - ciphertext = base64.b64decode(ciphertext_b64) - - if len(key) != 32: - raise EncryptionError("Key must be 256 bits (32 bytes)") - - aesgcm = AESGCM(key) - aad = json.dumps(context, sort_keys=True).encode() if context else None - try: - return aesgcm.decrypt(nonce, ciphertext, aad) - except Exception as exc: - raise EncryptionError("Decryption failed") from exc diff --git a/app/errors.py b/app/errors.py deleted file mode 100644 index 049187d..0000000 --- a/app/errors.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import annotations - -import logging -from dataclasses import dataclass, field -from typing import Optional, Dict, Any -from xml.etree.ElementTree import Element, SubElement, tostring - -from flask import Response, jsonify, request, flash, redirect, url_for, g -from flask_limiter import RateLimitExceeded - -logger = logging.getLogger(__name__) - - -@dataclass -class AppError(Exception): - """Base application error with multi-format response support.""" - code: str - message: str - status_code: int = 500 - details: Optional[Dict[str, Any]] = field(default=None) - - def __post_init__(self): - super().__init__(self.message) - - def to_xml_response(self) -> Response: - """Convert to S3 API XML error response.""" - error = Element("Error") - SubElement(error, "Code").text = self.code - SubElement(error, "Message").text = self.message - request_id = getattr(g, 'request_id', None) if g else None - SubElement(error, "RequestId").text = request_id or "unknown" - xml_bytes = tostring(error, encoding="utf-8") - return Response(xml_bytes, status=self.status_code, mimetype="application/xml") - - def to_json_response(self) -> tuple[Response, int]: - """Convert to JSON error response for UI AJAX calls.""" - payload: Dict[str, Any] = { - "success": False, - "error": { - "code": self.code, - "message": self.message - } - } - if self.details: - payload["error"]["details"] = self.details - return jsonify(payload), self.status_code - - def to_flash_message(self) -> str: - """Convert to user-friendly flash message.""" - return self.message - - -@dataclass -class BucketNotFoundError(AppError): - """Bucket does not exist.""" - code: str = "NoSuchBucket" - message: str = "The specified bucket does not exist" - status_code: int = 404 - - -@dataclass -class BucketAlreadyExistsError(AppError): - 
"""Bucket already exists.""" - code: str = "BucketAlreadyExists" - message: str = "The requested bucket name is not available" - status_code: int = 409 - - -@dataclass -class BucketNotEmptyError(AppError): - """Bucket is not empty.""" - code: str = "BucketNotEmpty" - message: str = "The bucket you tried to delete is not empty" - status_code: int = 409 - - -@dataclass -class ObjectNotFoundError(AppError): - """Object does not exist.""" - code: str = "NoSuchKey" - message: str = "The specified key does not exist" - status_code: int = 404 - - -@dataclass -class InvalidObjectKeyError(AppError): - """Invalid object key.""" - code: str = "InvalidKey" - message: str = "The specified key is not valid" - status_code: int = 400 - - -@dataclass -class AccessDeniedError(AppError): - """Access denied.""" - code: str = "AccessDenied" - message: str = "Access Denied" - status_code: int = 403 - - -@dataclass -class InvalidCredentialsError(AppError): - """Invalid credentials.""" - code: str = "InvalidAccessKeyId" - message: str = "The access key ID you provided does not exist" - status_code: int = 403 - -@dataclass -class MalformedRequestError(AppError): - """Malformed request.""" - code: str = "MalformedXML" - message: str = "The XML you provided was not well-formed" - status_code: int = 400 - - -@dataclass -class InvalidArgumentError(AppError): - """Invalid argument.""" - code: str = "InvalidArgument" - message: str = "Invalid argument" - status_code: int = 400 - - -@dataclass -class EntityTooLargeError(AppError): - """Entity too large.""" - code: str = "EntityTooLarge" - message: str = "Your proposed upload exceeds the maximum allowed size" - status_code: int = 413 - - -@dataclass -class QuotaExceededAppError(AppError): - """Bucket quota exceeded.""" - code: str = "QuotaExceeded" - message: str = "The bucket quota has been exceeded" - status_code: int = 403 - quota: Optional[Dict[str, Any]] = None - usage: Optional[Dict[str, int]] = None - - def __post_init__(self): - if self.quota or self.usage: - self.details = {} - if self.quota: - self.details["quota"] = self.quota - if self.usage: - self.details["usage"] = self.usage - super().__post_init__() - - -def handle_app_error(error: AppError) -> Response: - """Handle application errors with appropriate response format.""" - log_extra = {"error_code": error.code} - if error.details: - log_extra["details"] = error.details - - logger.error(f"{error.code}: {error.message}", extra=log_extra) - - if request.path.startswith('/ui'): - wants_json = ( - request.is_json or - request.headers.get('X-Requested-With') == 'XMLHttpRequest' or - 'application/json' in request.accept_mimetypes.values() - ) - if wants_json: - return error.to_json_response() - flash(error.to_flash_message(), 'danger') - referrer = request.referrer - if referrer and request.host in referrer: - return redirect(referrer) - return redirect(url_for('ui.buckets_overview')) - else: - return error.to_xml_response() - - -def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response: - g.s3_error_code = "SlowDown" - if request.path.startswith("/ui") or request.path.startswith("/buckets"): - wants_json = ( - request.is_json or - request.headers.get("X-Requested-With") == "XMLHttpRequest" or - "application/json" in request.accept_mimetypes.values() - ) - if wants_json: - return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429 - error = Element("Error") - SubElement(error, "Code").text = "SlowDown" - SubElement(error, "Message").text = "Please 
reduce your request rate." - SubElement(error, "Resource").text = request.path - SubElement(error, "RequestId").text = getattr(g, "request_id", "") - xml_bytes = tostring(error, encoding="utf-8") - return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml") - - -def register_error_handlers(app): - """Register error handlers with a Flask app.""" - app.register_error_handler(AppError, handle_app_error) - app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded) - - for error_class in [ - BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError, - ObjectNotFoundError, InvalidObjectKeyError, - AccessDeniedError, InvalidCredentialsError, - MalformedRequestError, InvalidArgumentError, EntityTooLargeError, - QuotaExceededAppError, - ]: - app.register_error_handler(error_class, handle_app_error) diff --git a/app/extensions.py b/app/extensions.py deleted file mode 100644 index 7da36ae..0000000 --- a/app/extensions.py +++ /dev/null @@ -1,16 +0,0 @@ -from flask import g -from flask_limiter import Limiter -from flask_limiter.util import get_remote_address -from flask_wtf import CSRFProtect - -def get_rate_limit_key(): - """Generate rate limit key based on authenticated user.""" - if hasattr(g, 'principal') and g.principal: - return g.principal.access_key - return get_remote_address() - -# Shared rate limiter instance; configured in app factory. -limiter = Limiter(key_func=get_rate_limit_key) - -# Global CSRF protection for UI routes. -csrf = CSRFProtect() diff --git a/app/gc.py b/app/gc.py deleted file mode 100644 index 16fa3b7..0000000 --- a/app/gc.py +++ /dev/null @@ -1,596 +0,0 @@ -from __future__ import annotations - -import json -import logging -import os -import shutil -import threading -import time -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -logger = logging.getLogger(__name__) - - -@dataclass -class GCResult: - temp_files_deleted: int = 0 - temp_bytes_freed: int = 0 - multipart_uploads_deleted: int = 0 - multipart_bytes_freed: int = 0 - lock_files_deleted: int = 0 - orphaned_metadata_deleted: int = 0 - orphaned_versions_deleted: int = 0 - orphaned_version_bytes_freed: int = 0 - empty_dirs_removed: int = 0 - errors: List[str] = field(default_factory=list) - execution_time_seconds: float = 0.0 - - def to_dict(self) -> dict: - return { - "temp_files_deleted": self.temp_files_deleted, - "temp_bytes_freed": self.temp_bytes_freed, - "multipart_uploads_deleted": self.multipart_uploads_deleted, - "multipart_bytes_freed": self.multipart_bytes_freed, - "lock_files_deleted": self.lock_files_deleted, - "orphaned_metadata_deleted": self.orphaned_metadata_deleted, - "orphaned_versions_deleted": self.orphaned_versions_deleted, - "orphaned_version_bytes_freed": self.orphaned_version_bytes_freed, - "empty_dirs_removed": self.empty_dirs_removed, - "errors": self.errors, - "execution_time_seconds": self.execution_time_seconds, - } - - @property - def total_bytes_freed(self) -> int: - return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed - - @property - def has_work(self) -> bool: - return ( - self.temp_files_deleted > 0 - or self.multipart_uploads_deleted > 0 - or self.lock_files_deleted > 0 - or self.orphaned_metadata_deleted > 0 - or self.orphaned_versions_deleted > 0 - or self.empty_dirs_removed > 0 - ) - - -@dataclass -class GCExecutionRecord: - timestamp: float - result: dict - dry_run: bool - - def 
to_dict(self) -> dict: - return { - "timestamp": self.timestamp, - "result": self.result, - "dry_run": self.dry_run, - } - - @classmethod - def from_dict(cls, data: dict) -> GCExecutionRecord: - return cls( - timestamp=data["timestamp"], - result=data["result"], - dry_run=data.get("dry_run", False), - ) - - -class GCHistoryStore: - def __init__(self, storage_root: Path, max_records: int = 50) -> None: - self.storage_root = storage_root - self.max_records = max_records - self._lock = threading.Lock() - - def _get_path(self) -> Path: - return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json" - - def load(self) -> List[GCExecutionRecord]: - path = self._get_path() - if not path.exists(): - return [] - try: - with open(path, "r", encoding="utf-8") as f: - data = json.load(f) - return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])] - except (OSError, ValueError, KeyError) as e: - logger.error("Failed to load GC history: %s", e) - return [] - - def save(self, records: List[GCExecutionRecord]) -> None: - path = self._get_path() - path.parent.mkdir(parents=True, exist_ok=True) - data = {"executions": [r.to_dict() for r in records[: self.max_records]]} - try: - with open(path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - except OSError as e: - logger.error("Failed to save GC history: %s", e) - - def add(self, record: GCExecutionRecord) -> None: - with self._lock: - records = self.load() - records.insert(0, record) - self.save(records) - - def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]: - return self.load()[offset : offset + limit] - - -def _dir_size(path: Path) -> int: - total = 0 - try: - for f in path.rglob("*"): - if f.is_file(): - try: - total += f.stat().st_size - except OSError: - pass - except OSError: - pass - return total - - -def _file_age_hours(path: Path) -> float: - try: - mtime = path.stat().st_mtime - return (time.time() - mtime) / 3600.0 - except OSError: - return 0.0 - - -class GarbageCollector: - SYSTEM_ROOT = ".myfsio.sys" - SYSTEM_TMP_DIR = "tmp" - SYSTEM_MULTIPART_DIR = "multipart" - SYSTEM_BUCKETS_DIR = "buckets" - BUCKET_META_DIR = "meta" - BUCKET_VERSIONS_DIR = "versions" - INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"} - - def __init__( - self, - storage_root: Path, - interval_hours: float = 6.0, - temp_file_max_age_hours: float = 24.0, - multipart_max_age_days: int = 7, - lock_file_max_age_hours: float = 1.0, - dry_run: bool = False, - max_history: int = 50, - io_throttle_ms: int = 10, - ) -> None: - self.storage_root = Path(storage_root) - self.interval_seconds = interval_hours * 3600.0 - self.temp_file_max_age_hours = temp_file_max_age_hours - self.multipart_max_age_days = multipart_max_age_days - self.lock_file_max_age_hours = lock_file_max_age_hours - self.dry_run = dry_run - self._timer: Optional[threading.Timer] = None - self._shutdown = False - self._lock = threading.Lock() - self._scanning = False - self._scan_start_time: Optional[float] = None - self._io_throttle = max(0, io_throttle_ms) / 1000.0 - self.history_store = GCHistoryStore(storage_root, max_records=max_history) - - def start(self) -> None: - if self._timer is not None: - return - self._shutdown = False - self._schedule_next() - logger.info( - "GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s", - self.interval_seconds / 3600.0, - self.temp_file_max_age_hours, - self.multipart_max_age_days, - self.lock_file_max_age_hours, - self.dry_run, - ) - - def 
stop(self) -> None: - self._shutdown = True - if self._timer: - self._timer.cancel() - self._timer = None - logger.info("GC stopped") - - def _schedule_next(self) -> None: - if self._shutdown: - return - self._timer = threading.Timer(self.interval_seconds, self._run_cycle) - self._timer.daemon = True - self._timer.start() - - def _run_cycle(self) -> None: - if self._shutdown: - return - try: - self.run_now() - except Exception as e: - logger.error("GC cycle failed: %s", e) - finally: - self._schedule_next() - - def run_now(self, dry_run: Optional[bool] = None) -> GCResult: - if not self._lock.acquire(blocking=False): - raise RuntimeError("GC is already in progress") - - effective_dry_run = dry_run if dry_run is not None else self.dry_run - - try: - self._scanning = True - self._scan_start_time = time.time() - - start = self._scan_start_time - result = GCResult() - - original_dry_run = self.dry_run - self.dry_run = effective_dry_run - try: - self._clean_temp_files(result) - self._clean_orphaned_multipart(result) - self._clean_stale_locks(result) - self._clean_orphaned_metadata(result) - self._clean_orphaned_versions(result) - self._clean_empty_dirs(result) - finally: - self.dry_run = original_dry_run - - result.execution_time_seconds = time.time() - start - - if result.has_work or result.errors: - logger.info( - "GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), " - "locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s", - result.execution_time_seconds, - result.temp_files_deleted, - result.temp_bytes_freed / (1024 * 1024), - result.multipart_uploads_deleted, - result.multipart_bytes_freed / (1024 * 1024), - result.lock_files_deleted, - result.orphaned_metadata_deleted, - result.orphaned_versions_deleted, - result.orphaned_version_bytes_freed / (1024 * 1024), - result.empty_dirs_removed, - len(result.errors), - " (dry run)" if effective_dry_run else "", - ) - - record = GCExecutionRecord( - timestamp=time.time(), - result=result.to_dict(), - dry_run=effective_dry_run, - ) - self.history_store.add(record) - - return result - finally: - self._scanning = False - self._scan_start_time = None - self._lock.release() - - def run_async(self, dry_run: Optional[bool] = None) -> bool: - if self._scanning: - return False - t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True) - t.start() - return True - - def _system_path(self) -> Path: - return self.storage_root / self.SYSTEM_ROOT - - def _throttle(self) -> bool: - if self._shutdown: - return True - if self._io_throttle > 0: - time.sleep(self._io_throttle) - return self._shutdown - - def _list_bucket_names(self) -> List[str]: - names = [] - try: - for entry in self.storage_root.iterdir(): - if entry.is_dir() and entry.name != self.SYSTEM_ROOT: - names.append(entry.name) - except OSError: - pass - return names - - def _clean_temp_files(self, result: GCResult) -> None: - tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR - if not tmp_dir.exists(): - return - try: - for entry in tmp_dir.iterdir(): - if self._throttle(): - return - if not entry.is_file(): - continue - age = _file_age_hours(entry) - if age < self.temp_file_max_age_hours: - continue - try: - size = entry.stat().st_size - if not self.dry_run: - entry.unlink() - result.temp_files_deleted += 1 - result.temp_bytes_freed += size - except OSError as e: - result.errors.append(f"temp file {entry.name}: {e}") - except OSError as e: - result.errors.append(f"scan tmp dir: {e}") - - def _clean_orphaned_multipart(self, result: GCResult) -> None: - 
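        # Uploads older than multipart_max_age_days (converted to hours below) are
        # treated as abandoned; both the system multipart root and the legacy
        # per-bucket ".multipart" directory are scanned.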
cutoff_hours = self.multipart_max_age_days * 24.0 - bucket_names = self._list_bucket_names() - - for bucket_name in bucket_names: - if self._shutdown: - return - for multipart_root in ( - self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name, - self.storage_root / bucket_name / ".multipart", - ): - if not multipart_root.exists(): - continue - try: - for upload_dir in multipart_root.iterdir(): - if self._throttle(): - return - if not upload_dir.is_dir(): - continue - self._maybe_clean_upload(upload_dir, cutoff_hours, result) - except OSError as e: - result.errors.append(f"scan multipart {bucket_name}: {e}") - - def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None: - manifest_path = upload_dir / "manifest.json" - age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir) - - if age < cutoff_hours: - return - - dir_bytes = _dir_size(upload_dir) - try: - if not self.dry_run: - shutil.rmtree(upload_dir, ignore_errors=True) - result.multipart_uploads_deleted += 1 - result.multipart_bytes_freed += dir_bytes - except OSError as e: - result.errors.append(f"multipart {upload_dir.name}: {e}") - - def _clean_stale_locks(self, result: GCResult) -> None: - buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR - if not buckets_root.exists(): - return - - try: - for bucket_dir in buckets_root.iterdir(): - if self._shutdown: - return - if not bucket_dir.is_dir(): - continue - locks_dir = bucket_dir / "locks" - if not locks_dir.exists(): - continue - try: - for lock_file in locks_dir.iterdir(): - if self._throttle(): - return - if not lock_file.is_file() or not lock_file.name.endswith(".lock"): - continue - age = _file_age_hours(lock_file) - if age < self.lock_file_max_age_hours: - continue - try: - if not self.dry_run: - lock_file.unlink(missing_ok=True) - result.lock_files_deleted += 1 - except OSError as e: - result.errors.append(f"lock {lock_file.name}: {e}") - except OSError as e: - result.errors.append(f"scan locks {bucket_dir.name}: {e}") - except OSError as e: - result.errors.append(f"scan buckets for locks: {e}") - - def _clean_orphaned_metadata(self, result: GCResult) -> None: - bucket_names = self._list_bucket_names() - - for bucket_name in bucket_names: - if self._shutdown: - return - legacy_meta = self.storage_root / bucket_name / ".meta" - if legacy_meta.exists(): - self._clean_legacy_metadata(bucket_name, legacy_meta, result) - - new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - if new_meta.exists(): - self._clean_index_metadata(bucket_name, new_meta, result) - - def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None: - bucket_path = self.storage_root / bucket_name - try: - for meta_file in meta_root.rglob("*.meta.json"): - if self._throttle(): - return - if not meta_file.is_file(): - continue - try: - rel = meta_file.relative_to(meta_root) - object_key = rel.as_posix().removesuffix(".meta.json") - object_path = bucket_path / object_key - if not object_path.exists(): - if not self.dry_run: - meta_file.unlink(missing_ok=True) - result.orphaned_metadata_deleted += 1 - except (OSError, ValueError) as e: - result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}") - except OSError as e: - result.errors.append(f"scan legacy meta {bucket_name}: {e}") - - def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None: - bucket_path = self.storage_root / bucket_name - try: - for 
index_file in meta_root.rglob("_index.json"): - if self._throttle(): - return - if not index_file.is_file(): - continue - try: - with open(index_file, "r", encoding="utf-8") as f: - index_data = json.load(f) - except (OSError, json.JSONDecodeError): - continue - - keys_to_remove = [] - for key in index_data: - rel_dir = index_file.parent.relative_to(meta_root) - if rel_dir == Path("."): - full_key = key - else: - full_key = rel_dir.as_posix() + "/" + key - object_path = bucket_path / full_key - if not object_path.exists(): - keys_to_remove.append(key) - - if keys_to_remove: - if not self.dry_run: - for k in keys_to_remove: - index_data.pop(k, None) - if index_data: - try: - with open(index_file, "w", encoding="utf-8") as f: - json.dump(index_data, f) - except OSError as e: - result.errors.append(f"write index {bucket_name}: {e}") - continue - else: - try: - index_file.unlink(missing_ok=True) - except OSError: - pass - result.orphaned_metadata_deleted += len(keys_to_remove) - except OSError as e: - result.errors.append(f"scan index meta {bucket_name}: {e}") - - def _clean_orphaned_versions(self, result: GCResult) -> None: - bucket_names = self._list_bucket_names() - - for bucket_name in bucket_names: - if self._shutdown: - return - bucket_path = self.storage_root / bucket_name - for versions_root in ( - self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR, - self.storage_root / bucket_name / ".versions", - ): - if not versions_root.exists(): - continue - try: - for key_dir in versions_root.iterdir(): - if self._throttle(): - return - if not key_dir.is_dir(): - continue - self._clean_versions_for_key(bucket_path, versions_root, key_dir, result) - except OSError as e: - result.errors.append(f"scan versions {bucket_name}: {e}") - - def _clean_versions_for_key( - self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult - ) -> None: - try: - rel = key_dir.relative_to(versions_root) - except ValueError: - return - - object_path = bucket_path / rel - if object_path.exists(): - return - - version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json")) - if not version_files: - return - - for vf in version_files: - try: - size = vf.stat().st_size if vf.suffix == ".bin" else 0 - if not self.dry_run: - vf.unlink(missing_ok=True) - if vf.suffix == ".bin": - result.orphaned_version_bytes_freed += size - result.orphaned_versions_deleted += 1 - except OSError as e: - result.errors.append(f"version file {vf.name}: {e}") - - def _clean_empty_dirs(self, result: GCResult) -> None: - targets = [ - self._system_path() / self.SYSTEM_TMP_DIR, - self._system_path() / self.SYSTEM_MULTIPART_DIR, - self._system_path() / self.SYSTEM_BUCKETS_DIR, - ] - for bucket_name in self._list_bucket_names(): - targets.append(self.storage_root / bucket_name / ".meta") - targets.append(self.storage_root / bucket_name / ".versions") - targets.append(self.storage_root / bucket_name / ".multipart") - - for root in targets: - if not root.exists(): - continue - self._remove_empty_dirs_recursive(root, root, result) - - def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool: - if self._shutdown: - return False - if not path.is_dir(): - return False - - try: - children = list(path.iterdir()) - except OSError: - return False - - all_empty = True - for child in children: - if self._throttle(): - return False - if child.is_dir(): - if not self._remove_empty_dirs_recursive(child, stop_at, result): - all_empty = False - else: - all_empty = 
False - - if all_empty and path != stop_at: - try: - if not self.dry_run: - path.rmdir() - result.empty_dirs_removed += 1 - return True - except OSError: - return False - return all_empty - - def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]: - records = self.history_store.get_history(limit, offset) - return [r.to_dict() for r in records] - - def get_status(self) -> dict: - status: Dict[str, Any] = { - "enabled": not self._shutdown or self._timer is not None, - "running": self._timer is not None and not self._shutdown, - "scanning": self._scanning, - "interval_hours": self.interval_seconds / 3600.0, - "temp_file_max_age_hours": self.temp_file_max_age_hours, - "multipart_max_age_days": self.multipart_max_age_days, - "lock_file_max_age_hours": self.lock_file_max_age_hours, - "dry_run": self.dry_run, - "io_throttle_ms": round(self._io_throttle * 1000), - } - if self._scanning and self._scan_start_time: - status["scan_elapsed_seconds"] = time.time() - self._scan_start_time - return status diff --git a/app/iam.py b/app/iam.py deleted file mode 100644 index 7e8f3fa..0000000 --- a/app/iam.py +++ /dev/null @@ -1,1095 +0,0 @@ -from __future__ import annotations - -import base64 -import hashlib -import hmac -import json -import math -import os -import secrets -import threading -import time -from collections import deque -from dataclasses import dataclass, field -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple - -from cryptography.fernet import Fernet, InvalidToken - - -class IamError(RuntimeError): - """Raised when authentication or authorization fails.""" - - -S3_ACTIONS = { - "list", "read", "write", "delete", "share", "policy", - "replication", "lifecycle", "cors", - "create_bucket", "delete_bucket", - "versioning", "tagging", "encryption", "quota", - "object_lock", "notification", "logging", "website", -} -IAM_ACTIONS = { - "iam:list_users", - "iam:create_user", - "iam:delete_user", - "iam:rotate_key", - "iam:update_policy", - "iam:create_key", - "iam:delete_key", - "iam:get_user", - "iam:get_policy", - "iam:disable_user", -} -ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"} - -_V1_IMPLIED_ACTIONS = { - "write": {"create_bucket"}, - "delete": {"delete_bucket"}, - "policy": { - "versioning", "tagging", "encryption", "quota", - "object_lock", "notification", "logging", "website", - "cors", "lifecycle", "replication", "share", - }, -} - -ACTION_ALIASES = { - "list": "list", - "s3:listbucket": "list", - "s3:listallmybuckets": "list", - "s3:listbucketversions": "list", - "s3:listmultipartuploads": "list", - "s3:listparts": "list", - "read": "read", - "s3:getobject": "read", - "s3:getobjectversion": "read", - "s3:getobjecttagging": "read", - "s3:getobjectversiontagging": "read", - "s3:getobjectacl": "read", - "s3:headobject": "read", - "s3:headbucket": "read", - "write": "write", - "s3:putobject": "write", - "s3:putobjecttagging": "write", - "s3:createmultipartupload": "write", - "s3:uploadpart": "write", - "s3:completemultipartupload": "write", - "s3:abortmultipartupload": "write", - "s3:copyobject": "write", - "delete": "delete", - "s3:deleteobject": "delete", - "s3:deleteobjectversion": "delete", - "s3:deleteobjecttagging": "delete", - "create_bucket": "create_bucket", - "s3:createbucket": "create_bucket", - "delete_bucket": "delete_bucket", - "s3:deletebucket": "delete_bucket", - "share": "share", - "s3:putobjectacl": "share", - "s3:putbucketacl": "share", - 
"s3:getbucketacl": "share", - "policy": "policy", - "s3:putbucketpolicy": "policy", - "s3:getbucketpolicy": "policy", - "s3:deletebucketpolicy": "policy", - "replication": "replication", - "s3:getreplicationconfiguration": "replication", - "s3:putreplicationconfiguration": "replication", - "s3:deletereplicationconfiguration": "replication", - "s3:replicateobject": "replication", - "s3:replicatetags": "replication", - "s3:replicatedelete": "replication", - "lifecycle": "lifecycle", - "s3:getlifecycleconfiguration": "lifecycle", - "s3:putlifecycleconfiguration": "lifecycle", - "s3:deletelifecycleconfiguration": "lifecycle", - "s3:getbucketlifecycle": "lifecycle", - "s3:putbucketlifecycle": "lifecycle", - "cors": "cors", - "s3:getbucketcors": "cors", - "s3:putbucketcors": "cors", - "s3:deletebucketcors": "cors", - "versioning": "versioning", - "s3:getbucketversioning": "versioning", - "s3:putbucketversioning": "versioning", - "tagging": "tagging", - "s3:getbuckettagging": "tagging", - "s3:putbuckettagging": "tagging", - "s3:deletebuckettagging": "tagging", - "encryption": "encryption", - "s3:getencryptionconfiguration": "encryption", - "s3:putencryptionconfiguration": "encryption", - "s3:deleteencryptionconfiguration": "encryption", - "quota": "quota", - "s3:getbucketquota": "quota", - "s3:putbucketquota": "quota", - "s3:deletebucketquota": "quota", - "object_lock": "object_lock", - "s3:getobjectlockconfiguration": "object_lock", - "s3:putobjectlockconfiguration": "object_lock", - "s3:putobjectretention": "object_lock", - "s3:getobjectretention": "object_lock", - "s3:putobjectlegalhold": "object_lock", - "s3:getobjectlegalhold": "object_lock", - "notification": "notification", - "s3:getbucketnotificationconfiguration": "notification", - "s3:putbucketnotificationconfiguration": "notification", - "s3:deletebucketnotificationconfiguration": "notification", - "logging": "logging", - "s3:getbucketlogging": "logging", - "s3:putbucketlogging": "logging", - "s3:deletebucketlogging": "logging", - "website": "website", - "s3:getbucketwebsite": "website", - "s3:putbucketwebsite": "website", - "s3:deletebucketwebsite": "website", - "iam:listusers": "iam:list_users", - "iam:createuser": "iam:create_user", - "iam:deleteuser": "iam:delete_user", - "iam:rotateaccesskey": "iam:rotate_key", - "iam:putuserpolicy": "iam:update_policy", - "iam:createaccesskey": "iam:create_key", - "iam:deleteaccesskey": "iam:delete_key", - "iam:getuser": "iam:get_user", - "iam:getpolicy": "iam:get_policy", - "iam:disableuser": "iam:disable_user", - "iam:*": "iam:*", -} - - -@dataclass -class Policy: - bucket: str - actions: Set[str] - prefix: str = "*" - - -@dataclass -class Principal: - access_key: str - display_name: str - policies: List[Policy] - - -def _derive_fernet_key(secret: str) -> bytes: - raw = hashlib.pbkdf2_hmac("sha256", secret.encode(), b"myfsio-iam-encryption", 100_000) - return base64.urlsafe_b64encode(raw) - - -_IAM_ENCRYPTED_PREFIX = b"MYFSIO_IAM_ENC:" - -_CONFIG_VERSION = 2 - - -def _expand_v1_actions(actions: Set[str]) -> Set[str]: - expanded = set(actions) - for action, implied in _V1_IMPLIED_ACTIONS.items(): - if action in expanded: - expanded.update(implied) - return expanded - - -class IamService: - """Loads IAM configuration, manages users, and evaluates policies.""" - - def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15, encryption_key: str | None = None) -> None: - self.config_path = Path(config_path) - self.auth_max_attempts = auth_max_attempts - 
self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes) - self._fernet: Fernet | None = None - if encryption_key: - self._fernet = Fernet(_derive_fernet_key(encryption_key)) - self.config_path.parent.mkdir(parents=True, exist_ok=True) - if not self.config_path.exists(): - self._write_default() - self._user_records: Dict[str, Dict[str, Any]] = {} - self._key_index: Dict[str, str] = {} - self._key_secrets: Dict[str, str] = {} - self._key_status: Dict[str, str] = {} - self._raw_config: Dict[str, Any] = {} - self._failed_attempts: Dict[str, Deque[datetime]] = {} - self._last_load_time = 0.0 - self._principal_cache: Dict[str, Tuple[Principal, float]] = {} - self._secret_key_cache: Dict[str, Tuple[str, float]] = {} - self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0")) - self._last_stat_check = 0.0 - self._stat_check_interval = float(os.environ.get("IAM_STAT_CHECK_INTERVAL_SECONDS", "2.0")) - self._sessions: Dict[str, Dict[str, Any]] = {} - self._session_lock = threading.Lock() - self._load() - self._load_lockout_state() - - def _maybe_reload(self) -> None: - now = time.time() - if now - self._last_stat_check < self._stat_check_interval: - return - self._last_stat_check = now - try: - if self.config_path.stat().st_mtime > self._last_load_time: - self._load() - self._principal_cache.clear() - self._secret_key_cache.clear() - except OSError: - pass - - def _check_expiry(self, access_key: str, record: Dict[str, Any]) -> None: - expires_at = record.get("expires_at") - if not expires_at: - return - try: - exp_dt = datetime.fromisoformat(expires_at) - if exp_dt.tzinfo is None: - exp_dt = exp_dt.replace(tzinfo=timezone.utc) - if datetime.now(timezone.utc) >= exp_dt: - raise IamError(f"Credentials for '{access_key}' have expired") - except (ValueError, TypeError): - pass - - def authenticate(self, access_key: str, secret_key: str) -> Principal: - self._maybe_reload() - access_key = (access_key or "").strip() - secret_key = (secret_key or "").strip() - if not access_key or not secret_key: - raise IamError("Missing access credentials") - if self._is_locked_out(access_key): - seconds = self._seconds_until_unlock(access_key) - raise IamError( - f"Access temporarily locked. Try again in {seconds} seconds." 
- ) - user_id = self._key_index.get(access_key) - stored_secret = self._key_secrets.get(access_key, secrets.token_urlsafe(24)) - if not user_id or not hmac.compare_digest(stored_secret, secret_key): - self._record_failed_attempt(access_key) - raise IamError("Invalid credentials") - key_status = self._key_status.get(access_key, "active") - if key_status != "active": - raise IamError("Access key is inactive") - record = self._user_records.get(user_id) - if not record: - self._record_failed_attempt(access_key) - raise IamError("Invalid credentials") - if not record.get("enabled", True): - raise IamError("User account is disabled") - self._check_expiry(access_key, record) - self._clear_failed_attempts(access_key) - return self._build_principal(access_key, record) - - _MAX_LOCKOUT_KEYS = 10000 - - def _record_failed_attempt(self, access_key: str) -> None: - if not access_key: - return - if access_key not in self._failed_attempts and len(self._failed_attempts) >= self._MAX_LOCKOUT_KEYS: - oldest_key = min(self._failed_attempts, key=lambda k: self._failed_attempts[k][0] if self._failed_attempts[k] else datetime.min.replace(tzinfo=timezone.utc)) - del self._failed_attempts[oldest_key] - attempts = self._failed_attempts.setdefault(access_key, deque()) - self._prune_attempts(attempts) - attempts.append(datetime.now(timezone.utc)) - self._save_lockout_state() - - def _clear_failed_attempts(self, access_key: str) -> None: - if not access_key: - return - if self._failed_attempts.pop(access_key, None) is not None: - self._save_lockout_state() - - def _lockout_file(self) -> Path: - return self.config_path.parent / "lockout_state.json" - - def _load_lockout_state(self) -> None: - try: - if self._lockout_file().exists(): - data = json.loads(self._lockout_file().read_text(encoding="utf-8")) - cutoff = datetime.now(timezone.utc) - self.auth_lockout_window - for key, timestamps in data.get("failed_attempts", {}).items(): - valid = [] - for ts in timestamps: - try: - dt = datetime.fromisoformat(ts) - if dt > cutoff: - valid.append(dt) - except (ValueError, TypeError): - continue - if valid: - self._failed_attempts[key] = deque(valid) - except (OSError, json.JSONDecodeError): - pass - - def _save_lockout_state(self) -> None: - data: Dict[str, Any] = {"failed_attempts": {}} - for key, attempts in self._failed_attempts.items(): - data["failed_attempts"][key] = [ts.isoformat() for ts in attempts] - try: - self._lockout_file().write_text(json.dumps(data), encoding="utf-8") - except OSError: - pass - - def _prune_attempts(self, attempts: Deque[datetime]) -> None: - cutoff = datetime.now(timezone.utc) - self.auth_lockout_window - while attempts and attempts[0] < cutoff: - attempts.popleft() - - def _is_locked_out(self, access_key: str) -> bool: - if not access_key: - return False - attempts = self._failed_attempts.get(access_key) - if not attempts: - return False - self._prune_attempts(attempts) - return len(attempts) >= self.auth_max_attempts - - def _seconds_until_unlock(self, access_key: str) -> int: - attempts = self._failed_attempts.get(access_key) - if not attempts: - return 0 - self._prune_attempts(attempts) - if len(attempts) < self.auth_max_attempts: - return 0 - oldest = attempts[0] - elapsed = (datetime.now(timezone.utc) - oldest).total_seconds() - return int(max(0, self.auth_lockout_window.total_seconds() - elapsed)) - - def create_session_token(self, access_key: str, duration_seconds: int = 3600) -> str: - self._maybe_reload() - user_id = self._key_index.get(access_key) - if not user_id or user_id not in 
self._user_records: - raise IamError("Unknown access key") - self._cleanup_expired_sessions() - token = secrets.token_urlsafe(32) - expires_at = time.time() + duration_seconds - self._sessions[token] = { - "access_key": access_key, - "expires_at": expires_at, - } - return token - - def validate_session_token(self, access_key: str, session_token: str) -> bool: - dummy_key = secrets.token_urlsafe(16) - dummy_token = secrets.token_urlsafe(32) - with self._session_lock: - session = self._sessions.get(session_token) - if not session: - hmac.compare_digest(access_key, dummy_key) - hmac.compare_digest(session_token, dummy_token) - return False - key_match = hmac.compare_digest(session["access_key"], access_key) - if not key_match: - hmac.compare_digest(session_token, dummy_token) - return False - if time.time() > session["expires_at"]: - self._sessions.pop(session_token, None) - return False - return True - - def _cleanup_expired_sessions(self) -> None: - now = time.time() - expired = [token for token, data in self._sessions.items() if now > data["expires_at"]] - for token in expired: - del self._sessions[token] - - def principal_for_key(self, access_key: str) -> Principal: - now = time.time() - cached = self._principal_cache.get(access_key) - if cached: - principal, cached_time = cached - if now - cached_time < self._cache_ttl: - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - self._enforce_key_and_user_status(access_key) - return principal - - self._maybe_reload() - self._enforce_key_and_user_status(access_key) - user_id = self._key_index.get(access_key) - if not user_id: - raise IamError("Unknown access key") - record = self._user_records.get(user_id) - if not record: - raise IamError("Unknown access key") - self._check_expiry(access_key, record) - principal = self._build_principal(access_key, record) - self._principal_cache[access_key] = (principal, now) - return principal - - def secret_for_key(self, access_key: str) -> str: - self._maybe_reload() - self._enforce_key_and_user_status(access_key) - secret = self._key_secrets.get(access_key) - if not secret: - raise IamError("Unknown access key") - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - return secret - - def authorize(self, principal: Principal, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None: - action = self._normalize_action(action) - if action not in ALLOWED_ACTIONS: - raise IamError(f"Unknown action '{action}'") - bucket_name = bucket_name or "*" - normalized = bucket_name.lower() if bucket_name != "*" else bucket_name - if not self._is_allowed(principal, normalized, action, object_key=object_key): - raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'") - - def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str], *, object_key: str | None = None) -> Dict[str, bool]: - self._maybe_reload() - bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*") - normalized_actions = {a: self._normalize_action(a) for a in actions} - results: Dict[str, bool] = {} - for original, canonical in normalized_actions.items(): - if canonical not in ALLOWED_ACTIONS: - results[original] = False - else: - results[original] = self._is_allowed(principal, bucket_name, canonical, object_key=object_key) - return results 
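A minimal standalone sketch of how the prefix-scoped policy match behaves, assuming a hypothetical `is_allowed` helper and `DemoPolicy` holder; the bucket name and object keys are illustrative and stand in for the `_is_allowed` evaluation shown below (the iam:* special case is omitted):

from dataclasses import dataclass
from typing import Optional, Set

@dataclass
class DemoPolicy:
    bucket: str
    actions: Set[str]
    prefix: str = "*"

def is_allowed(policies, bucket: str, action: str, object_key: Optional[str] = None) -> bool:
    # Mirrors the evaluation order: bucket match, then action match, then
    # (for object-level calls) a startswith() check against the key prefix.
    for policy in policies:
        if policy.bucket not in {"*", bucket}:
            continue
        if "*" not in policy.actions and action not in policy.actions:
            continue
        if object_key is not None and policy.prefix != "*":
            if not object_key.startswith(policy.prefix.rstrip("*")):
                continue
        return True
    return False

# Hypothetical policy: read/list on "reports", limited to keys under "2024/".
policies = [DemoPolicy(bucket="reports", actions={"read", "list"}, prefix="2024/*")]
assert is_allowed(policies, "reports", "read", object_key="2024/q1.csv")
assert not is_allowed(policies, "reports", "read", object_key="2023/q4.csv")
assert not is_allowed(policies, "reports", "write")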
- - def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]: - return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")] - - def _is_allowed(self, principal: Principal, bucket_name: str, action: str, *, object_key: str | None = None) -> bool: - bucket_name = bucket_name.lower() - for policy in principal.policies: - if policy.bucket not in {"*", bucket_name}: - continue - action_match = "*" in policy.actions or action in policy.actions - if not action_match and "iam:*" in policy.actions and action.startswith("iam:"): - action_match = True - if not action_match: - continue - if object_key is not None and policy.prefix != "*": - prefix = policy.prefix.rstrip("*") - if not object_key.startswith(prefix): - continue - return True - return False - - def list_users(self) -> List[Dict[str, Any]]: - listing: List[Dict[str, Any]] = [] - for user_id, record in self._user_records.items(): - access_keys = [] - for key_info in record.get("access_keys", []): - access_keys.append({ - "access_key": key_info["access_key"], - "status": key_info.get("status", "active"), - "created_at": key_info.get("created_at"), - }) - user_entry: Dict[str, Any] = { - "user_id": user_id, - "display_name": record["display_name"], - "enabled": record.get("enabled", True), - "expires_at": record.get("expires_at"), - "access_keys": access_keys, - "policies": [ - {**{"bucket": policy.bucket, "actions": sorted(policy.actions)}, **({"prefix": policy.prefix} if policy.prefix != "*" else {})} - for policy in record["policies"] - ], - } - if access_keys: - user_entry["access_key"] = access_keys[0]["access_key"] - listing.append(user_entry) - return listing - - def create_user( - self, - *, - display_name: str, - policies: Optional[Sequence[Dict[str, Any]]] = None, - access_key: str | None = None, - secret_key: str | None = None, - expires_at: str | None = None, - user_id: str | None = None, - ) -> Dict[str, str]: - access_key = (access_key or self._generate_access_key()).strip() - if not access_key: - raise IamError("Access key cannot be empty") - if access_key in self._key_index: - raise IamError("Access key already exists") - if expires_at: - self._validate_expires_at(expires_at) - secret_key = secret_key or self._generate_secret_key() - sanitized_policies = self._prepare_policy_payload(policies) - user_id = user_id or self._generate_user_id() - if user_id in self._user_records: - raise IamError("User ID already exists") - now_iso = datetime.now(timezone.utc).isoformat() - record: Dict[str, Any] = { - "user_id": user_id, - "display_name": display_name or access_key, - "enabled": True, - "access_keys": [ - { - "access_key": access_key, - "secret_key": secret_key, - "status": "active", - "created_at": now_iso, - } - ], - "policies": sanitized_policies, - } - if expires_at: - record["expires_at"] = expires_at - self._raw_config.setdefault("users", []).append(record) - self._save() - self._load() - return {"user_id": user_id, "access_key": access_key, "secret_key": secret_key} - - def create_access_key(self, identifier: str) -> Dict[str, str]: - user_raw, _ = self._resolve_raw_user(identifier) - new_access_key = self._generate_access_key() - new_secret_key = self._generate_secret_key() - now_iso = datetime.now(timezone.utc).isoformat() - key_entry = { - "access_key": new_access_key, - "secret_key": new_secret_key, - "status": "active", - "created_at": now_iso, - } - user_raw.setdefault("access_keys", []).append(key_entry) - self._save() - self._load() - return {"access_key": 
new_access_key, "secret_key": new_secret_key} - - def delete_access_key(self, access_key: str) -> None: - user_raw, _ = self._resolve_raw_user(access_key) - keys = user_raw.get("access_keys", []) - if len(keys) <= 1: - raise IamError("Cannot delete the only access key for a user") - remaining = [k for k in keys if k["access_key"] != access_key] - if len(remaining) == len(keys): - raise IamError("Access key not found") - user_raw["access_keys"] = remaining - self._save() - self._principal_cache.pop(access_key, None) - self._secret_key_cache.pop(access_key, None) - from .s3_api import clear_signing_key_cache - clear_signing_key_cache() - self._load() - - def disable_user(self, identifier: str) -> None: - user_raw, _ = self._resolve_raw_user(identifier) - user_raw["enabled"] = False - self._save() - for key_info in user_raw.get("access_keys", []): - ak = key_info["access_key"] - self._principal_cache.pop(ak, None) - self._secret_key_cache.pop(ak, None) - from .s3_api import clear_signing_key_cache - clear_signing_key_cache() - self._load() - - def enable_user(self, identifier: str) -> None: - user_raw, _ = self._resolve_raw_user(identifier) - user_raw["enabled"] = True - self._save() - self._load() - - def get_user_by_id(self, user_id: str) -> Dict[str, Any]: - record = self._user_records.get(user_id) - if not record: - raise IamError("User not found") - access_keys = [] - for key_info in record.get("access_keys", []): - access_keys.append({ - "access_key": key_info["access_key"], - "status": key_info.get("status", "active"), - "created_at": key_info.get("created_at"), - }) - return { - "user_id": user_id, - "display_name": record["display_name"], - "enabled": record.get("enabled", True), - "expires_at": record.get("expires_at"), - "access_keys": access_keys, - "policies": [ - {"bucket": p.bucket, "actions": sorted(p.actions), "prefix": p.prefix} - for p in record["policies"] - ], - } - - def get_user_policies(self, identifier: str) -> List[Dict[str, Any]]: - _, user_id = self._resolve_raw_user(identifier) - record = self._user_records.get(user_id) - if not record: - raise IamError("User not found") - return [ - {**{"bucket": p.bucket, "actions": sorted(p.actions)}, **({"prefix": p.prefix} if p.prefix != "*" else {})} - for p in record["policies"] - ] - - def resolve_user_id(self, identifier: str) -> str: - if identifier in self._user_records: - return identifier - user_id = self._key_index.get(identifier) - if user_id: - return user_id - raise IamError("User not found") - - def rotate_secret(self, access_key: str) -> str: - user_raw, _ = self._resolve_raw_user(access_key) - new_secret = self._generate_secret_key() - for key_info in user_raw.get("access_keys", []): - if key_info["access_key"] == access_key: - key_info["secret_key"] = new_secret - break - else: - raise IamError("Access key not found") - self._save() - self._principal_cache.pop(access_key, None) - self._secret_key_cache.pop(access_key, None) - from .s3_api import clear_signing_key_cache - clear_signing_key_cache() - self._load() - return new_secret - - def update_user(self, access_key: str, display_name: str) -> None: - user_raw, _ = self._resolve_raw_user(access_key) - user_raw["display_name"] = display_name - self._save() - self._load() - - def delete_user(self, access_key: str) -> None: - users = self._raw_config.get("users", []) - if len(users) <= 1: - raise IamError("Cannot delete the only user") - _, target_user_id = self._resolve_raw_user(access_key) - target_user_raw = None - remaining = [] - for u in users: - if 
u.get("user_id") == target_user_id: - target_user_raw = u - else: - remaining.append(u) - if target_user_raw is None: - raise IamError("User not found") - self._raw_config["users"] = remaining - self._save() - for key_info in target_user_raw.get("access_keys", []): - ak = key_info["access_key"] - self._principal_cache.pop(ak, None) - self._secret_key_cache.pop(ak, None) - from .s3_api import clear_signing_key_cache - clear_signing_key_cache() - self._load() - - def update_user_expiry(self, access_key: str, expires_at: str | None) -> None: - user_raw, _ = self._resolve_raw_user(access_key) - if expires_at: - self._validate_expires_at(expires_at) - user_raw["expires_at"] = expires_at - else: - user_raw.pop("expires_at", None) - self._save() - for key_info in user_raw.get("access_keys", []): - ak = key_info["access_key"] - self._principal_cache.pop(ak, None) - self._secret_key_cache.pop(ak, None) - self._load() - - def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None: - user_raw, _ = self._resolve_raw_user(access_key) - user_raw["policies"] = self._prepare_policy_payload(policies) - self._save() - self._load() - - def _decrypt_content(self, raw_bytes: bytes) -> str: - if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX): - if not self._fernet: - raise IamError("IAM config is encrypted but no encryption key provided. Set SECRET_KEY or use 'python run.py reset-cred'.") - try: - encrypted_data = raw_bytes[len(_IAM_ENCRYPTED_PREFIX):] - return self._fernet.decrypt(encrypted_data).decode("utf-8") - except InvalidToken: - raise IamError("Cannot decrypt IAM config. SECRET_KEY may have changed. Use 'python run.py reset-cred' to reset credentials.") - return raw_bytes.decode("utf-8") - - def _is_v2_config(self, raw: Dict[str, Any]) -> bool: - return raw.get("version", 1) >= _CONFIG_VERSION - - def _migrate_v1_to_v2(self, raw: Dict[str, Any]) -> Dict[str, Any]: - migrated_users = [] - now_iso = datetime.now(timezone.utc).isoformat() - for user in raw.get("users", []): - old_policies = user.get("policies", []) - expanded_policies = [] - for p in old_policies: - raw_actions = p.get("actions", []) - if isinstance(raw_actions, str): - raw_actions = [raw_actions] - action_set: Set[str] = set() - for a in raw_actions: - canonical = self._normalize_action(a) - if canonical == "*": - action_set = set(ALLOWED_ACTIONS) - break - if canonical: - action_set.add(canonical) - action_set = _expand_v1_actions(action_set) - expanded_policies.append({ - "bucket": p.get("bucket", "*"), - "actions": sorted(action_set), - "prefix": p.get("prefix", "*"), - }) - migrated_user: Dict[str, Any] = { - "user_id": user["access_key"], - "display_name": user.get("display_name", user["access_key"]), - "enabled": True, - "access_keys": [ - { - "access_key": user["access_key"], - "secret_key": user["secret_key"], - "status": "active", - "created_at": now_iso, - } - ], - "policies": expanded_policies, - } - if user.get("expires_at"): - migrated_user["expires_at"] = user["expires_at"] - migrated_users.append(migrated_user) - return {"version": _CONFIG_VERSION, "users": migrated_users} - - def _load(self) -> None: - try: - self._last_load_time = self.config_path.stat().st_mtime - raw_bytes = self.config_path.read_bytes() - content = self._decrypt_content(raw_bytes) - raw = json.loads(content) - except IamError: - raise - except FileNotFoundError: - raise IamError(f"IAM config not found: {self.config_path}") - except json.JSONDecodeError as e: - raise IamError(f"Corrupted IAM config (invalid JSON): {e}") - 
except PermissionError as e: - raise IamError(f"Cannot read IAM config (permission denied): {e}") - except (OSError, ValueError) as e: - raise IamError(f"Failed to load IAM config: {e}") - - was_plaintext = not raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX) - was_v1 = not self._is_v2_config(raw) - - if was_v1: - raw = self._migrate_v1_to_v2(raw) - - user_records: Dict[str, Dict[str, Any]] = {} - key_index: Dict[str, str] = {} - key_secrets: Dict[str, str] = {} - key_status_map: Dict[str, str] = {} - - for user in raw.get("users", []): - user_id = user["user_id"] - policies = self._build_policy_objects(user.get("policies", [])) - access_keys_raw = user.get("access_keys", []) - access_keys_info = [] - for key_entry in access_keys_raw: - ak = key_entry["access_key"] - sk = key_entry["secret_key"] - status = key_entry.get("status", "active") - key_index[ak] = user_id - key_secrets[ak] = sk - key_status_map[ak] = status - access_keys_info.append({ - "access_key": ak, - "secret_key": sk, - "status": status, - "created_at": key_entry.get("created_at"), - }) - record: Dict[str, Any] = { - "display_name": user.get("display_name", user_id), - "enabled": user.get("enabled", True), - "policies": policies, - "access_keys": access_keys_info, - } - if user.get("expires_at"): - record["expires_at"] = user["expires_at"] - user_records[user_id] = record - - if not user_records: - raise IamError("IAM configuration contains no users") - - self._user_records = user_records - self._key_index = key_index - self._key_secrets = key_secrets - self._key_status = key_status_map - - raw_users: List[Dict[str, Any]] = [] - for user in raw.get("users", []): - raw_entry: Dict[str, Any] = { - "user_id": user["user_id"], - "display_name": user.get("display_name", user["user_id"]), - "enabled": user.get("enabled", True), - "access_keys": user.get("access_keys", []), - "policies": user.get("policies", []), - } - if user.get("expires_at"): - raw_entry["expires_at"] = user["expires_at"] - raw_users.append(raw_entry) - self._raw_config = {"version": _CONFIG_VERSION, "users": raw_users} - - if was_v1 or (was_plaintext and self._fernet): - self._save() - - def _save(self) -> None: - try: - json_text = json.dumps(self._raw_config, indent=2) - temp_path = self.config_path.with_suffix('.json.tmp') - if self._fernet: - encrypted = self._fernet.encrypt(json_text.encode("utf-8")) - temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted) - else: - temp_path.write_text(json_text, encoding='utf-8') - temp_path.replace(self.config_path) - except (OSError, PermissionError) as e: - raise IamError(f"Cannot save IAM config: {e}") - - def config_summary(self) -> Dict[str, Any]: - return { - "path": str(self.config_path), - "user_count": len(self._user_records), - "allowed_actions": sorted(ALLOWED_ACTIONS), - } - - def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]: - payload: Dict[str, Any] = {"version": _CONFIG_VERSION, "users": []} - for user in self._raw_config.get("users", []): - access_keys = [] - for key_info in user.get("access_keys", []): - access_keys.append({ - "access_key": key_info["access_key"], - "secret_key": "\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022" if mask_secrets else key_info["secret_key"], - "status": key_info.get("status", "active"), - "created_at": key_info.get("created_at"), - }) - record: Dict[str, Any] = { - "user_id": user["user_id"], - "display_name": user["display_name"], - "enabled": user.get("enabled", True), - "access_keys": access_keys, - "policies": user["policies"], - } - if 
access_keys: - record["access_key"] = access_keys[0]["access_key"] - if user.get("expires_at"): - record["expires_at"] = user["expires_at"] - payload["users"].append(record) - return payload - - def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]: - entries: List[Policy] = [] - for policy in policies: - bucket = str(policy.get("bucket", "*")).lower() - prefix = str(policy.get("prefix", "*")) - raw_actions = policy.get("actions", []) - if isinstance(raw_actions, str): - raw_actions = [raw_actions] - action_set: Set[str] = set() - for action in raw_actions: - canonical = self._normalize_action(action) - if canonical == "*": - action_set = set(ALLOWED_ACTIONS) - break - if canonical: - action_set.add(canonical) - if action_set: - entries.append(Policy(bucket=bucket, actions=action_set, prefix=prefix)) - return entries - - def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]: - if not policies: - policies = ( - { - "bucket": "*", - "actions": ["list", "read", "write", "delete", "share", "policy", - "create_bucket", "delete_bucket"], - }, - ) - sanitized: List[Dict[str, Any]] = [] - for policy in policies: - bucket = str(policy.get("bucket", "*")).lower() - prefix = str(policy.get("prefix", "*")) - raw_actions = policy.get("actions", []) - if isinstance(raw_actions, str): - raw_actions = [raw_actions] - action_set: Set[str] = set() - for action in raw_actions: - canonical = self._normalize_action(action) - if canonical == "*": - action_set = set(ALLOWED_ACTIONS) - break - if canonical: - action_set.add(canonical) - if not action_set: - continue - entry: Dict[str, Any] = {"bucket": bucket, "actions": sorted(action_set)} - if prefix != "*": - entry["prefix"] = prefix - sanitized.append(entry) - if not sanitized: - raise IamError("At least one policy with valid actions is required") - return sanitized - - def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal: - return Principal( - access_key=access_key, - display_name=record["display_name"], - policies=record["policies"], - ) - - def _normalize_action(self, action: str) -> str: - if not action: - return "" - lowered = action.strip().lower() - if lowered == "*": - return "*" - candidate = ACTION_ALIASES.get(lowered, lowered) - return candidate if candidate in ALLOWED_ACTIONS else "" - - def _write_default(self) -> None: - access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12) - secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32) - custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip()) - user_id = self._generate_user_id() - now_iso = datetime.now(timezone.utc).isoformat() - default = { - "version": _CONFIG_VERSION, - "users": [ - { - "user_id": user_id, - "display_name": "Local Admin", - "enabled": True, - "access_keys": [ - { - "access_key": access_key, - "secret_key": secret_key, - "status": "active", - "created_at": now_iso, - } - ], - "policies": [ - {"bucket": "*", "actions": list(ALLOWED_ACTIONS)} - ], - } - ] - } - json_text = json.dumps(default, indent=2) - if self._fernet: - encrypted = self._fernet.encrypt(json_text.encode("utf-8")) - self.config_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted) - else: - self.config_path.write_text(json_text) - print(f"\n{'='*60}") - print("MYFSIO FIRST RUN - ADMIN CREDENTIALS") - print(f"{'='*60}") - if custom_keys: - print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)") - print(f"Secret Key: {'(from 
ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}") - else: - print(f"Access Key: {access_key}") - print(f"Secret Key: {secret_key}") - print(f"User ID: {user_id}") - print(f"{'='*60}") - if self._fernet: - print("IAM config is encrypted at rest.") - print("Lost credentials? Run: python run.py reset-cred") - else: - print(f"Missed this? Check: {self.config_path}") - print(f"{'='*60}\n") - - def _validate_expires_at(self, expires_at: str) -> None: - try: - dt = datetime.fromisoformat(expires_at) - if dt.tzinfo is None: - dt = dt.replace(tzinfo=timezone.utc) - except (ValueError, TypeError): - raise IamError(f"Invalid expires_at format: {expires_at}. Use ISO 8601 (e.g. 2026-12-31T23:59:59Z)") - - def _generate_access_key(self) -> str: - return secrets.token_hex(8) - - def _generate_secret_key(self) -> str: - return secrets.token_urlsafe(24) - - def _generate_user_id(self) -> str: - return f"u-{secrets.token_hex(8)}" - - def _resolve_raw_user(self, identifier: str) -> Tuple[Dict[str, Any], str]: - for user in self._raw_config.get("users", []): - if user.get("user_id") == identifier: - return user, identifier - for user in self._raw_config.get("users", []): - for key_info in user.get("access_keys", []): - if key_info["access_key"] == identifier: - return user, user["user_id"] - raise IamError("User not found") - - def _get_raw_user(self, access_key: str) -> Dict[str, Any]: - user, _ = self._resolve_raw_user(access_key) - return user - - def _enforce_key_and_user_status(self, access_key: str) -> None: - key_status = self._key_status.get(access_key, "active") - if key_status != "active": - raise IamError("Access key is inactive") - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record and not record.get("enabled", True): - raise IamError("User account is disabled") - - def get_secret_key(self, access_key: str) -> str | None: - now = time.time() - cached = self._secret_key_cache.get(access_key) - if cached: - secret_key, cached_time = cached - if now - cached_time < self._cache_ttl: - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - self._enforce_key_and_user_status(access_key) - return secret_key - - self._maybe_reload() - secret = self._key_secrets.get(access_key) - if secret: - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - self._enforce_key_and_user_status(access_key) - self._secret_key_cache[access_key] = (secret, now) - return secret - return None - - def get_principal(self, access_key: str) -> Principal | None: - now = time.time() - cached = self._principal_cache.get(access_key) - if cached: - principal, cached_time = cached - if now - cached_time < self._cache_ttl: - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - self._enforce_key_and_user_status(access_key) - return principal - - self._maybe_reload() - self._enforce_key_and_user_status(access_key) - user_id = self._key_index.get(access_key) - if user_id: - record = self._user_records.get(user_id) - if record: - self._check_expiry(access_key, record) - principal = self._build_principal(access_key, record) - self._principal_cache[access_key] = (principal, now) - return principal - return None diff --git a/app/integrity.py 
b/app/integrity.py deleted file mode 100644 index 2ca3eb5..0000000 --- a/app/integrity.py +++ /dev/null @@ -1,995 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import logging -import os -import threading -import time -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional - -try: - import myfsio_core as _rc - if not hasattr(_rc, "md5_file"): - raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release") - _HAS_RUST = True -except ImportError: - _HAS_RUST = False - -logger = logging.getLogger(__name__) - - -def _compute_etag(path: Path) -> str: - if _HAS_RUST: - return _rc.md5_file(str(path)) - checksum = hashlib.md5() - with path.open("rb") as handle: - for chunk in iter(lambda: handle.read(8192), b""): - checksum.update(chunk) - return checksum.hexdigest() - - -@dataclass -class IntegrityIssue: - issue_type: str - bucket: str - key: str - detail: str - healed: bool = False - heal_action: str = "" - - def to_dict(self) -> dict: - return { - "issue_type": self.issue_type, - "bucket": self.bucket, - "key": self.key, - "detail": self.detail, - "healed": self.healed, - "heal_action": self.heal_action, - } - - -@dataclass -class IntegrityResult: - corrupted_objects: int = 0 - orphaned_objects: int = 0 - phantom_metadata: int = 0 - stale_versions: int = 0 - etag_cache_inconsistencies: int = 0 - legacy_metadata_drifts: int = 0 - issues_healed: int = 0 - issues: List[IntegrityIssue] = field(default_factory=list) - errors: List[str] = field(default_factory=list) - objects_scanned: int = 0 - buckets_scanned: int = 0 - execution_time_seconds: float = 0.0 - - def to_dict(self) -> dict: - return { - "corrupted_objects": self.corrupted_objects, - "orphaned_objects": self.orphaned_objects, - "phantom_metadata": self.phantom_metadata, - "stale_versions": self.stale_versions, - "etag_cache_inconsistencies": self.etag_cache_inconsistencies, - "legacy_metadata_drifts": self.legacy_metadata_drifts, - "issues_healed": self.issues_healed, - "issues": [i.to_dict() for i in self.issues], - "errors": self.errors, - "objects_scanned": self.objects_scanned, - "buckets_scanned": self.buckets_scanned, - "execution_time_seconds": self.execution_time_seconds, - } - - @property - def total_issues(self) -> int: - return ( - self.corrupted_objects - + self.orphaned_objects - + self.phantom_metadata - + self.stale_versions - + self.etag_cache_inconsistencies - + self.legacy_metadata_drifts - ) - - @property - def has_issues(self) -> bool: - return self.total_issues > 0 - - -@dataclass -class IntegrityExecutionRecord: - timestamp: float - result: dict - dry_run: bool - auto_heal: bool - - def to_dict(self) -> dict: - return { - "timestamp": self.timestamp, - "result": self.result, - "dry_run": self.dry_run, - "auto_heal": self.auto_heal, - } - - @classmethod - def from_dict(cls, data: dict) -> IntegrityExecutionRecord: - return cls( - timestamp=data["timestamp"], - result=data["result"], - dry_run=data.get("dry_run", False), - auto_heal=data.get("auto_heal", False), - ) - - -class IntegrityHistoryStore: - def __init__(self, storage_root: Path, max_records: int = 50) -> None: - self.storage_root = storage_root - self.max_records = max_records - self._lock = threading.Lock() - - def _get_path(self) -> Path: - return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json" - - def load(self) -> List[IntegrityExecutionRecord]: - path = self._get_path() - if not path.exists(): - return 
[] - try: - with open(path, "r", encoding="utf-8") as f: - data = json.load(f) - return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])] - except (OSError, ValueError, KeyError) as e: - logger.error("Failed to load integrity history: %s", e) - return [] - - def save(self, records: List[IntegrityExecutionRecord]) -> None: - path = self._get_path() - path.parent.mkdir(parents=True, exist_ok=True) - data = {"executions": [r.to_dict() for r in records[: self.max_records]]} - try: - with open(path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - except OSError as e: - logger.error("Failed to save integrity history: %s", e) - - def add(self, record: IntegrityExecutionRecord) -> None: - with self._lock: - records = self.load() - records.insert(0, record) - self.save(records) - - def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]: - return self.load()[offset : offset + limit] - - -class IntegrityCursorStore: - def __init__(self, storage_root: Path) -> None: - self.storage_root = storage_root - self._lock = threading.Lock() - - def _get_path(self) -> Path: - return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json" - - def load(self) -> Dict[str, Any]: - path = self._get_path() - if not path.exists(): - return {"buckets": {}} - try: - with open(path, "r", encoding="utf-8") as f: - data = json.load(f) - if not isinstance(data.get("buckets"), dict): - return {"buckets": {}} - return data - except (OSError, ValueError, KeyError): - return {"buckets": {}} - - def save(self, data: Dict[str, Any]) -> None: - path = self._get_path() - path.parent.mkdir(parents=True, exist_ok=True) - try: - with open(path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - except OSError as e: - logger.error("Failed to save integrity cursor: %s", e) - - def update_bucket( - self, - bucket_name: str, - timestamp: float, - last_key: Optional[str] = None, - completed: bool = False, - ) -> None: - with self._lock: - data = self.load() - entry = data["buckets"].get(bucket_name, {}) - if completed: - entry["last_scanned"] = timestamp - entry.pop("last_key", None) - entry["completed"] = True - else: - entry["last_scanned"] = timestamp - if last_key is not None: - entry["last_key"] = last_key - entry["completed"] = False - data["buckets"][bucket_name] = entry - self.save(data) - - def clean_stale(self, existing_buckets: List[str]) -> None: - with self._lock: - data = self.load() - existing_set = set(existing_buckets) - stale_keys = [k for k in data["buckets"] if k not in existing_set] - if stale_keys: - for k in stale_keys: - del data["buckets"][k] - self.save(data) - - def get_last_key(self, bucket_name: str) -> Optional[str]: - data = self.load() - entry = data.get("buckets", {}).get(bucket_name) - if entry is None: - return None - return entry.get("last_key") - - def get_bucket_order(self, bucket_names: List[str]) -> List[str]: - data = self.load() - buckets_info = data.get("buckets", {}) - - incomplete = [] - complete = [] - for name in bucket_names: - entry = buckets_info.get(name) - if entry is None: - incomplete.append((name, 0.0)) - elif entry.get("last_key") is not None: - incomplete.append((name, entry.get("last_scanned", 0.0))) - else: - complete.append((name, entry.get("last_scanned", 0.0))) - - incomplete.sort(key=lambda x: x[1]) - complete.sort(key=lambda x: x[1]) - - return [n for n, _ in incomplete] + [n for n, _ in complete] - - def get_info(self) -> Dict[str, Any]: - data = self.load() - buckets = 
data.get("buckets", {}) - return { - "tracked_buckets": len(buckets), - "buckets": { - name: { - "last_scanned": info.get("last_scanned"), - "last_key": info.get("last_key"), - "completed": info.get("completed", False), - } - for name, info in buckets.items() - }, - } - - -MAX_ISSUES = 500 - - -class IntegrityChecker: - SYSTEM_ROOT = ".myfsio.sys" - SYSTEM_BUCKETS_DIR = "buckets" - BUCKET_META_DIR = "meta" - BUCKET_VERSIONS_DIR = "versions" - INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"} - - def __init__( - self, - storage_root: Path, - interval_hours: float = 24.0, - batch_size: int = 1000, - auto_heal: bool = False, - dry_run: bool = False, - max_history: int = 50, - io_throttle_ms: int = 10, - ) -> None: - self.storage_root = Path(storage_root) - self.interval_seconds = interval_hours * 3600.0 - self.batch_size = batch_size - self.auto_heal = auto_heal - self.dry_run = dry_run - self._timer: Optional[threading.Timer] = None - self._shutdown = False - self._lock = threading.Lock() - self._scanning = False - self._scan_start_time: Optional[float] = None - self._io_throttle = max(0, io_throttle_ms) / 1000.0 - self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history) - self.cursor_store = IntegrityCursorStore(self.storage_root) - - def start(self) -> None: - if self._timer is not None: - return - self._shutdown = False - self._schedule_next() - logger.info( - "Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s", - self.interval_seconds / 3600.0, - self.batch_size, - self.auto_heal, - self.dry_run, - ) - - def stop(self) -> None: - self._shutdown = True - if self._timer: - self._timer.cancel() - self._timer = None - logger.info("Integrity checker stopped") - - def _schedule_next(self) -> None: - if self._shutdown: - return - self._timer = threading.Timer(self.interval_seconds, self._run_cycle) - self._timer.daemon = True - self._timer.start() - - def _run_cycle(self) -> None: - if self._shutdown: - return - try: - self.run_now() - except Exception as e: - logger.error("Integrity check cycle failed: %s", e) - finally: - self._schedule_next() - - def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult: - if not self._lock.acquire(blocking=False): - raise RuntimeError("Integrity scan is already in progress") - - try: - self._scanning = True - self._scan_start_time = time.time() - - effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal - effective_dry_run = dry_run if dry_run is not None else self.dry_run - - start = self._scan_start_time - result = IntegrityResult() - - bucket_names = self._list_bucket_names() - self.cursor_store.clean_stale(bucket_names) - ordered_buckets = self.cursor_store.get_bucket_order(bucket_names) - - for bucket_name in ordered_buckets: - if self._batch_exhausted(result): - break - result.buckets_scanned += 1 - cursor_key = self.cursor_store.get_last_key(bucket_name) - key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key) - key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key) - key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key) - self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run) - self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run) - self._check_legacy_metadata(bucket_name, 
result, effective_auto_heal, effective_dry_run) - returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None] - bucket_exhausted = self._batch_exhausted(result) - if bucket_exhausted and returned_keys: - self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys)) - else: - self.cursor_store.update_bucket(bucket_name, time.time(), completed=True) - - result.execution_time_seconds = time.time() - start - - if result.has_issues or result.errors: - logger.info( - "Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, " - "stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s", - result.execution_time_seconds, - result.corrupted_objects, - result.orphaned_objects, - result.phantom_metadata, - result.stale_versions, - result.etag_cache_inconsistencies, - result.legacy_metadata_drifts, - result.issues_healed, - len(result.errors), - " (dry run)" if effective_dry_run else "", - ) - - record = IntegrityExecutionRecord( - timestamp=time.time(), - result=result.to_dict(), - dry_run=effective_dry_run, - auto_heal=effective_auto_heal, - ) - self.history_store.add(record) - - return result - finally: - self._scanning = False - self._scan_start_time = None - self._lock.release() - - def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool: - if self._scanning: - return False - t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True) - t.start() - return True - - def _system_path(self) -> Path: - return self.storage_root / self.SYSTEM_ROOT - - def _list_bucket_names(self) -> List[str]: - names = [] - try: - for entry in self.storage_root.iterdir(): - if entry.is_dir() and entry.name != self.SYSTEM_ROOT: - names.append(entry.name) - except OSError: - pass - return names - - def _throttle(self) -> bool: - if self._shutdown: - return True - if self._io_throttle > 0: - time.sleep(self._io_throttle) - return self._shutdown - - def _batch_exhausted(self, result: IntegrityResult) -> bool: - return self._shutdown or result.objects_scanned >= self.batch_size - - def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None: - if len(result.issues) < MAX_ISSUES: - result.issues.append(issue) - - def _collect_index_keys( - self, meta_root: Path, cursor_key: Optional[str] = None, - ) -> Dict[str, Dict[str, Any]]: - all_keys: Dict[str, Dict[str, Any]] = {} - if not meta_root.exists(): - return all_keys - try: - for index_file in meta_root.rglob("_index.json"): - if not index_file.is_file(): - continue - rel_dir = index_file.parent.relative_to(meta_root) - dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix() - if cursor_key is not None and dir_prefix: - full_prefix = dir_prefix + "/" - if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix: - continue - try: - index_data = json.loads(index_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - continue - for key_name, entry in index_data.items(): - full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name - if cursor_key is not None and full_key <= cursor_key: - continue - all_keys[full_key] = { - "entry": entry, - "index_file": index_file, - "key_name": key_name, - } - except OSError: - pass - return all_keys - - def _walk_bucket_files_sorted( - self, bucket_path: Path, cursor_key: Optional[str] = None, - ): - def _walk(dir_path: Path, prefix: str): - try: - entries = list(os.scandir(dir_path)) - except OSError: - return - - def 
_sort_key(e): - if e.is_dir(follow_symlinks=False): - return e.name + "/" - return e.name - - entries.sort(key=_sort_key) - - for entry in entries: - if entry.is_dir(follow_symlinks=False): - if not prefix and entry.name in self.INTERNAL_FOLDERS: - continue - new_prefix = (prefix + "/" + entry.name) if prefix else entry.name - if cursor_key is not None: - full_prefix = new_prefix + "/" - if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix: - continue - yield from _walk(Path(entry.path), new_prefix) - elif entry.is_file(follow_symlinks=False): - full_key = (prefix + "/" + entry.name) if prefix else entry.name - if cursor_key is not None and full_key <= cursor_key: - continue - yield full_key - - yield from _walk(bucket_path, "") - - def _check_corrupted_objects( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool, - cursor_key: Optional[str] = None, - ) -> Optional[str]: - if self._batch_exhausted(result): - return None - bucket_path = self.storage_root / bucket_name - meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - - if not meta_root.exists(): - return None - - last_key = None - try: - all_keys = self._collect_index_keys(meta_root, cursor_key) - sorted_keys = sorted(all_keys.keys()) - - for full_key in sorted_keys: - if self._throttle(): - return last_key - if self._batch_exhausted(result): - return last_key - - info = all_keys[full_key] - entry = info["entry"] - index_file = info["index_file"] - key_name = info["key_name"] - - object_path = bucket_path / full_key - if not object_path.exists(): - continue - - result.objects_scanned += 1 - last_key = full_key - - meta = entry.get("metadata", {}) if isinstance(entry, dict) else {} - stored_etag = meta.get("__etag__") - if not stored_etag: - continue - - try: - actual_etag = _compute_etag(object_path) - except OSError: - continue - - if actual_etag != stored_etag: - result.corrupted_objects += 1 - issue = IntegrityIssue( - issue_type="corrupted_object", - bucket=bucket_name, - key=full_key, - detail=f"stored_etag={stored_etag} actual_etag={actual_etag}", - ) - - if auto_heal and not dry_run: - try: - stat = object_path.stat() - meta["__etag__"] = actual_etag - meta["__size__"] = str(stat.st_size) - meta["__last_modified__"] = str(stat.st_mtime) - try: - index_data = json.loads(index_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - index_data = {} - index_data[key_name] = {"metadata": meta} - self._atomic_write_index(index_file, index_data) - issue.healed = True - issue.heal_action = "updated etag in index" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}") - - self._add_issue(result, issue) - except OSError as e: - result.errors.append(f"check corrupted {bucket_name}: {e}") - return last_key - - def _check_orphaned_objects( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool, - cursor_key: Optional[str] = None, - ) -> Optional[str]: - if self._batch_exhausted(result): - return None - bucket_path = self.storage_root / bucket_name - meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - - last_key = None - try: - for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key): - if self._throttle(): - return last_key - if self._batch_exhausted(result): - return last_key - - result.objects_scanned += 1 - last_key = full_key - key_path = Path(full_key) - key_name = 
key_path.name - parent = key_path.parent - - if parent == Path("."): - index_path = meta_root / "_index.json" - else: - index_path = meta_root / parent / "_index.json" - - has_entry = False - if index_path.exists(): - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - has_entry = key_name in index_data - except (OSError, json.JSONDecodeError): - pass - - if not has_entry: - result.orphaned_objects += 1 - issue = IntegrityIssue( - issue_type="orphaned_object", - bucket=bucket_name, - key=full_key, - detail="file exists without metadata entry", - ) - - if auto_heal and not dry_run: - try: - object_path = bucket_path / full_key - etag = _compute_etag(object_path) - stat = object_path.stat() - meta = { - "__etag__": etag, - "__size__": str(stat.st_size), - "__last_modified__": str(stat.st_mtime), - } - index_data = {} - if index_path.exists(): - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - index_data[key_name] = {"metadata": meta} - self._atomic_write_index(index_path, index_data) - issue.healed = True - issue.heal_action = "created metadata entry" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}") - - self._add_issue(result, issue) - except OSError as e: - result.errors.append(f"check orphaned {bucket_name}: {e}") - return last_key - - def _check_phantom_metadata( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool, - cursor_key: Optional[str] = None, - ) -> Optional[str]: - if self._batch_exhausted(result): - return None - bucket_path = self.storage_root / bucket_name - meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - - if not meta_root.exists(): - return None - - last_key = None - try: - all_keys = self._collect_index_keys(meta_root, cursor_key) - sorted_keys = sorted(all_keys.keys()) - - heal_by_index: Dict[Path, List[str]] = {} - - for full_key in sorted_keys: - if self._batch_exhausted(result): - break - - result.objects_scanned += 1 - last_key = full_key - - object_path = bucket_path / full_key - if not object_path.exists(): - result.phantom_metadata += 1 - info = all_keys[full_key] - issue = IntegrityIssue( - issue_type="phantom_metadata", - bucket=bucket_name, - key=full_key, - detail="metadata entry without file on disk", - ) - if auto_heal and not dry_run: - index_file = info["index_file"] - heal_by_index.setdefault(index_file, []).append(info["key_name"]) - issue.healed = True - issue.heal_action = "removed stale index entry" - result.issues_healed += 1 - self._add_issue(result, issue) - - if heal_by_index and auto_heal and not dry_run: - for index_file, keys_to_remove in heal_by_index.items(): - try: - index_data = json.loads(index_file.read_text(encoding="utf-8")) - for k in keys_to_remove: - index_data.pop(k, None) - if index_data: - self._atomic_write_index(index_file, index_data) - else: - index_file.unlink(missing_ok=True) - except OSError as e: - result.errors.append(f"heal phantom {bucket_name}: {e}") - except OSError as e: - result.errors.append(f"check phantom {bucket_name}: {e}") - return last_key - - def _check_stale_versions( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool - ) -> None: - if self._batch_exhausted(result): - return - versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR - - if not versions_root.exists(): - return - - try: - for 
key_dir in versions_root.rglob("*"): - if self._throttle(): - return - if self._batch_exhausted(result): - return - if not key_dir.is_dir(): - continue - - bin_files = {f.stem: f for f in key_dir.glob("*.bin")} - json_files = {f.stem: f for f in key_dir.glob("*.json")} - - for stem, bin_file in bin_files.items(): - if self._batch_exhausted(result): - return - result.objects_scanned += 1 - if stem not in json_files: - result.stale_versions += 1 - issue = IntegrityIssue( - issue_type="stale_version", - bucket=bucket_name, - key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}", - detail="version data without manifest", - ) - if auto_heal and not dry_run: - try: - bin_file.unlink(missing_ok=True) - issue.healed = True - issue.heal_action = "removed orphaned version data" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal stale version {bin_file}: {e}") - self._add_issue(result, issue) - - for stem, json_file in json_files.items(): - if self._batch_exhausted(result): - return - result.objects_scanned += 1 - if stem not in bin_files: - result.stale_versions += 1 - issue = IntegrityIssue( - issue_type="stale_version", - bucket=bucket_name, - key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}", - detail="version manifest without data", - ) - if auto_heal and not dry_run: - try: - json_file.unlink(missing_ok=True) - issue.healed = True - issue.heal_action = "removed orphaned version manifest" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal stale version {json_file}: {e}") - self._add_issue(result, issue) - except OSError as e: - result.errors.append(f"check stale versions {bucket_name}: {e}") - - def _check_etag_cache( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool - ) -> None: - if self._batch_exhausted(result): - return - etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json" - - if not etag_index_path.exists(): - return - - meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - if not meta_root.exists(): - return - - try: - etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - return - - found_mismatch = False - - for full_key, cached_etag in etag_cache.items(): - if self._batch_exhausted(result): - break - result.objects_scanned += 1 - key_path = Path(full_key) - key_name = key_path.name - parent = key_path.parent - - if parent == Path("."): - index_path = meta_root / "_index.json" - else: - index_path = meta_root / parent / "_index.json" - - if not index_path.exists(): - continue - - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - continue - - entry = index_data.get(key_name) - if not entry: - continue - - meta = entry.get("metadata", {}) if isinstance(entry, dict) else {} - stored_etag = meta.get("__etag__") - - if stored_etag and cached_etag != stored_etag: - result.etag_cache_inconsistencies += 1 - found_mismatch = True - issue = IntegrityIssue( - issue_type="etag_cache_inconsistency", - bucket=bucket_name, - key=full_key, - detail=f"cached_etag={cached_etag} index_etag={stored_etag}", - ) - self._add_issue(result, issue) - - if found_mismatch and auto_heal and not dry_run: - try: - etag_index_path.unlink(missing_ok=True) - for issue in result.issues: - if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not 
issue.healed: - issue.healed = True - issue.heal_action = "deleted etag_index.json" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal etag cache {bucket_name}: {e}") - - def _check_legacy_metadata( - self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool - ) -> None: - if self._batch_exhausted(result): - return - legacy_meta_root = self.storage_root / bucket_name / ".meta" - if not legacy_meta_root.exists(): - return - - meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR - - try: - for meta_file in legacy_meta_root.rglob("*.meta.json"): - if self._throttle(): - return - if self._batch_exhausted(result): - return - if not meta_file.is_file(): - continue - - result.objects_scanned += 1 - try: - rel = meta_file.relative_to(legacy_meta_root) - except ValueError: - continue - - full_key = rel.as_posix().removesuffix(".meta.json") - key_path = Path(full_key) - key_name = key_path.name - parent = key_path.parent - - if parent == Path("."): - index_path = meta_root / "_index.json" - else: - index_path = meta_root / parent / "_index.json" - - try: - legacy_data = json.loads(meta_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - continue - - index_entry = None - if index_path.exists(): - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - index_entry = index_data.get(key_name) - except (OSError, json.JSONDecodeError): - pass - - if index_entry is None: - result.legacy_metadata_drifts += 1 - issue = IntegrityIssue( - issue_type="legacy_metadata_drift", - bucket=bucket_name, - key=full_key, - detail="unmigrated legacy .meta.json", - ) - - if auto_heal and not dry_run: - try: - index_data = {} - if index_path.exists(): - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - index_data[key_name] = {"metadata": legacy_data} - self._atomic_write_index(index_path, index_data) - meta_file.unlink(missing_ok=True) - issue.healed = True - issue.heal_action = "migrated to index and deleted legacy file" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}") - - self._add_issue(result, issue) - else: - index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {} - if legacy_data != index_meta: - result.legacy_metadata_drifts += 1 - issue = IntegrityIssue( - issue_type="legacy_metadata_drift", - bucket=bucket_name, - key=full_key, - detail="legacy .meta.json differs from index entry", - ) - - if auto_heal and not dry_run: - try: - meta_file.unlink(missing_ok=True) - issue.healed = True - issue.heal_action = "deleted legacy file (index is authoritative)" - result.issues_healed += 1 - except OSError as e: - result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}") - - self._add_issue(result, issue) - except OSError as e: - result.errors.append(f"check legacy meta {bucket_name}: {e}") - - @staticmethod - def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None: - index_path.parent.mkdir(parents=True, exist_ok=True) - tmp_path = index_path.with_suffix(".tmp") - try: - with open(tmp_path, "w", encoding="utf-8") as f: - json.dump(data, f) - os.replace(str(tmp_path), str(index_path)) - except BaseException: - try: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - raise - - def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]: - records = 
self.history_store.get_history(limit, offset) - return [r.to_dict() for r in records] - - def get_status(self) -> dict: - status: Dict[str, Any] = { - "enabled": not self._shutdown or self._timer is not None, - "running": self._timer is not None and not self._shutdown, - "scanning": self._scanning, - "interval_hours": self.interval_seconds / 3600.0, - "batch_size": self.batch_size, - "auto_heal": self.auto_heal, - "dry_run": self.dry_run, - "io_throttle_ms": round(self._io_throttle * 1000), - } - if self._scanning and self._scan_start_time is not None: - status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1) - status["cursor"] = self.cursor_store.get_info() - return status diff --git a/app/kms.py b/app/kms.py deleted file mode 100644 index 3ac02b8..0000000 --- a/app/kms.py +++ /dev/null @@ -1,422 +0,0 @@ -from __future__ import annotations - -import base64 -import json -import logging -import os -import secrets -import subprocess -import sys -import uuid -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -from cryptography.hazmat.primitives.ciphers.aead import AESGCM - -from .encryption import EncryptionError, EncryptionProvider, EncryptionResult - -if sys.platform != "win32": - import fcntl - -logger = logging.getLogger(__name__) - - -def _set_secure_file_permissions(file_path: Path) -> None: - """Set restrictive file permissions (owner read/write only).""" - if sys.platform == "win32": - try: - username = os.environ.get("USERNAME", "") - if username: - subprocess.run( - ["icacls", str(file_path), "/inheritance:r", - "/grant:r", f"{username}:F"], - check=True, capture_output=True - ) - else: - logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path) - except (subprocess.SubprocessError, OSError) as exc: - logger.warning("Failed to set secure permissions on %s: %s", file_path, exc) - else: - os.chmod(file_path, 0o600) - - -@dataclass -class KMSKey: - """Represents a KMS encryption key.""" - key_id: str - description: str - created_at: str - enabled: bool = True - key_material: bytes = field(default_factory=lambda: b"", repr=False) - - @property - def arn(self) -> str: - return f"arn:aws:kms:local:000000000000:key/{self.key_id}" - - def to_dict(self, include_key: bool = False) -> Dict[str, Any]: - data = { - "KeyId": self.key_id, - "Arn": self.arn, - "Description": self.description, - "CreationDate": self.created_at, - "Enabled": self.enabled, - "KeyState": "Enabled" if self.enabled else "Disabled", - "KeyUsage": "ENCRYPT_DECRYPT", - "KeySpec": "SYMMETRIC_DEFAULT", - } - if include_key: - data["KeyMaterial"] = base64.b64encode(self.key_material).decode() - return data - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "KMSKey": - key_material = b"" - if "KeyMaterial" in data: - key_material = base64.b64decode(data["KeyMaterial"]) - return cls( - key_id=data["KeyId"], - description=data.get("Description", ""), - created_at=data.get("CreationDate", datetime.now(timezone.utc).isoformat()), - enabled=data.get("Enabled", True), - key_material=key_material, - ) - - -class KMSEncryptionProvider(EncryptionProvider): - """Encryption provider using a specific KMS key.""" - - def __init__(self, kms: "KMSManager", key_id: str): - self.kms = kms - self.key_id = key_id - - @property - def KEY_ID(self) -> str: - return self.key_id - - def generate_data_key(self) -> tuple[bytes, bytes]: - """Generate a data key encrypted with the KMS 
key.""" - return self.kms.generate_data_key(self.key_id) - - def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: - """Encrypt data using envelope encryption with KMS.""" - data_key, encrypted_data_key = self.generate_data_key() - - aesgcm = AESGCM(data_key) - nonce = secrets.token_bytes(12) - ciphertext = aesgcm.encrypt(nonce, plaintext, - json.dumps(context, sort_keys=True).encode() if context else None) - - return EncryptionResult( - ciphertext=ciphertext, - nonce=nonce, - key_id=self.key_id, - encrypted_data_key=encrypted_data_key, - ) - - def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, - key_id: str, context: Dict[str, str] | None = None) -> bytes: - """Decrypt data using envelope encryption with KMS.""" - data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None) - if len(data_key) != 32: - raise EncryptionError("Invalid data key size") - - aesgcm = AESGCM(data_key) - try: - return aesgcm.decrypt(nonce, ciphertext, - json.dumps(context, sort_keys=True).encode() if context else None) - except Exception as exc: - logger.debug("KMS decryption failed: %s", exc) - raise EncryptionError("Failed to decrypt data") from exc - - def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes: - """Decrypt an encrypted data key using KMS.""" - if key_id is None: - key_id = self.key_id - data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None) - if len(data_key) != 32: - raise EncryptionError("Invalid data key size") - return data_key - - -class KMSManager: - """Manages KMS keys and operations. - - This is a local implementation that mimics AWS KMS functionality. - Keys are stored encrypted on disk. - """ - - def __init__( - self, - keys_path: Path, - master_key_path: Path, - generate_data_key_min_bytes: int = 1, - generate_data_key_max_bytes: int = 1024, - ): - self.keys_path = keys_path - self.master_key_path = master_key_path - self.generate_data_key_min_bytes = generate_data_key_min_bytes - self.generate_data_key_max_bytes = generate_data_key_max_bytes - self._keys: Dict[str, KMSKey] = {} - self._master_key: bytes | None = None - self._master_aesgcm: AESGCM | None = None - self._loaded = False - - @property - def master_key(self) -> bytes: - """Load or create the master key for encrypting KMS keys (with file locking).""" - if self._master_key is None: - lock_path = self.master_key_path.with_suffix(".lock") - lock_path.parent.mkdir(parents=True, exist_ok=True) - with open(lock_path, "w") as lock_file: - if sys.platform == "win32": - import msvcrt - msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1) - else: - fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) - try: - if self.master_key_path.exists(): - self._master_key = base64.b64decode( - self.master_key_path.read_text().strip() - ) - else: - self._master_key = secrets.token_bytes(32) - self.master_key_path.write_text( - base64.b64encode(self._master_key).decode() - ) - _set_secure_file_permissions(self.master_key_path) - finally: - if sys.platform == "win32": - import msvcrt - msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1) - else: - fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) - self._master_aesgcm = AESGCM(self._master_key) - return self._master_key - - def _load_keys(self) -> None: - """Load keys from disk.""" - if self._loaded: - return - - if self.keys_path.exists(): - try: - data = json.loads(self.keys_path.read_text(encoding="utf-8")) - for key_data in data.get("keys", []): - key = 
KMSKey.from_dict(key_data) - if key_data.get("EncryptedKeyMaterial"): - encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"]) - key.key_material = self._decrypt_key_material(encrypted) - self._keys[key.key_id] = key - except json.JSONDecodeError as exc: - logger.error("Failed to parse KMS keys file: %s", exc) - except (ValueError, KeyError) as exc: - logger.error("Invalid KMS key data: %s", exc) - - self._loaded = True - - def _save_keys(self) -> None: - """Save keys to disk (with encrypted key material).""" - keys_data = [] - for key in self._keys.values(): - data = key.to_dict(include_key=False) - encrypted = self._encrypt_key_material(key.key_material) - data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode() - keys_data.append(data) - - self.keys_path.parent.mkdir(parents=True, exist_ok=True) - self.keys_path.write_text( - json.dumps({"keys": keys_data}, indent=2), - encoding="utf-8" - ) - _set_secure_file_permissions(self.keys_path) - - def _encrypt_key_material(self, key_material: bytes) -> bytes: - _ = self.master_key - nonce = secrets.token_bytes(12) - ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None) - return nonce + ciphertext - - def _decrypt_key_material(self, encrypted: bytes) -> bytes: - _ = self.master_key - nonce = encrypted[:12] - ciphertext = encrypted[12:] - return self._master_aesgcm.decrypt(nonce, ciphertext, None) - - def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey: - """Create a new KMS key.""" - self._load_keys() - - if key_id is None: - key_id = str(uuid.uuid4()) - - if key_id in self._keys: - raise EncryptionError(f"Key already exists: {key_id}") - - key = KMSKey( - key_id=key_id, - description=description, - created_at=datetime.now(timezone.utc).isoformat(), - enabled=True, - key_material=secrets.token_bytes(32), - ) - - self._keys[key_id] = key - self._save_keys() - return key - - def get_key(self, key_id: str) -> KMSKey | None: - """Get a key by ID.""" - self._load_keys() - return self._keys.get(key_id) - - def list_keys(self) -> List[KMSKey]: - """List all keys.""" - self._load_keys() - return list(self._keys.values()) - - def get_default_key_id(self) -> str: - """Get the default KMS key ID, creating one if none exist.""" - self._load_keys() - for key in self._keys.values(): - if key.enabled: - return key.key_id - default_key = self.create_key(description="Default KMS Key") - return default_key.key_id - - def get_provider(self, key_id: str | None = None) -> "KMSEncryptionProvider": - """Get a KMS encryption provider for the specified key.""" - if key_id is None: - key_id = self.get_default_key_id() - key = self.get_key(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - if not key.enabled: - raise EncryptionError(f"Key is disabled: {key_id}") - return KMSEncryptionProvider(self, key_id) - - def enable_key(self, key_id: str) -> None: - """Enable a key.""" - self._load_keys() - key = self._keys.get(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - key.enabled = True - self._save_keys() - - def disable_key(self, key_id: str) -> None: - """Disable a key.""" - self._load_keys() - key = self._keys.get(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - key.enabled = False - self._save_keys() - - def delete_key(self, key_id: str) -> None: - """Delete a key (schedule for deletion in real KMS).""" - self._load_keys() - if key_id not in self._keys: - raise EncryptionError(f"Key not found: {key_id}") - del self._keys[key_id] - 
self._save_keys() - - def encrypt(self, key_id: str, plaintext: bytes, - context: Dict[str, str] | None = None) -> bytes: - """Encrypt data directly with a KMS key.""" - self._load_keys() - key = self._keys.get(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - if not key.enabled: - raise EncryptionError(f"Key is disabled: {key_id}") - - aesgcm = AESGCM(key.key_material) - nonce = secrets.token_bytes(12) - aad = json.dumps(context, sort_keys=True).encode() if context else None - ciphertext = aesgcm.encrypt(nonce, plaintext, aad) - - key_id_bytes = key_id.encode("utf-8") - return len(key_id_bytes).to_bytes(2, "big") + key_id_bytes + nonce + ciphertext - - def decrypt(self, ciphertext: bytes, - context: Dict[str, str] | None = None) -> tuple[bytes, str]: - """Decrypt data directly with a KMS key. - - Returns: - Tuple of (plaintext, key_id) - """ - self._load_keys() - - key_id_len = int.from_bytes(ciphertext[:2], "big") - key_id = ciphertext[2:2 + key_id_len].decode("utf-8") - rest = ciphertext[2 + key_id_len:] - - key = self._keys.get(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - if not key.enabled: - raise EncryptionError(f"Key is disabled: {key_id}") - - nonce = rest[:12] - encrypted = rest[12:] - - aesgcm = AESGCM(key.key_material) - aad = json.dumps(context, sort_keys=True).encode() if context else None - try: - plaintext = aesgcm.decrypt(nonce, encrypted, aad) - return plaintext, key_id - except Exception as exc: - logger.debug("KMS decrypt operation failed: %s", exc) - raise EncryptionError("Decryption failed") from exc - - def generate_data_key(self, key_id: str, - context: Dict[str, str] | None = None, - key_spec: str = "AES_256") -> tuple[bytes, bytes]: - """Generate a data key and return both plaintext and encrypted versions. 
- - Args: - key_id: The KMS key ID to use for encryption - context: Optional encryption context - key_spec: Key specification - AES_128 or AES_256 (default) - - Returns: - Tuple of (plaintext_key, encrypted_key) - """ - self._load_keys() - key = self._keys.get(key_id) - if not key: - raise EncryptionError(f"Key not found: {key_id}") - if not key.enabled: - raise EncryptionError(f"Key is disabled: {key_id}") - - key_bytes = 32 if key_spec == "AES_256" else 16 - plaintext_key = secrets.token_bytes(key_bytes) - - encrypted_key = self.encrypt(key_id, plaintext_key, context) - - return plaintext_key, encrypted_key - - def decrypt_data_key(self, key_id: str, encrypted_key: bytes, - context: Dict[str, str] | None = None) -> bytes: - """Decrypt a data key.""" - plaintext, _ = self.decrypt(encrypted_key, context) - return plaintext - - def re_encrypt(self, ciphertext: bytes, destination_key_id: str, - source_context: Dict[str, str] | None = None, - destination_context: Dict[str, str] | None = None) -> bytes: - """Re-encrypt data with a different key.""" - - plaintext, source_key_id = self.decrypt(ciphertext, source_context) - - return self.encrypt(destination_key_id, plaintext, destination_context) - - def generate_random(self, num_bytes: int = 32) -> bytes: - """Generate cryptographically secure random bytes.""" - if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes: - raise EncryptionError( - f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}" - ) - return secrets.token_bytes(num_bytes) diff --git a/app/kms_api.py b/app/kms_api.py deleted file mode 100644 index 332c012..0000000 --- a/app/kms_api.py +++ /dev/null @@ -1,444 +0,0 @@ -from __future__ import annotations - -import base64 -import uuid -from typing import Any, Dict - -from flask import Blueprint, Response, current_app, jsonify, request - -from .encryption import ClientEncryptionHelper, EncryptionError -from .extensions import limiter -from .iam import IamError - -kms_api_bp = Blueprint("kms_api", __name__, url_prefix="/kms") - - -def _require_principal(): - """Require authentication for KMS operations.""" - from .s3_api import _require_principal as s3_require_principal - return s3_require_principal() - - -def _kms(): - """Get KMS manager from app extensions.""" - return current_app.extensions.get("kms") - - -def _encryption(): - """Get encryption manager from app extensions.""" - return current_app.extensions.get("encryption") - - -def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]: - return {"__type": code, "message": message}, status - -@kms_api_bp.route("/keys", methods=["GET", "POST"]) -@limiter.limit("30 per minute") -def list_or_create_keys(): - """List all KMS keys or create a new key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - if request.method == "POST": - payload = request.get_json(silent=True) or {} - key_id = payload.get("KeyId") or payload.get("key_id") - description = payload.get("Description") or payload.get("description", "") - - try: - key = kms.create_key(description=description, key_id=key_id) - current_app.logger.info( - "KMS key created", - extra={"key_id": key.key_id, "principal": principal.access_key}, - ) - return jsonify({ - "KeyMetadata": key.to_dict(), - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", 
str(exc), 400) - - keys = kms.list_keys() - return jsonify({ - "Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys], - "Truncated": False, - }) - - -@kms_api_bp.route("/keys/<key_id>", methods=["GET", "DELETE"]) -@limiter.limit("30 per minute") -def get_or_delete_key(key_id: str): - """Get or delete a specific KMS key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - if request.method == "DELETE": - try: - kms.delete_key(key_id) - current_app.logger.info( - "KMS key deleted", - extra={"key_id": key_id, "principal": principal.access_key}, - ) - return Response(status=204) - except EncryptionError as exc: - return _error_response("NotFoundException", str(exc), 404) - - key = kms.get_key(key_id) - if not key: - return _error_response("NotFoundException", f"Key not found: {key_id}", 404) - - return jsonify({"KeyMetadata": key.to_dict()}) - - -@kms_api_bp.route("/keys/<key_id>/enable", methods=["POST"]) -@limiter.limit("30 per minute") -def enable_key(key_id: str): - """Enable a KMS key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - try: - kms.enable_key(key_id) - current_app.logger.info( - "KMS key enabled", - extra={"key_id": key_id, "principal": principal.access_key}, - ) - return Response(status=200) - except EncryptionError as exc: - return _error_response("NotFoundException", str(exc), 404) - - -@kms_api_bp.route("/keys/<key_id>/disable", methods=["POST"]) -@limiter.limit("30 per minute") -def disable_key(key_id: str): - """Disable a KMS key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - try: - kms.disable_key(key_id) - current_app.logger.info( - "KMS key disabled", - extra={"key_id": key_id, "principal": principal.access_key}, - ) - return Response(status=200) - except EncryptionError as exc: - return _error_response("NotFoundException", str(exc), 404) - -@kms_api_bp.route("/encrypt", methods=["POST"]) -@limiter.limit("60 per minute") -def encrypt_data(): - """Encrypt data using a KMS key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - key_id = payload.get("KeyId") - plaintext_b64 = payload.get("Plaintext") - context = payload.get("EncryptionContext") - - if not key_id: - return _error_response("ValidationException", "KeyId is required", 400) - if not plaintext_b64: - return _error_response("ValidationException", "Plaintext is required", 400) - - try: - plaintext = base64.b64decode(plaintext_b64) - except Exception: - return _error_response("ValidationException", "Plaintext must be base64 encoded", 400) - - try: - ciphertext = kms.encrypt(key_id, plaintext, context) - return jsonify({ - "CiphertextBlob": base64.b64encode(ciphertext).decode(), - "KeyId": key_id, - "EncryptionAlgorithm": "SYMMETRIC_DEFAULT", - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", str(exc), 400) - - -@kms_api_bp.route("/decrypt", methods=["POST"]) -@limiter.limit("60 per minute") -def decrypt_data(): - """Decrypt data using a KMS key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if 
not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - ciphertext_b64 = payload.get("CiphertextBlob") - context = payload.get("EncryptionContext") - - if not ciphertext_b64: - return _error_response("ValidationException", "CiphertextBlob is required", 400) - - try: - ciphertext = base64.b64decode(ciphertext_b64) - except Exception: - return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400) - - try: - plaintext, key_id = kms.decrypt(ciphertext, context) - return jsonify({ - "Plaintext": base64.b64encode(plaintext).decode(), - "KeyId": key_id, - "EncryptionAlgorithm": "SYMMETRIC_DEFAULT", - }) - except EncryptionError as exc: - return _error_response("InvalidCiphertextException", str(exc), 400) - - -@kms_api_bp.route("/generate-data-key", methods=["POST"]) -@limiter.limit("60 per minute") -def generate_data_key(): - """Generate a data encryption key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - key_id = payload.get("KeyId") - context = payload.get("EncryptionContext") - key_spec = payload.get("KeySpec", "AES_256") - - if not key_id: - return _error_response("ValidationException", "KeyId is required", 400) - - if key_spec not in {"AES_256", "AES_128"}: - return _error_response("ValidationException", "KeySpec must be AES_256 or AES_128", 400) - - try: - plaintext_key, encrypted_key = kms.generate_data_key(key_id, context) - - if key_spec == "AES_128": - plaintext_key = plaintext_key[:16] - - return jsonify({ - "Plaintext": base64.b64encode(plaintext_key).decode(), - "CiphertextBlob": base64.b64encode(encrypted_key).decode(), - "KeyId": key_id, - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", str(exc), 400) - - -@kms_api_bp.route("/generate-data-key-without-plaintext", methods=["POST"]) -@limiter.limit("60 per minute") -def generate_data_key_without_plaintext(): - """Generate a data encryption key without returning the plaintext.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - key_id = payload.get("KeyId") - context = payload.get("EncryptionContext") - - if not key_id: - return _error_response("ValidationException", "KeyId is required", 400) - - try: - _, encrypted_key = kms.generate_data_key(key_id, context) - return jsonify({ - "CiphertextBlob": base64.b64encode(encrypted_key).decode(), - "KeyId": key_id, - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", str(exc), 400) - - -@kms_api_bp.route("/re-encrypt", methods=["POST"]) -@limiter.limit("30 per minute") -def re_encrypt(): - """Re-encrypt data with a different key.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - ciphertext_b64 = payload.get("CiphertextBlob") - destination_key_id = payload.get("DestinationKeyId") - source_context = payload.get("SourceEncryptionContext") - destination_context = payload.get("DestinationEncryptionContext") - - if not ciphertext_b64: - return _error_response("ValidationException", "CiphertextBlob 
is required", 400) - if not destination_key_id: - return _error_response("ValidationException", "DestinationKeyId is required", 400) - - try: - ciphertext = base64.b64decode(ciphertext_b64) - except Exception: - return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400) - - try: - plaintext, source_key_id = kms.decrypt(ciphertext, source_context) - new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context) - - return jsonify({ - "CiphertextBlob": base64.b64encode(new_ciphertext).decode(), - "SourceKeyId": source_key_id, - "KeyId": destination_key_id, - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", str(exc), 400) - - -@kms_api_bp.route("/generate-random", methods=["POST"]) -@limiter.limit("60 per minute") -def generate_random(): - """Generate random bytes.""" - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - num_bytes = payload.get("NumberOfBytes", 32) - - try: - num_bytes = int(num_bytes) - except (TypeError, ValueError): - return _error_response("ValidationException", "NumberOfBytes must be an integer", 400) - - try: - random_bytes = kms.generate_random(num_bytes) - return jsonify({ - "Plaintext": base64.b64encode(random_bytes).decode(), - }) - except EncryptionError as exc: - return _error_response("ValidationException", str(exc), 400) - -@kms_api_bp.route("/client/generate-key", methods=["POST"]) -@limiter.limit("30 per minute") -def generate_client_key(): - """Generate a client-side encryption key.""" - principal, error = _require_principal() - if error: - return error - - key_info = ClientEncryptionHelper.generate_client_key() - return jsonify(key_info) - - -@kms_api_bp.route("/client/encrypt", methods=["POST"]) -@limiter.limit("60 per minute") -def client_encrypt(): - """Encrypt data using client-side encryption.""" - principal, error = _require_principal() - if error: - return error - - payload = request.get_json(silent=True) or {} - plaintext_b64 = payload.get("Plaintext") - key_b64 = payload.get("Key") - - if not plaintext_b64 or not key_b64: - return _error_response("ValidationException", "Plaintext and Key are required", 400) - - try: - plaintext = base64.b64decode(plaintext_b64) - result = ClientEncryptionHelper.encrypt_with_key(plaintext, key_b64) - return jsonify(result) - except Exception as exc: - return _error_response("EncryptionError", str(exc), 400) - - -@kms_api_bp.route("/client/decrypt", methods=["POST"]) -@limiter.limit("60 per minute") -def client_decrypt(): - """Decrypt data using client-side encryption.""" - principal, error = _require_principal() - if error: - return error - - payload = request.get_json(silent=True) or {} - ciphertext_b64 = payload.get("Ciphertext") or payload.get("ciphertext") - nonce_b64 = payload.get("Nonce") or payload.get("nonce") - key_b64 = payload.get("Key") or payload.get("key") - - if not ciphertext_b64 or not nonce_b64 or not key_b64: - return _error_response("ValidationException", "Ciphertext, Nonce, and Key are required", 400) - - try: - plaintext = ClientEncryptionHelper.decrypt_with_key(ciphertext_b64, nonce_b64, key_b64) - return jsonify({ - "Plaintext": base64.b64encode(plaintext).decode(), - }) - except Exception as exc: - return _error_response("DecryptionError", str(exc), 400) - -@kms_api_bp.route("/materials/<key_id>", methods=["POST"]) -@limiter.limit("60 per minute") 
-def get_encryption_materials(key_id: str): - """Get encryption materials for client-side S3 encryption. - - This is used by S3 encryption clients that want to use KMS for - key management but perform encryption client-side. - """ - principal, error = _require_principal() - if error: - return error - - kms = _kms() - if not kms: - return _error_response("KMSNotEnabled", "KMS is not configured", 400) - - payload = request.get_json(silent=True) or {} - context = payload.get("EncryptionContext") - - try: - plaintext_key, encrypted_key = kms.generate_data_key(key_id, context) - - return jsonify({ - "PlaintextKey": base64.b64encode(plaintext_key).decode(), - "EncryptedKey": base64.b64encode(encrypted_key).decode(), - "KeyId": key_id, - "Algorithm": "AES-256-GCM", - "KeyWrapAlgorithm": "kms", - }) - except EncryptionError as exc: - return _error_response("KMSInternalException", str(exc), 400) diff --git a/app/lifecycle.py b/app/lifecycle.py deleted file mode 100644 index ea2c262..0000000 --- a/app/lifecycle.py +++ /dev/null @@ -1,340 +0,0 @@ -from __future__ import annotations - -import json -import logging -import threading -import time -from dataclasses import dataclass, field -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -from .storage import ObjectStorage, StorageError - -logger = logging.getLogger(__name__) - - -@dataclass -class LifecycleResult: - bucket_name: str - objects_deleted: int = 0 - versions_deleted: int = 0 - uploads_aborted: int = 0 - errors: List[str] = field(default_factory=list) - execution_time_seconds: float = 0.0 - - -@dataclass -class LifecycleExecutionRecord: - timestamp: float - bucket_name: str - objects_deleted: int - versions_deleted: int - uploads_aborted: int - errors: List[str] - execution_time_seconds: float - - def to_dict(self) -> dict: - return { - "timestamp": self.timestamp, - "bucket_name": self.bucket_name, - "objects_deleted": self.objects_deleted, - "versions_deleted": self.versions_deleted, - "uploads_aborted": self.uploads_aborted, - "errors": self.errors, - "execution_time_seconds": self.execution_time_seconds, - } - - @classmethod - def from_dict(cls, data: dict) -> "LifecycleExecutionRecord": - return cls( - timestamp=data["timestamp"], - bucket_name=data["bucket_name"], - objects_deleted=data["objects_deleted"], - versions_deleted=data["versions_deleted"], - uploads_aborted=data["uploads_aborted"], - errors=data.get("errors", []), - execution_time_seconds=data["execution_time_seconds"], - ) - - @classmethod - def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord": - return cls( - timestamp=time.time(), - bucket_name=result.bucket_name, - objects_deleted=result.objects_deleted, - versions_deleted=result.versions_deleted, - uploads_aborted=result.uploads_aborted, - errors=result.errors.copy(), - execution_time_seconds=result.execution_time_seconds, - ) - - -class LifecycleHistoryStore: - def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None: - self.storage_root = storage_root - self.max_history_per_bucket = max_history_per_bucket - self._lock = threading.Lock() - - def _get_history_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json" - - def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]: - path = self._get_history_path(bucket_name) - if not path.exists(): - return [] - try: - with open(path, "r") as f: - data = 
json.load(f) - return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])] - except (OSError, ValueError, KeyError) as e: - logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}") - return [] - - def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None: - path = self._get_history_path(bucket_name) - path.parent.mkdir(parents=True, exist_ok=True) - data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]} - try: - with open(path, "w") as f: - json.dump(data, f, indent=2) - except OSError as e: - logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}") - - def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None: - with self._lock: - records = self.load_history(bucket_name) - records.insert(0, record) - self.save_history(bucket_name, records) - - def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]: - records = self.load_history(bucket_name) - return records[offset:offset + limit] - - -class LifecycleManager: - def __init__( - self, - storage: ObjectStorage, - interval_seconds: int = 3600, - storage_root: Optional[Path] = None, - max_history_per_bucket: int = 50, - ): - self.storage = storage - self.interval_seconds = interval_seconds - self.storage_root = storage_root - self._timer: Optional[threading.Timer] = None - self._shutdown = False - self._lock = threading.Lock() - self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None - - def start(self) -> None: - if self._timer is not None: - return - self._shutdown = False - self._schedule_next() - logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s") - - def stop(self) -> None: - self._shutdown = True - if self._timer: - self._timer.cancel() - self._timer = None - logger.info("Lifecycle manager stopped") - - def _schedule_next(self) -> None: - if self._shutdown: - return - self._timer = threading.Timer(self.interval_seconds, self._run_enforcement) - self._timer.daemon = True - self._timer.start() - - def _run_enforcement(self) -> None: - if self._shutdown: - return - try: - self.enforce_all_buckets() - except Exception as e: - logger.error(f"Lifecycle enforcement failed: {e}") - finally: - self._schedule_next() - - def enforce_all_buckets(self) -> Dict[str, LifecycleResult]: - results = {} - try: - buckets = self.storage.list_buckets() - for bucket in buckets: - result = self.enforce_rules(bucket.name) - if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0: - results[bucket.name] = result - except StorageError as e: - logger.error(f"Failed to list buckets for lifecycle: {e}") - return results - - def enforce_rules(self, bucket_name: str) -> LifecycleResult: - start_time = time.time() - result = LifecycleResult(bucket_name=bucket_name) - - try: - lifecycle = self.storage.get_bucket_lifecycle(bucket_name) - if not lifecycle: - return result - - for rule in lifecycle: - if rule.get("Status") != "Enabled": - continue - rule_id = rule.get("ID", "unknown") - prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", "")) - - self._enforce_expiration(bucket_name, rule, prefix, result) - self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result) - self._enforce_abort_multipart(bucket_name, rule, result) - - except StorageError as e: - result.errors.append(str(e)) - logger.error(f"Lifecycle enforcement error for {bucket_name}: 
{e}") - - result.execution_time_seconds = time.time() - start_time - if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors: - logger.info( - f"Lifecycle enforcement for {bucket_name}: " - f"deleted={result.objects_deleted}, versions={result.versions_deleted}, " - f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s" - ) - if self.history_store: - record = LifecycleExecutionRecord.from_result(result) - self.history_store.add_record(bucket_name, record) - return result - - def _enforce_expiration( - self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult - ) -> None: - expiration = rule.get("Expiration", {}) - if not expiration: - return - - days = expiration.get("Days") - date_str = expiration.get("Date") - - if days: - cutoff = datetime.now(timezone.utc) - timedelta(days=days) - elif date_str: - try: - cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00")) - except ValueError: - return - else: - return - - try: - objects = self.storage.list_objects_all(bucket_name) - for obj in objects: - if prefix and not obj.key.startswith(prefix): - continue - if obj.last_modified < cutoff: - try: - self.storage.delete_object(bucket_name, obj.key) - result.objects_deleted += 1 - except StorageError as e: - result.errors.append(f"Failed to delete {obj.key}: {e}") - except StorageError as e: - result.errors.append(f"Failed to list objects: {e}") - - def _enforce_noncurrent_expiration( - self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult - ) -> None: - noncurrent = rule.get("NoncurrentVersionExpiration", {}) - noncurrent_days = noncurrent.get("NoncurrentDays") - if not noncurrent_days: - return - - cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days) - - try: - objects = self.storage.list_objects_all(bucket_name) - for obj in objects: - if prefix and not obj.key.startswith(prefix): - continue - try: - versions = self.storage.list_object_versions(bucket_name, obj.key) - for version in versions: - archived_at_str = version.get("archived_at", "") - if not archived_at_str: - continue - try: - archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00")) - if archived_at < cutoff: - version_id = version.get("version_id") - if version_id: - self.storage.delete_object_version(bucket_name, obj.key, version_id) - result.versions_deleted += 1 - except (ValueError, StorageError) as e: - result.errors.append(f"Failed to process version: {e}") - except StorageError: - pass - except StorageError as e: - result.errors.append(f"Failed to list objects: {e}") - - try: - orphaned = self.storage.list_orphaned_objects(bucket_name) - for item in orphaned: - obj_key = item.get("key", "") - if prefix and not obj_key.startswith(prefix): - continue - try: - versions = self.storage.list_object_versions(bucket_name, obj_key) - for version in versions: - archived_at_str = version.get("archived_at", "") - if not archived_at_str: - continue - try: - archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00")) - if archived_at < cutoff: - version_id = version.get("version_id") - if version_id: - self.storage.delete_object_version(bucket_name, obj_key, version_id) - result.versions_deleted += 1 - except (ValueError, StorageError) as e: - result.errors.append(f"Failed to process orphaned version: {e}") - except StorageError: - pass - except StorageError as e: - result.errors.append(f"Failed to list orphaned objects: {e}") - - def 
_enforce_abort_multipart( - self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult - ) -> None: - abort_config = rule.get("AbortIncompleteMultipartUpload", {}) - days_after = abort_config.get("DaysAfterInitiation") - if not days_after: - return - - cutoff = datetime.now(timezone.utc) - timedelta(days=days_after) - - try: - uploads = self.storage.list_multipart_uploads(bucket_name) - for upload in uploads: - created_at_str = upload.get("created_at", "") - if not created_at_str: - continue - try: - created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00")) - if created_at < cutoff: - upload_id = upload.get("upload_id") - if upload_id: - self.storage.abort_multipart_upload(bucket_name, upload_id) - result.uploads_aborted += 1 - except (ValueError, StorageError) as e: - result.errors.append(f"Failed to abort upload: {e}") - except StorageError as e: - result.errors.append(f"Failed to list multipart uploads: {e}") - - def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]: - if bucket_name: - return {bucket_name: self.enforce_rules(bucket_name)} - return self.enforce_all_buckets() - - def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]: - if not self.history_store: - return [] - return self.history_store.get_history(bucket_name, limit, offset) diff --git a/app/notifications.py b/app/notifications.py deleted file mode 100644 index ee03ba8..0000000 --- a/app/notifications.py +++ /dev/null @@ -1,406 +0,0 @@ -from __future__ import annotations - -import ipaddress -import json -import logging -import queue -import socket -import threading -import time -import uuid -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional -from urllib.parse import urlparse - -import requests -from urllib3.util.connection import create_connection as _urllib3_create_connection - - -def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]: - try: - parsed = urlparse(url) - hostname = parsed.hostname - if not hostname: - return None - cloud_metadata_hosts = { - "metadata.google.internal", - "169.254.169.254", - } - if hostname.lower() in cloud_metadata_hosts: - return None - if allow_internal: - return hostname - blocked_hosts = { - "localhost", - "127.0.0.1", - "0.0.0.0", - "::1", - "[::1]", - } - if hostname.lower() in blocked_hosts: - return None - try: - resolved_ip = socket.gethostbyname(hostname) - ip = ipaddress.ip_address(resolved_ip) - if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: - return None - return resolved_ip - except (socket.gaierror, ValueError): - return None - except Exception: - return None - - -def _is_safe_url(url: str, allow_internal: bool = False) -> bool: - return _resolve_and_check_url(url, allow_internal) is not None - - -_dns_pin_lock = threading.Lock() - - -def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response: - parsed = urlparse(url) - hostname = parsed.hostname or "" - session = requests.Session() - original_create = _urllib3_create_connection - - def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any: - host, req_port = address - if host == hostname: - return original_create((pinned_ip, req_port), *args, **kw) - return original_create(address, *args, **kw) - - import urllib3.util.connection as _conn_mod - with _dns_pin_lock: - _conn_mod.create_connection = _create_pinned - try: - 
return session.post(url, **kwargs) - finally: - _conn_mod.create_connection = original_create - - -logger = logging.getLogger(__name__) - - -@dataclass -class NotificationEvent: - event_name: str - bucket_name: str - object_key: str - object_size: int = 0 - etag: str = "" - version_id: Optional[str] = None - timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) - request_id: str = field(default_factory=lambda: uuid.uuid4().hex) - source_ip: str = "" - user_identity: str = "" - - def to_s3_event(self) -> Dict[str, Any]: - return { - "Records": [ - { - "eventVersion": "2.1", - "eventSource": "myfsio:s3", - "awsRegion": "local", - "eventTime": self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.000Z"), - "eventName": self.event_name, - "userIdentity": { - "principalId": self.user_identity or "ANONYMOUS", - }, - "requestParameters": { - "sourceIPAddress": self.source_ip or "127.0.0.1", - }, - "responseElements": { - "x-amz-request-id": self.request_id, - "x-amz-id-2": self.request_id, - }, - "s3": { - "s3SchemaVersion": "1.0", - "configurationId": "notification", - "bucket": { - "name": self.bucket_name, - "ownerIdentity": {"principalId": "local"}, - "arn": f"arn:aws:s3:::{self.bucket_name}", - }, - "object": { - "key": self.object_key, - "size": self.object_size, - "eTag": self.etag, - "versionId": self.version_id or "null", - "sequencer": f"{int(time.time() * 1000):016X}", - }, - }, - } - ] - } - - -@dataclass -class WebhookDestination: - url: str - headers: Dict[str, str] = field(default_factory=dict) - timeout_seconds: int = 30 - retry_count: int = 3 - retry_delay_seconds: int = 1 - - def to_dict(self) -> Dict[str, Any]: - return { - "url": self.url, - "headers": self.headers, - "timeout_seconds": self.timeout_seconds, - "retry_count": self.retry_count, - "retry_delay_seconds": self.retry_delay_seconds, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WebhookDestination": - return cls( - url=data.get("url", ""), - headers=data.get("headers", {}), - timeout_seconds=data.get("timeout_seconds", 30), - retry_count=data.get("retry_count", 3), - retry_delay_seconds=data.get("retry_delay_seconds", 1), - ) - - -@dataclass -class NotificationConfiguration: - id: str - events: List[str] - destination: WebhookDestination - prefix_filter: str = "" - suffix_filter: str = "" - - def matches_event(self, event_name: str, object_key: str) -> bool: - event_match = False - for pattern in self.events: - if pattern.endswith("*"): - base = pattern[:-1] - if event_name.startswith(base): - event_match = True - break - elif pattern == event_name: - event_match = True - break - - if not event_match: - return False - - if self.prefix_filter and not object_key.startswith(self.prefix_filter): - return False - if self.suffix_filter and not object_key.endswith(self.suffix_filter): - return False - - return True - - def to_dict(self) -> Dict[str, Any]: - return { - "Id": self.id, - "Events": self.events, - "Destination": self.destination.to_dict(), - "Filter": { - "Key": { - "FilterRules": [ - {"Name": "prefix", "Value": self.prefix_filter}, - {"Name": "suffix", "Value": self.suffix_filter}, - ] - } - }, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "NotificationConfiguration": - prefix = "" - suffix = "" - filter_data = data.get("Filter", {}) - key_filter = filter_data.get("Key", {}) - for rule in key_filter.get("FilterRules", []): - if rule.get("Name") == "prefix": - prefix = rule.get("Value", "") - elif rule.get("Name") == "suffix": - suffix = rule.get("Value", 
"") - - return cls( - id=data.get("Id", uuid.uuid4().hex), - events=data.get("Events", []), - destination=WebhookDestination.from_dict(data.get("Destination", {})), - prefix_filter=prefix, - suffix_filter=suffix, - ) - - -class NotificationService: - def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False): - self.storage_root = storage_root - self._allow_internal_endpoints = allow_internal_endpoints - self._configs: Dict[str, List[NotificationConfiguration]] = {} - self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue() - self._workers: List[threading.Thread] = [] - self._shutdown = threading.Event() - self._stats = { - "events_queued": 0, - "events_sent": 0, - "events_failed": 0, - } - - for i in range(worker_count): - worker = threading.Thread(target=self._worker_loop, name=f"notification-worker-{i}", daemon=True) - worker.start() - self._workers.append(worker) - - def _config_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "notifications.json" - - def get_bucket_notifications(self, bucket_name: str) -> List[NotificationConfiguration]: - if bucket_name in self._configs: - return self._configs[bucket_name] - - config_path = self._config_path(bucket_name) - if not config_path.exists(): - return [] - - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - configs = [NotificationConfiguration.from_dict(c) for c in data.get("configurations", [])] - self._configs[bucket_name] = configs - return configs - except (json.JSONDecodeError, OSError) as e: - logger.warning(f"Failed to load notification config for {bucket_name}: {e}") - return [] - - def set_bucket_notifications( - self, bucket_name: str, configurations: List[NotificationConfiguration] - ) -> None: - config_path = self._config_path(bucket_name) - config_path.parent.mkdir(parents=True, exist_ok=True) - - data = {"configurations": [c.to_dict() for c in configurations]} - config_path.write_text(json.dumps(data, indent=2), encoding="utf-8") - self._configs[bucket_name] = configurations - - def delete_bucket_notifications(self, bucket_name: str) -> None: - config_path = self._config_path(bucket_name) - try: - if config_path.exists(): - config_path.unlink() - except OSError: - pass - self._configs.pop(bucket_name, None) - - def emit_event(self, event: NotificationEvent) -> None: - configurations = self.get_bucket_notifications(event.bucket_name) - if not configurations: - return - - for config in configurations: - if config.matches_event(event.event_name, event.object_key): - self._queue.put((event, config.destination)) - self._stats["events_queued"] += 1 - logger.debug( - f"Queued notification for {event.event_name} on {event.bucket_name}/{event.object_key}" - ) - - def emit_object_created( - self, - bucket_name: str, - object_key: str, - *, - size: int = 0, - etag: str = "", - version_id: Optional[str] = None, - request_id: str = "", - source_ip: str = "", - user_identity: str = "", - operation: str = "Put", - ) -> None: - event = NotificationEvent( - event_name=f"s3:ObjectCreated:{operation}", - bucket_name=bucket_name, - object_key=object_key, - object_size=size, - etag=etag, - version_id=version_id, - request_id=request_id or uuid.uuid4().hex, - source_ip=source_ip, - user_identity=user_identity, - ) - self.emit_event(event) - - def emit_object_removed( - self, - bucket_name: str, - object_key: str, - *, - version_id: Optional[str] = None, - request_id: str = "", - source_ip: str = "", - 
user_identity: str = "", - operation: str = "Delete", - ) -> None: - event = NotificationEvent( - event_name=f"s3:ObjectRemoved:{operation}", - bucket_name=bucket_name, - object_key=object_key, - version_id=version_id, - request_id=request_id or uuid.uuid4().hex, - source_ip=source_ip, - user_identity=user_identity, - ) - self.emit_event(event) - - def _worker_loop(self) -> None: - while not self._shutdown.is_set(): - try: - event, destination = self._queue.get(timeout=1.0) - except queue.Empty: - continue - - try: - self._send_notification(event, destination) - self._stats["events_sent"] += 1 - except Exception as e: - self._stats["events_failed"] += 1 - logger.error(f"Failed to send notification: {e}") - finally: - self._queue.task_done() - - def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None: - resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints) - if not resolved_ip: - raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}") - payload = event.to_s3_event() - headers = {"Content-Type": "application/json", **destination.headers} - - last_error = None - for attempt in range(destination.retry_count): - try: - response = _pinned_post( - destination.url, - resolved_ip, - json=payload, - headers=headers, - timeout=destination.timeout_seconds, - ) - if response.status_code < 400: - logger.info( - f"Notification sent: {event.event_name} -> {destination.url} (status={response.status_code})" - ) - return - last_error = f"HTTP {response.status_code}: {response.text[:200]}" - except requests.RequestException as e: - last_error = str(e) - - if attempt < destination.retry_count - 1: - time.sleep(destination.retry_delay_seconds * (attempt + 1)) - - raise RuntimeError(f"Failed after {destination.retry_count} attempts: {last_error}") - - def get_stats(self) -> Dict[str, int]: - return dict(self._stats) - - def shutdown(self) -> None: - self._shutdown.set() - for worker in self._workers: - worker.join(timeout=5.0) diff --git a/app/object_lock.py b/app/object_lock.py deleted file mode 100644 index a5aab2c..0000000 --- a/app/object_lock.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import dataclass -from datetime import datetime, timezone -from enum import Enum -from pathlib import Path -from typing import Any, Dict, Optional - - -class RetentionMode(Enum): - GOVERNANCE = "GOVERNANCE" - COMPLIANCE = "COMPLIANCE" - - -class ObjectLockError(Exception): - pass - - -@dataclass -class ObjectLockRetention: - mode: RetentionMode - retain_until_date: datetime - - def to_dict(self) -> Dict[str, str]: - return { - "Mode": self.mode.value, - "RetainUntilDate": self.retain_until_date.isoformat(), - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> Optional["ObjectLockRetention"]: - if not data: - return None - mode_str = data.get("Mode") - date_str = data.get("RetainUntilDate") - if not mode_str or not date_str: - return None - try: - mode = RetentionMode(mode_str) - retain_until = datetime.fromisoformat(date_str.replace("Z", "+00:00")) - return cls(mode=mode, retain_until_date=retain_until) - except (ValueError, KeyError): - return None - - def is_expired(self) -> bool: - return datetime.now(timezone.utc) > self.retain_until_date - - -@dataclass -class ObjectLockConfig: - enabled: bool = False - default_retention: Optional[ObjectLockRetention] = None - - def to_dict(self) -> Dict[str, Any]: - result: Dict[str, Any] = 
{"ObjectLockEnabled": "Enabled" if self.enabled else "Disabled"} - if self.default_retention: - result["Rule"] = { - "DefaultRetention": { - "Mode": self.default_retention.mode.value, - "Days": None, - "Years": None, - } - } - return result - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ObjectLockConfig": - enabled = data.get("ObjectLockEnabled") == "Enabled" - default_retention = None - rule = data.get("Rule") - if rule and "DefaultRetention" in rule: - dr = rule["DefaultRetention"] - mode_str = dr.get("Mode", "GOVERNANCE") - days = dr.get("Days") - years = dr.get("Years") - if days or years: - from datetime import timedelta - now = datetime.now(timezone.utc) - if years: - delta = timedelta(days=int(years) * 365) - else: - delta = timedelta(days=int(days)) - default_retention = ObjectLockRetention( - mode=RetentionMode(mode_str), - retain_until_date=now + delta, - ) - return cls(enabled=enabled, default_retention=default_retention) - - -class ObjectLockService: - def __init__(self, storage_root: Path): - self.storage_root = storage_root - self._config_cache: Dict[str, ObjectLockConfig] = {} - - def _bucket_lock_config_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "object_lock.json" - - def _object_lock_meta_path(self, bucket_name: str, object_key: str) -> Path: - safe_key = object_key.replace("/", "_").replace("\\", "_") - return ( - self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / - "locks" / f"{safe_key}.lock.json" - ) - - def get_bucket_lock_config(self, bucket_name: str) -> ObjectLockConfig: - if bucket_name in self._config_cache: - return self._config_cache[bucket_name] - - config_path = self._bucket_lock_config_path(bucket_name) - if not config_path.exists(): - return ObjectLockConfig(enabled=False) - - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - config = ObjectLockConfig.from_dict(data) - self._config_cache[bucket_name] = config - return config - except (json.JSONDecodeError, OSError): - return ObjectLockConfig(enabled=False) - - def set_bucket_lock_config(self, bucket_name: str, config: ObjectLockConfig) -> None: - config_path = self._bucket_lock_config_path(bucket_name) - config_path.parent.mkdir(parents=True, exist_ok=True) - config_path.write_text(json.dumps(config.to_dict()), encoding="utf-8") - self._config_cache[bucket_name] = config - - def enable_bucket_lock(self, bucket_name: str) -> None: - config = self.get_bucket_lock_config(bucket_name) - config.enabled = True - self.set_bucket_lock_config(bucket_name, config) - - def is_bucket_lock_enabled(self, bucket_name: str) -> bool: - return self.get_bucket_lock_config(bucket_name).enabled - - def get_object_retention(self, bucket_name: str, object_key: str) -> Optional[ObjectLockRetention]: - meta_path = self._object_lock_meta_path(bucket_name, object_key) - if not meta_path.exists(): - return None - try: - data = json.loads(meta_path.read_text(encoding="utf-8")) - return ObjectLockRetention.from_dict(data.get("retention", {})) - except (json.JSONDecodeError, OSError): - return None - - def set_object_retention( - self, - bucket_name: str, - object_key: str, - retention: ObjectLockRetention, - bypass_governance: bool = False, - ) -> None: - existing = self.get_object_retention(bucket_name, object_key) - if existing and not existing.is_expired(): - if existing.mode == RetentionMode.COMPLIANCE: - raise ObjectLockError( - "Cannot modify retention on object with COMPLIANCE mode until retention expires" - ) - if 
existing.mode == RetentionMode.GOVERNANCE and not bypass_governance: - raise ObjectLockError( - "Cannot modify GOVERNANCE retention without bypass-governance permission" - ) - - meta_path = self._object_lock_meta_path(bucket_name, object_key) - meta_path.parent.mkdir(parents=True, exist_ok=True) - - existing_data: Dict[str, Any] = {} - if meta_path.exists(): - try: - existing_data = json.loads(meta_path.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError): - pass - - existing_data["retention"] = retention.to_dict() - meta_path.write_text(json.dumps(existing_data), encoding="utf-8") - - def get_legal_hold(self, bucket_name: str, object_key: str) -> bool: - meta_path = self._object_lock_meta_path(bucket_name, object_key) - if not meta_path.exists(): - return False - try: - data = json.loads(meta_path.read_text(encoding="utf-8")) - return data.get("legal_hold", False) - except (json.JSONDecodeError, OSError): - return False - - def set_legal_hold(self, bucket_name: str, object_key: str, enabled: bool) -> None: - meta_path = self._object_lock_meta_path(bucket_name, object_key) - meta_path.parent.mkdir(parents=True, exist_ok=True) - - existing_data: Dict[str, Any] = {} - if meta_path.exists(): - try: - existing_data = json.loads(meta_path.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError): - pass - - existing_data["legal_hold"] = enabled - meta_path.write_text(json.dumps(existing_data), encoding="utf-8") - - def can_delete_object( - self, - bucket_name: str, - object_key: str, - bypass_governance: bool = False, - ) -> tuple[bool, str]: - if self.get_legal_hold(bucket_name, object_key): - return False, "Object is under legal hold" - - retention = self.get_object_retention(bucket_name, object_key) - if retention and not retention.is_expired(): - if retention.mode == RetentionMode.COMPLIANCE: - return False, f"Object is locked in COMPLIANCE mode until {retention.retain_until_date.isoformat()}" - if retention.mode == RetentionMode.GOVERNANCE: - if not bypass_governance: - return False, f"Object is locked in GOVERNANCE mode until {retention.retain_until_date.isoformat()}" - - return True, "" - - def can_overwrite_object( - self, - bucket_name: str, - object_key: str, - bypass_governance: bool = False, - ) -> tuple[bool, str]: - return self.can_delete_object(bucket_name, object_key, bypass_governance) - - def delete_object_lock_metadata(self, bucket_name: str, object_key: str) -> None: - meta_path = self._object_lock_meta_path(bucket_name, object_key) - try: - if meta_path.exists(): - meta_path.unlink() - except OSError: - pass diff --git a/app/operation_metrics.py b/app/operation_metrics.py deleted file mode 100644 index 0917d8e..0000000 --- a/app/operation_metrics.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import annotations - -import json -import logging -import random -import threading -import time -from collections import defaultdict -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -MAX_LATENCY_SAMPLES = 5000 - -logger = logging.getLogger(__name__) - - -@dataclass -class OperationStats: - count: int = 0 - success_count: int = 0 - error_count: int = 0 - latency_sum_ms: float = 0.0 - latency_min_ms: float = float("inf") - latency_max_ms: float = 0.0 - bytes_in: int = 0 - bytes_out: int = 0 - latency_samples: List[float] = field(default_factory=list) - - @staticmethod - def _compute_percentile(sorted_data: List[float], p: float) -> float: - if not 
sorted_data: - return 0.0 - k = (len(sorted_data) - 1) * (p / 100.0) - f = int(k) - c = min(f + 1, len(sorted_data) - 1) - d = k - f - return sorted_data[f] + d * (sorted_data[c] - sorted_data[f]) - - def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None: - self.count += 1 - if success: - self.success_count += 1 - else: - self.error_count += 1 - self.latency_sum_ms += latency_ms - if latency_ms < self.latency_min_ms: - self.latency_min_ms = latency_ms - if latency_ms > self.latency_max_ms: - self.latency_max_ms = latency_ms - self.bytes_in += bytes_in - self.bytes_out += bytes_out - if len(self.latency_samples) < MAX_LATENCY_SAMPLES: - self.latency_samples.append(latency_ms) - else: - j = random.randint(0, self.count - 1) - if j < MAX_LATENCY_SAMPLES: - self.latency_samples[j] = latency_ms - - def to_dict(self) -> Dict[str, Any]: - avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0 - min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0 - sorted_latencies = sorted(self.latency_samples) - return { - "count": self.count, - "success_count": self.success_count, - "error_count": self.error_count, - "latency_avg_ms": round(avg_latency, 2), - "latency_min_ms": round(min_latency, 2), - "latency_max_ms": round(self.latency_max_ms, 2), - "latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2), - "latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2), - "latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2), - "bytes_in": self.bytes_in, - "bytes_out": self.bytes_out, - } - - def merge(self, other: "OperationStats") -> None: - self.count += other.count - self.success_count += other.success_count - self.error_count += other.error_count - self.latency_sum_ms += other.latency_sum_ms - if other.latency_min_ms < self.latency_min_ms: - self.latency_min_ms = other.latency_min_ms - if other.latency_max_ms > self.latency_max_ms: - self.latency_max_ms = other.latency_max_ms - self.bytes_in += other.bytes_in - self.bytes_out += other.bytes_out - combined = self.latency_samples + other.latency_samples - if len(combined) > MAX_LATENCY_SAMPLES: - random.shuffle(combined) - combined = combined[:MAX_LATENCY_SAMPLES] - self.latency_samples = combined - - -@dataclass -class MetricsSnapshot: - timestamp: datetime - window_seconds: int - by_method: Dict[str, Dict[str, Any]] - by_endpoint: Dict[str, Dict[str, Any]] - by_status_class: Dict[str, int] - error_codes: Dict[str, int] - totals: Dict[str, Any] - - def to_dict(self) -> Dict[str, Any]: - return { - "timestamp": self.timestamp.isoformat(), - "window_seconds": self.window_seconds, - "by_method": self.by_method, - "by_endpoint": self.by_endpoint, - "by_status_class": self.by_status_class, - "error_codes": self.error_codes, - "totals": self.totals, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot": - return cls( - timestamp=datetime.fromisoformat(data["timestamp"]), - window_seconds=data.get("window_seconds", 300), - by_method=data.get("by_method", {}), - by_endpoint=data.get("by_endpoint", {}), - by_status_class=data.get("by_status_class", {}), - error_codes=data.get("error_codes", {}), - totals=data.get("totals", {}), - ) - - -class OperationMetricsCollector: - def __init__( - self, - storage_root: Path, - interval_minutes: int = 5, - retention_hours: int = 24, - ): - self.storage_root = storage_root - self.interval_seconds = interval_minutes * 60 - self.retention_hours = 
retention_hours - self._lock = threading.Lock() - self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats) - self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats) - self._by_status_class: Dict[str, int] = {} - self._error_codes: Dict[str, int] = {} - self._totals = OperationStats() - self._window_start = time.time() - self._shutdown = threading.Event() - self._snapshots: List[MetricsSnapshot] = [] - - self._load_history() - - self._snapshot_thread = threading.Thread( - target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True - ) - self._snapshot_thread.start() - - def _config_path(self) -> Path: - return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json" - - def _load_history(self) -> None: - config_path = self._config_path() - if not config_path.exists(): - return - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - snapshots_data = data.get("snapshots", []) - self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data] - self._prune_old_snapshots() - except (json.JSONDecodeError, OSError, KeyError) as e: - logger.warning(f"Failed to load operation metrics history: {e}") - - def _save_history(self) -> None: - config_path = self._config_path() - config_path.parent.mkdir(parents=True, exist_ok=True) - try: - data = {"snapshots": [s.to_dict() for s in self._snapshots]} - config_path.write_text(json.dumps(data, indent=2), encoding="utf-8") - except OSError as e: - logger.warning(f"Failed to save operation metrics history: {e}") - - def _prune_old_snapshots(self) -> None: - if not self._snapshots: - return - cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600) - self._snapshots = [ - s for s in self._snapshots if s.timestamp.timestamp() > cutoff - ] - - def _snapshot_loop(self) -> None: - while not self._shutdown.is_set(): - self._shutdown.wait(timeout=self.interval_seconds) - if not self._shutdown.is_set(): - self._take_snapshot() - - def _take_snapshot(self) -> None: - with self._lock: - now = datetime.now(timezone.utc) - window_seconds = int(time.time() - self._window_start) - - snapshot = MetricsSnapshot( - timestamp=now, - window_seconds=window_seconds, - by_method={k: v.to_dict() for k, v in self._by_method.items()}, - by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()}, - by_status_class=dict(self._by_status_class), - error_codes=dict(self._error_codes), - totals=self._totals.to_dict(), - ) - - self._snapshots.append(snapshot) - self._prune_old_snapshots() - self._save_history() - - self._by_method = defaultdict(OperationStats) - self._by_endpoint = defaultdict(OperationStats) - self._by_status_class.clear() - self._error_codes.clear() - self._totals = OperationStats() - self._window_start = time.time() - - def record_request( - self, - method: str, - endpoint_type: str, - status_code: int, - latency_ms: float, - bytes_in: int = 0, - bytes_out: int = 0, - error_code: Optional[str] = None, - ) -> None: - success = 200 <= status_code < 400 - status_class = f"{status_code // 100}xx" - - with self._lock: - self._by_method[method].record(latency_ms, success, bytes_in, bytes_out) - self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out) - - self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1 - - if error_code: - self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1 - - self._totals.record(latency_ms, success, bytes_in, bytes_out) - - def get_current_stats(self) -> 
Dict[str, Any]: - with self._lock: - window_seconds = int(time.time() - self._window_start) - return { - "timestamp": datetime.now(timezone.utc).isoformat(), - "window_seconds": window_seconds, - "by_method": {k: v.to_dict() for k, v in self._by_method.items()}, - "by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()}, - "by_status_class": dict(self._by_status_class), - "error_codes": dict(self._error_codes), - "totals": self._totals.to_dict(), - } - - def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]: - with self._lock: - snapshots = list(self._snapshots) - - if hours: - cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600) - snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff] - - return [s.to_dict() for s in snapshots] - - def shutdown(self) -> None: - self._shutdown.set() - self._take_snapshot() - self._snapshot_thread.join(timeout=5.0) - - -def classify_endpoint(path: str) -> str: - if not path or path == "/": - return "service" - - path = path.rstrip("/") - - if path.startswith("/ui"): - return "ui" - - if path.startswith("/kms"): - return "kms" - - if path.startswith("/myfsio"): - return "service" - - parts = path.lstrip("/").split("/") - if len(parts) == 0: - return "service" - elif len(parts) == 1: - return "bucket" - else: - return "object" diff --git a/app/replication.py b/app/replication.py deleted file mode 100644 index ec2d113..0000000 --- a/app/replication.py +++ /dev/null @@ -1,667 +0,0 @@ -from __future__ import annotations - -import json -import logging -import mimetypes -import threading -import time -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional - -import boto3 -from botocore.config import Config -from botocore.exceptions import ClientError -from boto3.exceptions import S3UploadFailedError - -from .connections import ConnectionStore, RemoteConnection -from .storage import ObjectStorage, StorageError - -logger = logging.getLogger(__name__) - -REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0" - -REPLICATION_MODE_NEW_ONLY = "new_only" -REPLICATION_MODE_ALL = "all" -REPLICATION_MODE_BIDIRECTIONAL = "bidirectional" - - -def _create_s3_client( - connection: RemoteConnection, - *, - health_check: bool = False, - connect_timeout: int = 5, - read_timeout: int = 30, - max_retries: int = 2, -) -> Any: - """Create a boto3 S3 client for the given connection. 
- Args: - connection: Remote S3 connection configuration - health_check: If True, use minimal retries for quick health checks - """ - config = Config( - user_agent_extra=REPLICATION_USER_AGENT, - connect_timeout=connect_timeout, - read_timeout=read_timeout, - retries={'max_attempts': 1 if health_check else max_retries}, - signature_version='s3v4', - s3={'addressing_style': 'path'}, - request_checksum_calculation='when_required', - response_checksum_validation='when_required', - ) - return boto3.client( - "s3", - endpoint_url=connection.endpoint_url, - aws_access_key_id=connection.access_key, - aws_secret_access_key=connection.secret_key, - region_name=connection.region or 'us-east-1', - config=config, - ) - - -@dataclass -class ReplicationStats: - """Statistics for replication operations - computed dynamically.""" - objects_synced: int = 0 - objects_pending: int = 0 - objects_orphaned: int = 0 - bytes_synced: int = 0 - last_sync_at: Optional[float] = None - last_sync_key: Optional[str] = None - - def to_dict(self) -> dict: - return { - "objects_synced": self.objects_synced, - "objects_pending": self.objects_pending, - "objects_orphaned": self.objects_orphaned, - "bytes_synced": self.bytes_synced, - "last_sync_at": self.last_sync_at, - "last_sync_key": self.last_sync_key, - } - - @classmethod - def from_dict(cls, data: dict) -> "ReplicationStats": - return cls( - objects_synced=data.get("objects_synced", 0), - objects_pending=data.get("objects_pending", 0), - objects_orphaned=data.get("objects_orphaned", 0), - bytes_synced=data.get("bytes_synced", 0), - last_sync_at=data.get("last_sync_at"), - last_sync_key=data.get("last_sync_key"), - ) - - -@dataclass -class ReplicationFailure: - object_key: str - error_message: str - timestamp: float - failure_count: int - bucket_name: str - action: str - last_error_code: Optional[str] = None - - def to_dict(self) -> dict: - return { - "object_key": self.object_key, - "error_message": self.error_message, - "timestamp": self.timestamp, - "failure_count": self.failure_count, - "bucket_name": self.bucket_name, - "action": self.action, - "last_error_code": self.last_error_code, - } - - @classmethod - def from_dict(cls, data: dict) -> "ReplicationFailure": - return cls( - object_key=data["object_key"], - error_message=data["error_message"], - timestamp=data["timestamp"], - failure_count=data["failure_count"], - bucket_name=data["bucket_name"], - action=data["action"], - last_error_code=data.get("last_error_code"), - ) - - -@dataclass -class ReplicationRule: - bucket_name: str - target_connection_id: str - target_bucket: str - enabled: bool = True - mode: str = REPLICATION_MODE_NEW_ONLY - created_at: Optional[float] = None - stats: ReplicationStats = field(default_factory=ReplicationStats) - sync_deletions: bool = True - last_pull_at: Optional[float] = None - filter_prefix: Optional[str] = None - - def to_dict(self) -> dict: - return { - "bucket_name": self.bucket_name, - "target_connection_id": self.target_connection_id, - "target_bucket": self.target_bucket, - "enabled": self.enabled, - "mode": self.mode, - "created_at": self.created_at, - "stats": self.stats.to_dict(), - "sync_deletions": self.sync_deletions, - "last_pull_at": self.last_pull_at, - "filter_prefix": self.filter_prefix, - } - - @classmethod - def from_dict(cls, data: dict) -> "ReplicationRule": - stats_data = data.pop("stats", {}) - if "mode" not in data: - data["mode"] = REPLICATION_MODE_NEW_ONLY - if "created_at" not in data: - data["created_at"] = None - if "sync_deletions" not in data: - 
data["sync_deletions"] = True - if "last_pull_at" not in data: - data["last_pull_at"] = None - if "filter_prefix" not in data: - data["filter_prefix"] = None - rule = cls(**data) - rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats() - return rule - - -class ReplicationFailureStore: - def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None: - self.storage_root = storage_root - self.max_failures_per_bucket = max_failures_per_bucket - self._lock = threading.Lock() - self._cache: Dict[str, List[ReplicationFailure]] = {} - - def _get_failures_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json" - - def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]: - path = self._get_failures_path(bucket_name) - if not path.exists(): - return [] - try: - with open(path, "r") as f: - data = json.load(f) - return [ReplicationFailure.from_dict(d) for d in data.get("failures", [])] - except (OSError, ValueError, KeyError) as e: - logger.error(f"Failed to load replication failures for {bucket_name}: {e}") - return [] - - def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None: - path = self._get_failures_path(bucket_name) - path.parent.mkdir(parents=True, exist_ok=True) - data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]} - try: - with open(path, "w") as f: - json.dump(data, f, indent=2) - except OSError as e: - logger.error(f"Failed to save replication failures for {bucket_name}: {e}") - - def load_failures(self, bucket_name: str) -> List[ReplicationFailure]: - if bucket_name in self._cache: - return list(self._cache[bucket_name]) - failures = self._load_from_disk(bucket_name) - self._cache[bucket_name] = failures - return list(failures) - - def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None: - trimmed = failures[:self.max_failures_per_bucket] - self._cache[bucket_name] = trimmed - self._save_to_disk(bucket_name, trimmed) - - def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None: - with self._lock: - failures = self.load_failures(bucket_name) - existing = next((f for f in failures if f.object_key == failure.object_key), None) - if existing: - existing.failure_count += 1 - existing.timestamp = failure.timestamp - existing.error_message = failure.error_message - existing.last_error_code = failure.last_error_code - else: - failures.insert(0, failure) - self.save_failures(bucket_name, failures) - - def remove_failure(self, bucket_name: str, object_key: str) -> bool: - with self._lock: - failures = self.load_failures(bucket_name) - original_len = len(failures) - failures = [f for f in failures if f.object_key != object_key] - if len(failures) < original_len: - self.save_failures(bucket_name, failures) - return True - return False - - def clear_failures(self, bucket_name: str) -> None: - with self._lock: - self._cache.pop(bucket_name, None) - path = self._get_failures_path(bucket_name) - if path.exists(): - path.unlink() - - def get_failure(self, bucket_name: str, object_key: str) -> Optional[ReplicationFailure]: - failures = self.load_failures(bucket_name) - return next((f for f in failures if f.object_key == object_key), None) - - def get_failure_count(self, bucket_name: str) -> int: - return len(self.load_failures(bucket_name)) - - -class ReplicationManager: - def __init__( - self, - storage: ObjectStorage, - connections: 
ConnectionStore, - rules_path: Path, - storage_root: Path, - connect_timeout: int = 5, - read_timeout: int = 30, - max_retries: int = 2, - streaming_threshold_bytes: int = 10 * 1024 * 1024, - max_failures_per_bucket: int = 50, - ) -> None: - self.storage = storage - self.connections = connections - self.rules_path = rules_path - self.storage_root = storage_root - self.connect_timeout = connect_timeout - self.read_timeout = read_timeout - self.max_retries = max_retries - self.streaming_threshold_bytes = streaming_threshold_bytes - self._rules: Dict[str, ReplicationRule] = {} - self._stats_lock = threading.Lock() - self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker") - self._shutdown = False - self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket) - self.reload_rules() - - def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any: - """Create an S3 client with the manager's configured timeouts.""" - return _create_s3_client( - connection, - health_check=health_check, - connect_timeout=self.connect_timeout, - read_timeout=self.read_timeout, - max_retries=self.max_retries, - ) - - def shutdown(self, wait: bool = True) -> None: - """Shutdown the replication executor gracefully. - - Args: - wait: If True, wait for pending tasks to complete - """ - self._shutdown = True - self._executor.shutdown(wait=wait) - logger.info("Replication manager shut down") - - def reload_rules(self) -> None: - if not self.rules_path.exists(): - self._rules = {} - return - try: - with open(self.rules_path, "r") as f: - data = json.load(f) - for bucket, rule_data in data.items(): - self._rules[bucket] = ReplicationRule.from_dict(rule_data) - except (OSError, ValueError) as e: - logger.error(f"Failed to load replication rules: {e}") - - def save_rules(self) -> None: - data = {b: rule.to_dict() for b, rule in self._rules.items()} - self.rules_path.parent.mkdir(parents=True, exist_ok=True) - with open(self.rules_path, "w") as f: - json.dump(data, f, indent=2) - - def check_endpoint_health(self, connection: RemoteConnection) -> bool: - """Check if a remote endpoint is reachable and responsive. - - Returns True if endpoint is healthy, False otherwise. - Uses short timeouts to prevent blocking. 
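The health probe described above can be reproduced on its own: a client with a single retry attempt, short timeouts, path-style addressing and SigV4 signing, with list_buckets as a cheap round trip. A minimal sketch, assuming a placeholder endpoint URL and placeholder credentials:

import boto3
from botocore.config import Config

config = Config(
    connect_timeout=5,
    read_timeout=30,
    retries={"max_attempts": 1},       # health checks use minimal retries
    signature_version="s3v4",
    s3={"addressing_style": "path"},
)
s3 = boto3.client(
    "s3",
    endpoint_url="http://remote.example:9000",   # placeholder
    aws_access_key_id="AKIAEXAMPLE",             # placeholder
    aws_secret_access_key="EXAMPLE-SECRET",      # placeholder
    region_name="us-east-1",
    config=config,
)
try:
    s3.list_buckets()       # success means the remote endpoint is reachable
    healthy = True
except Exception:
    healthy = False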
- """ - try: - s3 = self._create_client(connection, health_check=True) - s3.list_buckets() - return True - except Exception as e: - logger.warning(f"Endpoint health check failed for {connection.name} ({connection.endpoint_url}): {e}") - return False - - def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]: - return self._rules.get(bucket_name) - - def list_rules(self) -> List[ReplicationRule]: - return list(self._rules.values()) - - def set_rule(self, rule: ReplicationRule) -> None: - old_rule = self._rules.get(rule.bucket_name) - was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False - self._rules[rule.bucket_name] = rule - self.save_rules() - - if rule.mode == REPLICATION_MODE_ALL and rule.enabled and not was_all_mode: - logger.info(f"Replication mode ALL enabled for {rule.bucket_name}, triggering sync of existing objects") - self._executor.submit(self.replicate_existing_objects, rule.bucket_name) - - def delete_rule(self, bucket_name: str) -> None: - if bucket_name in self._rules: - del self._rules[bucket_name] - self.save_rules() - - def _update_last_sync(self, bucket_name: str, object_key: str = "") -> None: - """Update last sync timestamp after a successful operation.""" - with self._stats_lock: - rule = self._rules.get(bucket_name) - if not rule: - return - rule.stats.last_sync_at = time.time() - rule.stats.last_sync_key = object_key - self.save_rules() - - def get_sync_status(self, bucket_name: str) -> Optional[ReplicationStats]: - """Dynamically compute replication status by comparing source and destination buckets.""" - rule = self.get_rule(bucket_name) - if not rule: - return None - - connection = self.connections.get(rule.target_connection_id) - if not connection: - return rule.stats - - try: - source_objects = self.storage.list_objects_all(bucket_name) - source_keys = {obj.key: obj.size for obj in source_objects} - - s3 = self._create_client(connection) - - dest_keys = set() - bytes_synced = 0 - paginator = s3.get_paginator('list_objects_v2') - try: - for page in paginator.paginate(Bucket=rule.target_bucket): - for obj in page.get('Contents', []): - dest_keys.add(obj['Key']) - if obj['Key'] in source_keys: - bytes_synced += obj.get('Size', 0) - except ClientError as e: - if e.response['Error']['Code'] == 'NoSuchBucket': - dest_keys = set() - else: - raise - - synced = source_keys.keys() & dest_keys - orphaned = dest_keys - source_keys.keys() - - if rule.mode == REPLICATION_MODE_ALL: - pending = source_keys.keys() - dest_keys - else: - pending = set() - - rule.stats.objects_synced = len(synced) - rule.stats.objects_pending = len(pending) - rule.stats.objects_orphaned = len(orphaned) - rule.stats.bytes_synced = bytes_synced - - return rule.stats - - except (ClientError, StorageError) as e: - logger.error(f"Failed to compute sync status for {bucket_name}: {e}") - return rule.stats - - def replicate_existing_objects(self, bucket_name: str) -> None: - """Trigger replication for all existing objects in a bucket.""" - rule = self.get_rule(bucket_name) - if not rule or not rule.enabled: - return - - connection = self.connections.get(rule.target_connection_id) - if not connection: - logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found") - return - - if not self.check_endpoint_health(connection): - logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable") - return - - try: - objects = self.storage.list_objects_all(bucket_name) 
- logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}") - for obj in objects: - self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write") - except StorageError as e: - logger.error(f"Failed to list objects for replication: {e}") - - def create_remote_bucket(self, connection_id: str, bucket_name: str) -> None: - """Create a bucket on the remote connection.""" - connection = self.connections.get(connection_id) - if not connection: - raise ValueError(f"Connection {connection_id} not found") - - try: - s3 = self._create_client(connection) - s3.create_bucket(Bucket=bucket_name) - except ClientError as e: - logger.error(f"Failed to create remote bucket {bucket_name}: {e}") - raise - - def trigger_replication(self, bucket_name: str, object_key: str, action: str = "write") -> None: - rule = self.get_rule(bucket_name) - if not rule or not rule.enabled: - return - - connection = self.connections.get(rule.target_connection_id) - if not connection: - logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") - return - - if not self.check_endpoint_health(connection): - logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable") - return - - self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action) - - def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None: - if self._shutdown: - return - - current_rule = self.get_rule(bucket_name) - if not current_rule or not current_rule.enabled: - logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed") - return - - if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"): - logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}") - return - - try: - from .storage import ObjectStorage - ObjectStorage._sanitize_object_key(object_key) - except StorageError as e: - logger.error(f"Object key validation failed in replication: {e}") - return - - try: - s3 = self._create_client(conn) - - if action == "delete": - try: - s3.delete_object(Bucket=rule.target_bucket, Key=object_key) - logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") - self._update_last_sync(bucket_name, object_key) - self.failure_store.remove_failure(bucket_name, object_key) - except ClientError as e: - error_code = e.response.get('Error', {}).get('Code') - logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}") - self.failure_store.add_failure(bucket_name, ReplicationFailure( - object_key=object_key, - error_message=str(e), - timestamp=time.time(), - failure_count=1, - bucket_name=bucket_name, - action="delete", - last_error_code=error_code, - )) - return - - try: - path = self.storage.get_object_path(bucket_name, object_key) - except StorageError: - logger.error(f"Source object not found: {bucket_name}/{object_key}") - return - - content_type, _ = mimetypes.guess_type(path) - file_size = path.stat().st_size - - logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}") - - def do_upload() -> None: - """Upload object using appropriate method based on file size. 
- - For small files (< 10 MiB): Read into memory for simpler handling - For large files: Use streaming upload to avoid memory issues - """ - extra_args = {} - if content_type: - extra_args["ContentType"] = content_type - - if file_size >= self.streaming_threshold_bytes: - s3.upload_file( - str(path), - rule.target_bucket, - object_key, - ExtraArgs=extra_args if extra_args else None, - ) - else: - file_content = path.read_bytes() - put_kwargs = { - "Bucket": rule.target_bucket, - "Key": object_key, - "Body": file_content, - **extra_args, - } - s3.put_object(**put_kwargs) - - try: - do_upload() - except (ClientError, S3UploadFailedError) as e: - error_code = None - if isinstance(e, ClientError): - error_code = e.response['Error']['Code'] - elif isinstance(e, S3UploadFailedError): - if "NoSuchBucket" in str(e): - error_code = 'NoSuchBucket' - - if error_code == 'NoSuchBucket': - logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.") - bucket_ready = False - try: - s3.create_bucket(Bucket=rule.target_bucket) - bucket_ready = True - logger.info(f"Created target bucket {rule.target_bucket}") - except ClientError as bucket_err: - if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'): - logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)") - bucket_ready = True - else: - logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}") - raise e - - if bucket_ready: - do_upload() - else: - raise e - - logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") - self._update_last_sync(bucket_name, object_key) - self.failure_store.remove_failure(bucket_name, object_key) - - except (ClientError, OSError, ValueError) as e: - error_code = None - if isinstance(e, ClientError): - error_code = e.response.get('Error', {}).get('Code') - logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}") - self.failure_store.add_failure(bucket_name, ReplicationFailure( - object_key=object_key, - error_message=str(e), - timestamp=time.time(), - failure_count=1, - bucket_name=bucket_name, - action=action, - last_error_code=error_code, - )) - except Exception as e: - logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}") - self.failure_store.add_failure(bucket_name, ReplicationFailure( - object_key=object_key, - error_message=str(e), - timestamp=time.time(), - failure_count=1, - bucket_name=bucket_name, - action=action, - last_error_code=None, - )) - - def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[ReplicationFailure]: - failures = self.failure_store.load_failures(bucket_name) - return failures[offset:offset + limit] - - def get_failure_count(self, bucket_name: str) -> int: - return self.failure_store.get_failure_count(bucket_name) - - def retry_failed_item(self, bucket_name: str, object_key: str) -> bool: - failure = self.failure_store.get_failure(bucket_name, object_key) - if not failure: - return False - - rule = self.get_rule(bucket_name) - if not rule or not rule.enabled: - return False - - connection = self.connections.get(rule.target_connection_id) - if not connection: - logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found") - return False - - if not self.check_endpoint_health(connection): - logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable") - return False - - self._executor.submit(self._replicate_task, bucket_name, 
object_key, rule, connection, failure.action) - return True - - def retry_all_failed(self, bucket_name: str) -> Dict[str, int]: - failures = self.failure_store.load_failures(bucket_name) - if not failures: - return {"submitted": 0, "skipped": 0} - - rule = self.get_rule(bucket_name) - if not rule or not rule.enabled: - return {"submitted": 0, "skipped": len(failures)} - - connection = self.connections.get(rule.target_connection_id) - if not connection: - logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found") - return {"submitted": 0, "skipped": len(failures)} - - if not self.check_endpoint_health(connection): - logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable") - return {"submitted": 0, "skipped": len(failures)} - - submitted = 0 - for failure in failures: - self._executor.submit(self._replicate_task, bucket_name, failure.object_key, rule, connection, failure.action) - submitted += 1 - - return {"submitted": submitted, "skipped": 0} - - def dismiss_failure(self, bucket_name: str, object_key: str) -> bool: - return self.failure_store.remove_failure(bucket_name, object_key) - - def clear_failures(self, bucket_name: str) -> None: - self.failure_store.clear_failures(bucket_name) diff --git a/app/s3_api.py b/app/s3_api.py deleted file mode 100644 index 97cfd0a..0000000 --- a/app/s3_api.py +++ /dev/null @@ -1,3975 +0,0 @@ -from __future__ import annotations - -import base64 -import hashlib -import hmac -import json -import logging -import mimetypes -import re -import threading -import time -import uuid -from collections import OrderedDict -from datetime import datetime, timedelta, timezone -from typing import Any, Dict, Optional, Tuple -from urllib.parse import quote, urlencode, urlparse, unquote -from xml.etree.ElementTree import Element, SubElement, tostring, ParseError -from defusedxml.ElementTree import fromstring - -try: - import myfsio_core as _rc - if not all(hasattr(_rc, f) for f in ( - "verify_sigv4_signature", "derive_signing_key", "clear_signing_key_cache", - )): - raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release") - _HAS_RUST = True -except ImportError: - _rc = None - _HAS_RUST = False - -from flask import Blueprint, Response, current_app, jsonify, request, g -from werkzeug.http import http_date - -from .access_logging import AccessLoggingService, LoggingConfiguration -from .acl import AclService -from .bucket_policies import BucketPolicyStore -from .encryption import SSECEncryption, SSECMetadata, EncryptionError -from .extensions import limiter -from .iam import IamError, Principal -from .notifications import NotificationService, NotificationConfiguration, WebhookDestination -from .object_lock import ObjectLockService, ObjectLockRetention, ObjectLockConfig, ObjectLockError, RetentionMode -from .replication import ReplicationManager -from .storage import ObjectStorage, StorageError, QuotaExceededError, BucketNotFoundError, ObjectNotFoundError - -logger = logging.getLogger(__name__) - -S3_NS = "http://s3.amazonaws.com/doc/2006-03-01/" - -_HEADER_CONTROL_CHARS = re.compile(r'[\r\n\x00-\x1f\x7f]') - - -def _sanitize_header_value(value: str) -> str: - return _HEADER_CONTROL_CHARS.sub('', value) - - -MAX_XML_PAYLOAD_SIZE = 1048576 # 1 MB - - -def _parse_xml_with_limit(payload: bytes) -> Element: - """Parse XML payload with size limit to prevent DoS attacks.""" - max_size = current_app.config.get("MAX_XML_PAYLOAD_SIZE", MAX_XML_PAYLOAD_SIZE) - if len(payload) > max_size: - raise 
ParseError(f"XML payload exceeds maximum size of {max_size} bytes") - return fromstring(payload) - - -s3_api_bp = Blueprint("s3_api", __name__) - -def _storage() -> ObjectStorage: - return current_app.extensions["object_storage"] - - -def _acl() -> AclService: - return current_app.extensions["acl"] - - -def _iam(): - return current_app.extensions["iam"] - - -def _replication_manager() -> ReplicationManager: - return current_app.extensions["replication"] - - -def _bucket_policies() -> BucketPolicyStore: - store: BucketPolicyStore = current_app.extensions["bucket_policies"] - store.maybe_reload() - return store - - -def _build_policy_context() -> Dict[str, Any]: - cached = getattr(g, "_policy_context", None) - if cached is not None: - return cached - ctx: Dict[str, Any] = {} - if request.headers.get("Referer"): - ctx["aws:Referer"] = request.headers.get("Referer") - num_proxies = current_app.config.get("NUM_TRUSTED_PROXIES", 0) - if num_proxies > 0 and request.access_route and len(request.access_route) > num_proxies: - ctx["aws:SourceIp"] = request.access_route[-num_proxies] - elif request.remote_addr: - ctx["aws:SourceIp"] = request.remote_addr - elif request.access_route: - ctx["aws:SourceIp"] = request.access_route[0] - ctx["aws:SecureTransport"] = str(request.is_secure).lower() - if request.headers.get("User-Agent"): - ctx["aws:UserAgent"] = request.headers.get("User-Agent") - g._policy_context = ctx - return ctx - - -def _object_lock() -> ObjectLockService: - return current_app.extensions["object_lock"] - - -def _notifications() -> NotificationService: - return current_app.extensions["notifications"] - - -def _access_logging() -> AccessLoggingService: - return current_app.extensions["access_logging"] - - -def _get_list_buckets_limit() -> str: - return current_app.config.get("RATELIMIT_LIST_BUCKETS", "60 per minute") - - -def _get_bucket_ops_limit() -> str: - return current_app.config.get("RATELIMIT_BUCKET_OPS", "120 per minute") - - -def _get_object_ops_limit() -> str: - return current_app.config.get("RATELIMIT_OBJECT_OPS", "240 per minute") - - -def _get_head_ops_limit() -> str: - return current_app.config.get("RATELIMIT_HEAD_OPS", "100 per minute") - - -def _xml_response(element: Element, status: int = 200) -> Response: - xml_bytes = tostring(element, encoding="utf-8") - return Response(xml_bytes, status=status, mimetype="application/xml") - - -def _error_response(code: str, message: str, status: int) -> Response: - g.s3_error_code = code - error = Element("Error") - SubElement(error, "Code").text = code - SubElement(error, "Message").text = message - SubElement(error, "Resource").text = request.path - SubElement(error, "RequestId").text = uuid.uuid4().hex - return _xml_response(error, status) - - -def _require_xml_content_type() -> Response | None: - ct = request.headers.get("Content-Type", "") - if ct and not ct.startswith(("application/xml", "text/xml")): - return _error_response("InvalidRequest", "Content-Type must be application/xml or text/xml", 400) - return None - - -def _parse_range_header(range_header: str, file_size: int) -> list[tuple[int, int]] | None: - if not range_header.startswith("bytes="): - return None - max_range_value = 2**63 - 1 - ranges = [] - range_spec = range_header[6:] - for part in range_spec.split(","): - part = part.strip() - if not part: - continue - try: - if part.startswith("-"): - suffix_length = int(part[1:]) - if suffix_length <= 0 or suffix_length > max_range_value: - return None - start = max(0, file_size - suffix_length) - end = file_size - 1 - 
elif part.endswith("-"): - start = int(part[:-1]) - if start < 0 or start > max_range_value or start >= file_size: - return None - end = file_size - 1 - else: - start_str, end_str = part.split("-", 1) - start = int(start_str) - end = int(end_str) - if start < 0 or end < 0 or start > max_range_value or end > max_range_value: - return None - if start > end or start >= file_size: - return None - end = min(end, file_size - 1) - except (ValueError, OverflowError): - return None - ranges.append((start, end)) - return ranges if ranges else None - - -def _sign(key: bytes, msg: str) -> bytes: - return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() - - -_SIGNING_KEY_CACHE: OrderedDict[Tuple[str, str, str, str], Tuple[bytes, float]] = OrderedDict() -_SIGNING_KEY_CACHE_LOCK = threading.Lock() -_SIGNING_KEY_CACHE_TTL = 60.0 -_SIGNING_KEY_CACHE_MAX_SIZE = 256 - -_SIGV4_HEADER_RE = re.compile( - r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)" -) -_SIGV4_REQUIRED_HEADERS = frozenset({'host', 'x-amz-date'}) - - -def clear_signing_key_cache() -> None: - if _HAS_RUST: - _rc.clear_signing_key_cache() - with _SIGNING_KEY_CACHE_LOCK: - _SIGNING_KEY_CACHE.clear() - - -def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes: - if _HAS_RUST: - return bytes(_rc.derive_signing_key(key, date_stamp, region_name, service_name)) - - cache_key = (key, date_stamp, region_name, service_name) - now = time.time() - - with _SIGNING_KEY_CACHE_LOCK: - cached = _SIGNING_KEY_CACHE.get(cache_key) - if cached: - signing_key, cached_time = cached - if now - cached_time < _SIGNING_KEY_CACHE_TTL: - _SIGNING_KEY_CACHE.move_to_end(cache_key) - return signing_key - else: - del _SIGNING_KEY_CACHE[cache_key] - - k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp) - k_region = _sign(k_date, region_name) - k_service = _sign(k_region, service_name) - k_signing = _sign(k_service, "aws4_request") - - with _SIGNING_KEY_CACHE_LOCK: - if len(_SIGNING_KEY_CACHE) >= _SIGNING_KEY_CACHE_MAX_SIZE: - _SIGNING_KEY_CACHE.popitem(last=False) - _SIGNING_KEY_CACHE[cache_key] = (k_signing, now) - - return k_signing - - -def _get_canonical_uri(req: Any) -> str: - """Get the canonical URI for SigV4 signature verification. - - AWS SigV4 requires the canonical URI to be URL-encoded exactly as the client - sent it. Flask/Werkzeug automatically URL-decodes request.path, so we need - to get the raw path from the environ. - - The canonical URI should have each path segment URL-encoded (with '/' preserved), - and the encoding should match what the client used when signing. 
- """ - raw_uri = req.environ.get('RAW_URI') or req.environ.get('REQUEST_URI') - - if raw_uri: - path = raw_uri.split('?')[0] - return path - - return quote(req.path, safe="/-_.~") - - -def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None: - match = _SIGV4_HEADER_RE.match(auth_header) - if not match: - return None - - access_key, date_stamp, region, service, signed_headers_str, signature = match.groups() - secret_key = _iam().get_secret_key(access_key) - if not secret_key: - raise IamError("SignatureDoesNotMatch") - - amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date") - if not amz_date: - raise IamError("Missing Date header") - - try: - request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) - except ValueError: - raise IamError("Invalid X-Amz-Date format") - - now = datetime.now(timezone.utc) - time_diff = abs((now - request_time).total_seconds()) - tolerance = current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900) - if time_diff > tolerance: - raise IamError("Request timestamp too old or too far in the future") - - signed_headers_set = set(signed_headers_str.split(';')) - if not _SIGV4_REQUIRED_HEADERS.issubset(signed_headers_set): - if not ({'host', 'date'}.issubset(signed_headers_set)): - raise IamError("Required headers not signed") - - canonical_uri = _get_canonical_uri(req) - payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD" - - if _HAS_RUST: - query_params = list(req.args.items(multi=True)) - header_values = [] - for h in signed_headers_str.split(";"): - val = req.headers.get(h) or "" - if h.lower() == "expect" and val == "": - val = "100-continue" - header_values.append((h, val)) - if not _rc.verify_sigv4_signature( - req.method, canonical_uri, query_params, signed_headers_str, - header_values, payload_hash, amz_date, date_stamp, region, - service, secret_key, signature, - ): - raise IamError("SignatureDoesNotMatch") - else: - method = req.method - query_args = sorted(req.args.items(multi=True), key=lambda x: (x[0], x[1])) - canonical_query_parts = [] - for k, v in query_args: - canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}") - canonical_query_string = "&".join(canonical_query_parts) - - signed_headers_list = signed_headers_str.split(";") - canonical_headers_parts = [] - for header in signed_headers_list: - header_val = req.headers.get(header) - if header_val is None: - header_val = "" - if header.lower() == 'expect' and header_val == "": - header_val = "100-continue" - header_val = " ".join(header_val.split()) - canonical_headers_parts.append(f"{header.lower()}:{header_val}\n") - canonical_headers = "".join(canonical_headers_parts) - - canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}" - - credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" - signing_key = _get_signature_key(secret_key, date_stamp, region, service) - string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" - calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() - if not hmac.compare_digest(calculated_signature, signature): - raise IamError("SignatureDoesNotMatch") - - session_token = req.headers.get("X-Amz-Security-Token") - if session_token: - if not _iam().validate_session_token(access_key, session_token): - raise IamError("InvalidToken") - - return 
_iam().get_principal(access_key) - - -def _verify_sigv4_query(req: Any) -> Principal | None: - credential = req.args.get("X-Amz-Credential") - signed_headers_str = req.args.get("X-Amz-SignedHeaders") - signature = req.args.get("X-Amz-Signature") - amz_date = req.args.get("X-Amz-Date") - expires = req.args.get("X-Amz-Expires") - - if not (credential and signed_headers_str and signature and amz_date and expires): - return None - - try: - access_key, date_stamp, region, service, _ = credential.split("/") - except ValueError: - raise IamError("Invalid Credential format") - - try: - req_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) - except ValueError: - raise IamError("Invalid Date format") - - now = datetime.now(timezone.utc) - tolerance = timedelta(seconds=current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900)) - if req_time > now + tolerance: - raise IamError("Request date is too far in the future") - try: - expires_seconds = int(expires) - if expires_seconds <= 0: - raise IamError("Invalid Expires value: must be positive") - except ValueError: - raise IamError("Invalid Expires value: must be an integer") - min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1) - max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800) - if expires_seconds < min_expiry or expires_seconds > max_expiry: - raise IamError(f"Expiration must be between {min_expiry} second(s) and {max_expiry} seconds") - if now > req_time + timedelta(seconds=expires_seconds): - raise IamError("Request expired") - - secret_key = _iam().get_secret_key(access_key) - if not secret_key: - raise IamError("Invalid access key") - - canonical_uri = _get_canonical_uri(req) - - if _HAS_RUST: - query_params = [(k, v) for k, v in req.args.items(multi=True) if k != "X-Amz-Signature"] - header_values = [] - for h in signed_headers_str.split(";"): - val = req.headers.get(h) or "" - if h.lower() == "expect" and val == "": - val = "100-continue" - header_values.append((h, val)) - if not _rc.verify_sigv4_signature( - req.method, canonical_uri, query_params, signed_headers_str, - header_values, "UNSIGNED-PAYLOAD", amz_date, date_stamp, region, - service, secret_key, signature, - ): - raise IamError("SignatureDoesNotMatch") - else: - method = req.method - query_args = [] - for key, value in req.args.items(multi=True): - if key != "X-Amz-Signature": - query_args.append((key, value)) - query_args.sort(key=lambda x: (x[0], x[1])) - - canonical_query_parts = [] - for k, v in query_args: - canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}") - canonical_query_string = "&".join(canonical_query_parts) - - signed_headers_list = signed_headers_str.split(";") - canonical_headers_parts = [] - for header in signed_headers_list: - val = req.headers.get(header, "").strip() - if header.lower() == 'expect' and val == "": - val = "100-continue" - val = " ".join(val.split()) - canonical_headers_parts.append(f"{header.lower()}:{val}\n") - canonical_headers = "".join(canonical_headers_parts) - - payload_hash = "UNSIGNED-PAYLOAD" - - canonical_request = "\n".join([ - method, - canonical_uri, - canonical_query_string, - canonical_headers, - signed_headers_str, - payload_hash - ]) - - credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" - signing_key = _get_signature_key(secret_key, date_stamp, region, service) - hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest() - string_to_sign = 
f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}" - calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() - if not hmac.compare_digest(calculated_signature, signature): - raise IamError("SignatureDoesNotMatch") - - session_token = req.args.get("X-Amz-Security-Token") - if session_token: - if not _iam().validate_session_token(access_key, session_token): - raise IamError("InvalidToken") - - return _iam().get_principal(access_key) - - -def _verify_sigv4(req: Any) -> Principal | None: - auth_header = req.headers.get("Authorization") - if auth_header and auth_header.startswith("AWS4-HMAC-SHA256"): - return _verify_sigv4_header(req, auth_header) - - if req.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256": - return _verify_sigv4_query(req) - - return None - - -def _require_principal(): - sigv4_attempted = ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \ - (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256") - if sigv4_attempted: - try: - principal = _verify_sigv4(request) - if principal: - return principal, None - return None, _error_response("AccessDenied", "Signature verification failed", 403) - except IamError as exc: - return None, _error_response("AccessDenied", str(exc), 403) - except (ValueError, TypeError): - return None, _error_response("AccessDenied", "Signature verification failed", 403) - - access_key = request.headers.get("X-Access-Key") - secret_key = request.headers.get("X-Secret-Key") - if not access_key or not secret_key: - return None, _error_response("AccessDenied", "Missing credentials", 403) - try: - principal = _iam().authenticate(access_key, secret_key) - return principal, None - except IamError as exc: - return None, _error_response("AccessDenied", str(exc), 403) - - -def _authorize_action(principal: Principal | None, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None: - iam_allowed = False - iam_error: IamError | None = None - if principal is not None: - try: - _iam().authorize(principal, bucket_name, action, object_key=object_key) - iam_allowed = True - except IamError as exc: - iam_error = exc - else: - iam_error = IamError("Missing credentials") - - policy_decision = None - access_key = principal.access_key if principal else None - if bucket_name: - policy_context = _build_policy_context() - policy_decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action, policy_context) - if policy_decision == "deny": - raise IamError("Access denied by bucket policy") - - if iam_allowed: - return - if policy_decision == "allow": - return - - acl_allowed = False - if bucket_name: - acl_service = _acl() - acl_allowed = acl_service.evaluate_bucket_acl( - bucket_name, - access_key, - action, - is_authenticated=principal is not None, - ) - if acl_allowed: - return - - raise iam_error or IamError("Access denied") - - -def _object_principal(action: str, bucket_name: str, object_key: str): - principal, error = _require_principal() - try: - _authorize_action(principal, bucket_name, action, object_key=object_key) - return principal, None - except IamError as exc: - if not error: - return None, _error_response("AccessDenied", str(exc), 403) - return None, error - - -def _canonical_uri(bucket_name: str, object_key: str | None) -> str: - segments = [bucket_name] - if object_key: - segments.extend(object_key.split("/")) - encoded = [quote(segment, safe="-_.~") for segment in segments] - return "/" + 
"/".join(encoded) - - -def _extract_request_metadata() -> Dict[str, str]: - metadata: Dict[str, str] = {} - for header, value in request.headers.items(): - if header.lower().startswith("x-amz-meta-"): - key = header[11:] - if key and not (key.startswith("__") and key.endswith("__")): - metadata[key] = value - return metadata - - -def _derive_signing_key(secret: str, date_stamp: str, region: str, service: str) -> bytes: - def _sign(key: bytes, msg: str) -> bytes: - return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() - - k_date = _sign(("AWS4" + secret).encode("utf-8"), date_stamp) - k_region = _sign(k_date, region) - k_service = _sign(k_region, service) - return _sign(k_service, "aws4_request") - - -def _generate_presigned_url( - *, - principal: Principal, - secret_key: str, - method: str, - bucket_name: str, - object_key: str, - expires_in: int, - api_base_url: str | None = None, -) -> str: - region = current_app.config["AWS_REGION"] - service = current_app.config["AWS_SERVICE"] - algorithm = "AWS4-HMAC-SHA256" - now = datetime.now(timezone.utc) - amz_date = now.strftime("%Y%m%dT%H%M%SZ") - date_stamp = now.strftime("%Y%m%d") - credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" - credential = f"{principal.access_key}/{credential_scope}" - - query_params = { - "X-Amz-Algorithm": algorithm, - "X-Amz-Credential": credential, - "X-Amz-Date": amz_date, - "X-Amz-Expires": str(expires_in), - "X-Amz-SignedHeaders": "host", - "X-Amz-Content-Sha256": "UNSIGNED-PAYLOAD", - } - canonical_query = _encode_query_params(query_params) - - api_base = api_base_url or current_app.config.get("API_BASE_URL") - if api_base: - parsed = urlparse(api_base) - host = parsed.netloc - scheme = parsed.scheme - else: - host = request.host - scheme = request.scheme or "http" - - canonical_headers = f"host:{host}\n" - canonical_request = "\n".join( - [ - method, - _canonical_uri(bucket_name, object_key), - canonical_query, - canonical_headers, - "host", - "UNSIGNED-PAYLOAD", - ] - ) - hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest() - string_to_sign = "\n".join( - [ - algorithm, - amz_date, - credential_scope, - hashed_request, - ] - ) - signing_key = _derive_signing_key(secret_key, date_stamp, region, service) - signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest() - query_with_sig = canonical_query + f"&X-Amz-Signature={signature}" - return f"{scheme}://{host}{_canonical_uri(bucket_name, object_key)}?{query_with_sig}" - - -def _encode_query_params(params: dict[str, str]) -> str: - parts = [] - for key in sorted(params.keys()): - value = params[key] - encoded_key = quote(str(key), safe="-_.~") - encoded_value = quote(str(value), safe="-_.~") - parts.append(f"{encoded_key}={encoded_value}") - return "&".join(parts) - - -def _strip_ns(tag: str | None) -> str: - if not tag: - return "" - return tag.split("}")[-1] - - -def _find_element(parent: Element, name: str) -> Optional[Element]: - """Find a child element by name, trying S3 namespace then no namespace. - - This handles XML documents that may or may not include namespace prefixes. - """ - el = parent.find(f"{{{S3_NS}}}{name}") - if el is None: - el = parent.find(name) - return el - - -def _find_element_text(parent: Element, name: str, default: str = "") -> str: - """Find a child element and return its text content. - - Returns the default value if element not found or has no text. 
- """ - el = _find_element(parent, name) - if el is None or el.text is None: - return default - return el.text.strip() - - -def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]: - try: - root = _parse_xml_with_limit(payload) - except ParseError as exc: - raise ValueError("Malformed XML") from exc - if _strip_ns(root.tag) != "Tagging": - raise ValueError("Root element must be Tagging") - tagset = root.find(".//{http://s3.amazonaws.com/doc/2006-03-01/}TagSet") - if tagset is None: - tagset = root.find("TagSet") - if tagset is None: - return [] - tags: list[dict[str, str]] = [] - for tag_el in list(tagset): - if _strip_ns(tag_el.tag) != "Tag": - continue - key = _find_element_text(tag_el, "Key") - if not key: - continue - value = _find_element_text(tag_el, "Value") - tags.append({"Key": key, "Value": value}) - return tags - - -def _render_tagging_document(tags: list[dict[str, str]]) -> Element: - root = Element("Tagging") - tagset_el = SubElement(root, "TagSet") - for tag in tags: - tag_el = SubElement(tagset_el, "Tag") - SubElement(tag_el, "Key").text = tag.get("Key", "") - SubElement(tag_el, "Value").text = tag.get("Value", "") - return root - -DANGEROUS_CONTENT_TYPES = frozenset([ - "text/html", - "application/xhtml+xml", - "application/javascript", - "text/javascript", - "application/x-javascript", - "text/ecmascript", - "application/ecmascript", - "image/svg+xml", -]) - -SAFE_EXTENSION_MAP = { - ".txt": ["text/plain"], - ".json": ["application/json"], - ".xml": ["application/xml", "text/xml"], - ".csv": ["text/csv"], - ".pdf": ["application/pdf"], - ".png": ["image/png"], - ".jpg": ["image/jpeg"], - ".jpeg": ["image/jpeg"], - ".gif": ["image/gif"], - ".webp": ["image/webp"], - ".mp4": ["video/mp4"], - ".mp3": ["audio/mpeg"], - ".zip": ["application/zip"], - ".gz": ["application/gzip"], - ".tar": ["application/x-tar"], -} - - -def _validate_content_type(object_key: str, content_type: str | None) -> str | None: - """Validate Content-Type header for security. - - Returns an error message if validation fails, None otherwise. - - Rules: - 1. Block dangerous MIME types that can execute scripts (unless explicitly allowed) - 2. Warn if Content-Type doesn't match file extension (but don't block) - """ - if not content_type: - return None - - base_type = content_type.split(";")[0].strip().lower() - - if base_type in DANGEROUS_CONTENT_TYPES: - ext = "." + object_key.rsplit(".", 1)[-1].lower() if "." in object_key else "" - - allowed_dangerous = { - ".svg": "image/svg+xml", - ".html": "text/html", - ".htm": "text/html", - ".xhtml": "application/xhtml+xml", - ".js": "application/javascript", - ".mjs": "application/javascript", - } - - if ext in allowed_dangerous and base_type == allowed_dangerous[ext]: - return None - - return ( - f"Content-Type '{content_type}' is potentially dangerous and not allowed " - f"for object key '{object_key}'. Use a safe Content-Type or rename the file " - f"with an appropriate extension." 
- ) - - return None - - -def _parse_cors_document(payload: bytes) -> list[dict[str, Any]]: - try: - root = _parse_xml_with_limit(payload) - except ParseError as exc: - raise ValueError("Malformed XML") from exc - if _strip_ns(root.tag) != "CORSConfiguration": - raise ValueError("Root element must be CORSConfiguration") - rules: list[dict[str, Any]] = [] - for rule_el in list(root): - if _strip_ns(rule_el.tag) != "CORSRule": - continue - rule: dict[str, Any] = { - "AllowedOrigins": [], - "AllowedMethods": [], - "AllowedHeaders": [], - "ExposeHeaders": [], - } - for child in list(rule_el): - name = _strip_ns(child.tag) - if name == "AllowedOrigin": - rule["AllowedOrigins"].append((child.text or "")) - elif name == "AllowedMethod": - rule["AllowedMethods"].append((child.text or "")) - elif name == "AllowedHeader": - rule["AllowedHeaders"].append((child.text or "")) - elif name == "ExposeHeader": - rule["ExposeHeaders"].append((child.text or "")) - elif name == "MaxAgeSeconds": - try: - rule["MaxAgeSeconds"] = int(child.text or 0) - except ValueError: - raise ValueError("MaxAgeSeconds must be an integer") from None - rules.append(rule) - return rules - - -def _render_cors_document(rules: list[dict[str, Any]]) -> Element: - root = Element("CORSConfiguration") - for rule in rules: - rule_el = SubElement(root, "CORSRule") - for origin in rule.get("AllowedOrigins", []): - SubElement(rule_el, "AllowedOrigin").text = origin - for method in rule.get("AllowedMethods", []): - SubElement(rule_el, "AllowedMethod").text = method - for header in rule.get("AllowedHeaders", []): - SubElement(rule_el, "AllowedHeader").text = header - for header in rule.get("ExposeHeaders", []): - SubElement(rule_el, "ExposeHeader").text = header - if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None: - SubElement(rule_el, "MaxAgeSeconds").text = str(rule["MaxAgeSeconds"]) - return root - - -def _parse_encryption_document(payload: bytes) -> dict[str, Any]: - try: - root = _parse_xml_with_limit(payload) - except ParseError as exc: - raise ValueError("Malformed XML") from exc - if _strip_ns(root.tag) != "ServerSideEncryptionConfiguration": - raise ValueError("Root element must be ServerSideEncryptionConfiguration") - rules: list[dict[str, Any]] = [] - for rule_el in list(root): - if _strip_ns(rule_el.tag) != "Rule": - continue - default_el = None - bucket_key_el = None - for child in list(rule_el): - name = _strip_ns(child.tag) - if name == "ApplyServerSideEncryptionByDefault": - default_el = child - elif name == "BucketKeyEnabled": - bucket_key_el = child - if default_el is None: - continue - algo_el = default_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}SSEAlgorithm") - if algo_el is None: - algo_el = default_el.find("SSEAlgorithm") - if algo_el is None or not (algo_el.text or "").strip(): - raise ValueError("SSEAlgorithm is required") - rule: dict[str, Any] = {"SSEAlgorithm": algo_el.text.strip()} - kms_el = default_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}KMSMasterKeyID") - if kms_el is None: - kms_el = default_el.find("KMSMasterKeyID") - if kms_el is not None and kms_el.text: - rule["KMSMasterKeyID"] = kms_el.text.strip() - if bucket_key_el is not None and bucket_key_el.text: - rule["BucketKeyEnabled"] = bucket_key_el.text.strip().lower() in {"true", "1"} - rules.append(rule) - if not rules: - raise ValueError("At least one Rule is required") - return {"Rules": rules} - - -def _render_encryption_document(config: dict[str, Any]) -> Element: - root = Element("ServerSideEncryptionConfiguration") - for 
rule in config.get("Rules", []): - rule_el = SubElement(root, "Rule") - default_el = SubElement(rule_el, "ApplyServerSideEncryptionByDefault") - SubElement(default_el, "SSEAlgorithm").text = rule.get("SSEAlgorithm", "") - if rule.get("KMSMasterKeyID"): - SubElement(default_el, "KMSMasterKeyID").text = rule["KMSMasterKeyID"] - if "BucketKeyEnabled" in rule: - SubElement(rule_el, "BucketKeyEnabled").text = "true" if rule["BucketKeyEnabled"] else "false" - return root - - -def _stream_file(path, chunk_size: int = 1024 * 1024): - with path.open("rb") as handle: - while True: - chunk = handle.read(chunk_size) - if not chunk: - break - yield chunk - - -def _method_not_allowed(allowed: list[str]) -> Response: - response = _error_response( - "MethodNotAllowed", - "The specified method is not allowed for this resource", - 405, - ) - response.headers["Allow"] = ", ".join(sorted({method.upper() for method in allowed})) - return response - - -def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None: - from email.utils import parsedate_to_datetime - - if_match = request.headers.get("If-Match") - if if_match: - if if_match.strip() != "*": - match_etags = [e.strip().strip('"') for e in if_match.split(",")] - if etag not in match_etags: - return Response(status=412) - - if_unmodified = request.headers.get("If-Unmodified-Since") - if not if_match and if_unmodified and last_modified is not None: - try: - dt = parsedate_to_datetime(if_unmodified) - obj_dt = datetime.fromtimestamp(last_modified, timezone.utc) - if obj_dt > dt: - return Response(status=412) - except (TypeError, ValueError): - pass - - if_none_match = request.headers.get("If-None-Match") - if if_none_match: - if if_none_match.strip() == "*": - resp = Response(status=304) - resp.headers["ETag"] = f'"{etag}"' - if last_modified is not None: - resp.headers["Last-Modified"] = http_date(last_modified) - return resp - none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")] - if etag in none_match_etags: - resp = Response(status=304) - resp.headers["ETag"] = f'"{etag}"' - if last_modified is not None: - resp.headers["Last-Modified"] = http_date(last_modified) - return resp - - if_modified = request.headers.get("If-Modified-Since") - if not if_none_match and if_modified and last_modified is not None: - try: - dt = parsedate_to_datetime(if_modified) - obj_dt = datetime.fromtimestamp(last_modified, timezone.utc) - if obj_dt <= dt: - resp = Response(status=304) - resp.headers["ETag"] = f'"{etag}"' - resp.headers["Last-Modified"] = http_date(last_modified) - return resp - except (TypeError, ValueError): - pass - - return None - - -def _apply_object_headers( - response: Response, - *, - file_stat, - metadata: Dict[str, str] | None, - etag: str, - size_override: int | None = None, - mtime_override: float | None = None, -) -> None: - effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None) - effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None) - if effective_size is not None and response.status_code != 206: - response.headers["Content-Length"] = str(effective_size) - if effective_mtime is not None: - response.headers["Last-Modified"] = http_date(effective_mtime) - response.headers["ETag"] = f'"{etag}"' - response.headers["Accept-Ranges"] = "bytes" - for key, value in (metadata or {}).items(): - if key.startswith("__") and key.endswith("__"): - continue - safe_value = 
_sanitize_header_value(str(value)) - response.headers[f"X-Amz-Meta-{key}"] = safe_value - - -def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None: - handlers = { - "versioning": _bucket_versioning_handler, - "tagging": _bucket_tagging_handler, - "cors": _bucket_cors_handler, - "encryption": _bucket_encryption_handler, - "location": _bucket_location_handler, - "acl": _bucket_acl_handler, - "versions": _bucket_list_versions_handler, - "lifecycle": _bucket_lifecycle_handler, - "quota": _bucket_quota_handler, - "object-lock": _bucket_object_lock_handler, - "notification": _bucket_notification_handler, - "logging": _bucket_logging_handler, - "uploads": _bucket_uploads_handler, - "policy": _bucket_policy_handler, - "policyStatus": _bucket_policy_status_handler, - "replication": _bucket_replication_handler, - "website": _bucket_website_handler, - } - requested = [key for key in handlers if key in request.args] - if not requested: - return None - if len(requested) > 1: - return _error_response( - "InvalidRequest", - "Only a single bucket subresource can be requested at a time", - 400, - ) - handler = handlers[requested[0]] - return handler(bucket_name) - - -def _bucket_versioning_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "versioning") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - - if request.method == "PUT": - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - if _strip_ns(root.tag) != "VersioningConfiguration": - return _error_response("MalformedXML", "Root element must be VersioningConfiguration", 400) - status_el = root.find("{http://s3.amazonaws.com/doc/2006-03-01/}Status") - if status_el is None: - status_el = root.find("Status") - status = (status_el.text or "").strip() if status_el is not None else "" - if status not in {"Enabled", "Suspended", ""}: - return _error_response("MalformedXML", "Status must be Enabled or Suspended", 400) - try: - storage.set_bucket_versioning(bucket_name, status == "Enabled") - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket versioning updated", extra={"bucket": bucket_name, "status": status}) - return Response(status=200) - - try: - enabled = storage.is_versioning_enabled(bucket_name) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - root = Element("VersioningConfiguration") - SubElement(root, "Status").text = "Enabled" if enabled else "Suspended" - return _xml_response(root) - - -def _bucket_tagging_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "tagging") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if request.method == "GET": - try: - tags = storage.get_bucket_tags(bucket_name) - 
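The dispatcher above allows exactly one known subresource per request and answers a 400 InvalidRequest when several are combined. A simplified sketch of that rule over a raw query string; pick_subresource and its known list are illustrative, not the handler map itself:

from urllib.parse import parse_qs

def pick_subresource(query_string: str, known=("versioning", "tagging", "cors", "acl", "policy")):
    args = parse_qs(query_string, keep_blank_values=True)
    requested = [name for name in known if name in args]
    if not requested:
        return None           # not a subresource request
    if len(requested) > 1:
        raise ValueError("Only a single bucket subresource can be requested at a time")
    return requested[0]

pick_subresource("versioning=")        # -> 'versioning'
# pick_subresource("tagging=&cors=")   # raises ValueError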
except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - if not tags: - return _error_response("NoSuchTagSet", "No tags are configured for this bucket", 404) - return _xml_response(_render_tagging_document(tags)) - if request.method == "DELETE": - try: - storage.set_bucket_tags(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket tags deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - try: - tags = _parse_tagging_document(payload) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - tag_limit = current_app.config.get("OBJECT_TAG_LIMIT", 50) - if len(tags) > tag_limit: - return _error_response("InvalidTag", f"A maximum of {tag_limit} tags is supported", 400) - try: - storage.set_bucket_tags(bucket_name, tags) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket tags updated", extra={"bucket": bucket_name, "tags": len(tags)}) - return Response(status=204) - - -def _object_tagging_handler(bucket_name: str, object_key: str) -> Response: - """Handle object tagging operations (GET/PUT/DELETE //?tagging).""" - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - - principal, error = _require_principal() - if error: - return error - - action = "read" if request.method == "GET" else "write" - try: - _authorize_action(principal, bucket_name, action, object_key=object_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - - if request.method == "GET": - try: - tags = storage.get_object_tags(bucket_name, object_key) - except BucketNotFoundError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - except ObjectNotFoundError as exc: - return _error_response("NoSuchKey", str(exc), 404) - except StorageError as exc: - return _error_response("InternalError", str(exc), 500) - return _xml_response(_render_tagging_document(tags)) - - if request.method == "DELETE": - try: - storage.delete_object_tags(bucket_name, object_key) - except BucketNotFoundError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - except ObjectNotFoundError as exc: - return _error_response("NoSuchKey", str(exc), 404) - except StorageError as exc: - return _error_response("InternalError", str(exc), 500) - current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - try: - tags = _parse_tagging_document(payload) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - if len(tags) > 10: - return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400) - try: - storage.set_object_tags(bucket_name, object_key, tags) - except BucketNotFoundError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - except ObjectNotFoundError as exc: - return _error_response("NoSuchKey", str(exc), 404) - except StorageError as exc: - return _error_response("InternalError", str(exc), 500) - current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)}) - 
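From a client's point of view, the object-tagging handler above is reachable through the regular S3 tagging calls. A sketch using boto3 against a placeholder endpoint and placeholder credentials:

import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:5000",      # placeholder
    aws_access_key_id="AKIAEXAMPLE",           # placeholder
    aws_secret_access_key="EXAMPLE-SECRET",    # placeholder
    region_name="us-east-1",
)
s3.put_object_tagging(
    Bucket="my-bucket",
    Key="reports/q1.pdf",
    Tagging={"TagSet": [{"Key": "env", "Value": "dev"}]},   # the handler caps objects at 10 tags
)
tags = s3.get_object_tagging(Bucket="my-bucket", Key="reports/q1.pdf")["TagSet"]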
return Response(status=204) - - -def _validate_cors_origin(origin: str) -> bool: - """Validate a CORS origin pattern.""" - import re - origin = origin.strip() - if not origin: - return False - if origin == "*": - return True - if origin.startswith("*."): - domain = origin[2:] - if not domain or ".." in domain: - return False - return bool(re.match(r'^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$', domain)) - if origin.startswith(("http://", "https://")): - try: - from urllib.parse import urlparse - parsed = urlparse(origin) - if not parsed.netloc: - return False - if parsed.path and parsed.path != "/": - return False - return True - except Exception: - return False - return False - - -def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]: - sanitized: list[dict[str, Any]] = [] - for rule in rules: - allowed_origins = [origin.strip() for origin in rule.get("AllowedOrigins", []) if origin and origin.strip()] - allowed_methods = [method.strip().upper() for method in rule.get("AllowedMethods", []) if method and method.strip()] - allowed_headers = [header.strip() for header in rule.get("AllowedHeaders", []) if header and header.strip()] - expose_headers = [header.strip() for header in rule.get("ExposeHeaders", []) if header and header.strip()] - if not allowed_origins or not allowed_methods: - raise ValueError("Each CORSRule must include AllowedOrigin and AllowedMethod entries") - for origin in allowed_origins: - if not _validate_cors_origin(origin): - raise ValueError(f"Invalid CORS origin: {origin}") - valid_methods = {"GET", "PUT", "POST", "DELETE", "HEAD"} - for method in allowed_methods: - if method not in valid_methods: - raise ValueError(f"Invalid CORS method: {method}") - sanitized_rule: dict[str, Any] = { - "AllowedOrigins": allowed_origins, - "AllowedMethods": allowed_methods, - } - if allowed_headers: - sanitized_rule["AllowedHeaders"] = allowed_headers - if expose_headers: - sanitized_rule["ExposeHeaders"] = expose_headers - if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None: - sanitized_rule["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"]) - sanitized.append(sanitized_rule) - return sanitized - - -def _bucket_cors_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "cors") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if request.method == "GET": - try: - rules = storage.get_bucket_cors(bucket_name) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - if not rules: - return _error_response("NoSuchCORSConfiguration", "No CORS configuration found", 404) - return _xml_response(_render_cors_document(rules)) - if request.method == "DELETE": - try: - storage.set_bucket_cors(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket CORS deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - try: - storage.set_bucket_cors(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket CORS 
cleared", extra={"bucket": bucket_name}) - return Response(status=204) - try: - rules = _parse_cors_document(payload) - sanitized = _sanitize_cors_rules(rules) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - if not sanitized: - return _error_response("InvalidRequest", "At least one CORSRule must be supplied", 400) - try: - storage.set_bucket_cors(bucket_name, sanitized) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket CORS updated", extra={"bucket": bucket_name, "rules": len(sanitized)}) - return Response(status=204) - - -def _bucket_encryption_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "encryption") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if request.method == "GET": - try: - config = storage.get_bucket_encryption(bucket_name) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - if not config: - return _error_response( - "ServerSideEncryptionConfigurationNotFoundError", - "No server-side encryption configuration found", - 404, - ) - return _xml_response(_render_encryption_document(config)) - if request.method == "DELETE": - try: - storage.set_bucket_encryption(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket encryption deleted", extra={"bucket": bucket_name}) - return Response(status=204) - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - try: - storage.set_bucket_encryption(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket encryption cleared", extra={"bucket": bucket_name}) - return Response(status=204) - try: - config = _parse_encryption_document(payload) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - try: - storage.set_bucket_encryption(bucket_name, config) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket encryption updated", extra={"bucket": bucket_name}) - return Response(status=204) - - -def _bucket_location_handler(bucket_name: str) -> Response: - if request.method != "GET": - return _method_not_allowed(["GET"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "list") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - region = current_app.config.get("AWS_REGION", "us-east-1") - root = Element("LocationConstraint") - root.text = region if region != "us-east-1" else None - return _xml_response(root) - - -def _bucket_acl_handler(bucket_name: str) -> Response: - from .acl import create_canned_acl, Acl, AclGrant, GRANTEE_ALL_USERS, GRANTEE_AUTHENTICATED_USERS - - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - principal, error = _require_principal() - if error: - return error - try: - 
_authorize_action(principal, bucket_name, "share") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - acl_service = _acl() - owner_id = principal.access_key if principal else "anonymous" - - if request.method == "PUT": - canned_acl = request.headers.get("x-amz-acl", "private") - acl = acl_service.set_bucket_canned_acl(bucket_name, canned_acl, owner_id) - current_app.logger.info("Bucket ACL set", extra={"bucket": bucket_name, "acl": canned_acl}) - return Response(status=200) - - acl = acl_service.get_bucket_acl(bucket_name) - if not acl: - acl = create_canned_acl("private", owner_id) - - root = Element("AccessControlPolicy") - owner_el = SubElement(root, "Owner") - SubElement(owner_el, "ID").text = acl.owner - SubElement(owner_el, "DisplayName").text = acl.owner - - acl_el = SubElement(root, "AccessControlList") - for grant in acl.grants: - grant_el = SubElement(acl_el, "Grant") - grantee = SubElement(grant_el, "Grantee") - if grant.grantee == GRANTEE_ALL_USERS: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group") - SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AllUsers" - elif grant.grantee == GRANTEE_AUTHENTICATED_USERS: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group") - SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AuthenticatedUsers" - else: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser") - SubElement(grantee, "ID").text = grant.grantee - SubElement(grantee, "DisplayName").text = grant.grantee - SubElement(grant_el, "Permission").text = grant.permission - - return _xml_response(root) - - -def _object_acl_handler(bucket_name: str, object_key: str) -> Response: - from .acl import create_canned_acl, GRANTEE_ALL_USERS, GRANTEE_AUTHENTICATED_USERS - - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - storage = _storage() - try: - path = storage.get_object_path(bucket_name, object_key) - except (StorageError, FileNotFoundError): - return _error_response("NoSuchKey", "Object not found", 404) - - if request.method == "PUT": - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - owner_id = principal.access_key if principal else "anonymous" - canned_acl = request.headers.get("x-amz-acl", "private") - acl = create_canned_acl(canned_acl, owner_id) - acl_service = _acl() - metadata = storage.get_object_metadata(bucket_name, object_key) - metadata.update(acl_service.create_object_acl_metadata(acl)) - safe_key = storage._sanitize_object_key(object_key, storage._object_key_max_length_bytes) - storage._write_metadata(bucket_name, safe_key, metadata) - current_app.logger.info("Object ACL set", extra={"bucket": bucket_name, "key": object_key, "acl": canned_acl}) - return Response(status=200) - - principal, error = _object_principal("read", bucket_name, object_key) - if error: - return error - owner_id = principal.access_key if principal else "anonymous" - acl_service = _acl() - metadata = storage.get_object_metadata(bucket_name, object_key) - acl = acl_service.get_object_acl(bucket_name, object_key, metadata) - if not acl: - acl = create_canned_acl("private", owner_id) - - root = Element("AccessControlPolicy") - owner_el = SubElement(root, "Owner") - SubElement(owner_el, "ID").text = acl.owner - SubElement(owner_el, 
"DisplayName").text = acl.owner - acl_el = SubElement(root, "AccessControlList") - for grant in acl.grants: - grant_el = SubElement(acl_el, "Grant") - grantee = SubElement(grant_el, "Grantee") - if grant.grantee == GRANTEE_ALL_USERS: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group") - SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AllUsers" - elif grant.grantee == GRANTEE_AUTHENTICATED_USERS: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group") - SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AuthenticatedUsers" - else: - grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser") - SubElement(grantee, "ID").text = grant.grantee - SubElement(grantee, "DisplayName").text = grant.grantee - SubElement(grant_el, "Permission").text = grant.permission - return _xml_response(root) - - -def _object_attributes_handler(bucket_name: str, object_key: str) -> Response: - if request.method != "GET": - return _method_not_allowed(["GET"]) - principal, error = _object_principal("read", bucket_name, object_key) - if error: - return error - storage = _storage() - try: - path = storage.get_object_path(bucket_name, object_key) - file_stat = path.stat() - metadata = storage.get_object_metadata(bucket_name, object_key) - except (StorageError, FileNotFoundError): - return _error_response("NoSuchKey", "Object not found", 404) - - requested = request.headers.get("x-amz-object-attributes", "") - attrs = {a.strip() for a in requested.split(",") if a.strip()} - - root = Element("GetObjectAttributesResponse") - if "ETag" in attrs: - etag = metadata.get("__etag__") or storage._compute_etag(path) - SubElement(root, "ETag").text = etag - if "StorageClass" in attrs: - SubElement(root, "StorageClass").text = "STANDARD" - if "ObjectSize" in attrs: - SubElement(root, "ObjectSize").text = str(file_stat.st_size) - if "Checksum" in attrs: - SubElement(root, "Checksum") - if "ObjectParts" in attrs: - SubElement(root, "ObjectParts") - - response = _xml_response(root) - response.headers["Last-Modified"] = http_date(file_stat.st_mtime) - return response - - -def _bucket_list_versions_handler(bucket_name: str) -> Response: - """Handle ListObjectVersions (GET /?versions).""" - if request.method != "GET": - return _method_not_allowed(["GET"]) - - principal, error = _require_principal() - try: - _authorize_action(principal, bucket_name, "list") - except IamError as exc: - if error: - return error - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - - try: - objects = storage.list_objects_all(bucket_name) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - - prefix = request.args.get("prefix", "") - delimiter = request.args.get("delimiter", "") - try: - max_keys = int(request.args.get("max-keys", 1000)) - if max_keys < 1: - return _error_response("InvalidArgument", "max-keys must be a positive integer", 400) - max_keys = min(max_keys, 1000) - except ValueError: - return _error_response("InvalidArgument", "max-keys must be an integer", 400) - key_marker = request.args.get("key-marker", "") - - if prefix: - objects = [obj for obj in objects if obj.key.startswith(prefix)] - - if key_marker: - objects = [obj for obj in objects if obj.key > key_marker] - - root = Element("ListVersionsResult", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - SubElement(root, "Name").text = bucket_name - SubElement(root, "Prefix").text = prefix - SubElement(root, 
"KeyMarker").text = key_marker - SubElement(root, "MaxKeys").text = str(max_keys) - if delimiter: - SubElement(root, "Delimiter").text = delimiter - - version_count = 0 - is_truncated = False - next_key_marker = "" - - for obj in objects: - if version_count >= max_keys: - is_truncated = True - break - - version = SubElement(root, "Version") - SubElement(version, "Key").text = obj.key - SubElement(version, "VersionId").text = "null" - SubElement(version, "IsLatest").text = "true" - SubElement(version, "LastModified").text = obj.last_modified.strftime("%Y-%m-%dT%H:%M:%S.000Z") - if obj.etag: - SubElement(version, "ETag").text = f'"{obj.etag}"' - SubElement(version, "Size").text = str(obj.size) - SubElement(version, "StorageClass").text = "STANDARD" - - owner = SubElement(version, "Owner") - SubElement(owner, "ID").text = "local-owner" - SubElement(owner, "DisplayName").text = "Local Owner" - - version_count += 1 - next_key_marker = obj.key - - try: - versions = storage.list_object_versions(bucket_name, obj.key) - for v in versions: - if version_count >= max_keys: - is_truncated = True - break - - ver_elem = SubElement(root, "Version") - SubElement(ver_elem, "Key").text = obj.key - SubElement(ver_elem, "VersionId").text = v.get("version_id", "unknown") - SubElement(ver_elem, "IsLatest").text = "false" - SubElement(ver_elem, "LastModified").text = v.get("archived_at") or "1970-01-01T00:00:00Z" - SubElement(ver_elem, "ETag").text = f'"{v.get("etag", "")}"' - SubElement(ver_elem, "Size").text = str(v.get("size", 0)) - SubElement(ver_elem, "StorageClass").text = "STANDARD" - - owner = SubElement(ver_elem, "Owner") - SubElement(owner, "ID").text = "local-owner" - SubElement(owner, "DisplayName").text = "Local Owner" - - version_count += 1 - except StorageError: - pass - - SubElement(root, "IsTruncated").text = "true" if is_truncated else "false" - if is_truncated and next_key_marker: - SubElement(root, "NextKeyMarker").text = next_key_marker - - return _xml_response(root) - - -def _bucket_lifecycle_handler(bucket_name: str) -> Response: - """Handle bucket lifecycle configuration (GET/PUT/DELETE /?lifecycle).""" - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "lifecycle") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - if request.method == "GET": - config = storage.get_bucket_lifecycle(bucket_name) - if not config: - return _error_response("NoSuchLifecycleConfiguration", "The lifecycle configuration does not exist", 404) - return _xml_response(_render_lifecycle_config(config)) - - if request.method == "DELETE": - storage.set_bucket_lifecycle(bucket_name, None) - current_app.logger.info("Bucket lifecycle deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - try: - config = _parse_lifecycle_config(payload) - storage.set_bucket_lifecycle(bucket_name, config) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - except StorageError as exc: - return 
_error_response("NoSuchBucket", str(exc), 404) - - current_app.logger.info("Bucket lifecycle updated", extra={"bucket": bucket_name}) - return Response(status=200) - - -def _render_lifecycle_config(config: list) -> Element: - """Render lifecycle configuration to XML.""" - root = Element("LifecycleConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - for rule in config: - rule_el = SubElement(root, "Rule") - SubElement(rule_el, "ID").text = rule.get("ID", "") - - filter_el = SubElement(rule_el, "Filter") - if rule.get("Prefix"): - SubElement(filter_el, "Prefix").text = rule.get("Prefix", "") - - SubElement(rule_el, "Status").text = rule.get("Status", "Enabled") - - if "Expiration" in rule: - exp = rule["Expiration"] - exp_el = SubElement(rule_el, "Expiration") - if "Days" in exp: - SubElement(exp_el, "Days").text = str(exp["Days"]) - if "Date" in exp: - SubElement(exp_el, "Date").text = exp["Date"] - if exp.get("ExpiredObjectDeleteMarker"): - SubElement(exp_el, "ExpiredObjectDeleteMarker").text = "true" - - if "NoncurrentVersionExpiration" in rule: - nve = rule["NoncurrentVersionExpiration"] - nve_el = SubElement(rule_el, "NoncurrentVersionExpiration") - if "NoncurrentDays" in nve: - SubElement(nve_el, "NoncurrentDays").text = str(nve["NoncurrentDays"]) - - if "AbortIncompleteMultipartUpload" in rule: - aimu = rule["AbortIncompleteMultipartUpload"] - aimu_el = SubElement(rule_el, "AbortIncompleteMultipartUpload") - if "DaysAfterInitiation" in aimu: - SubElement(aimu_el, "DaysAfterInitiation").text = str(aimu["DaysAfterInitiation"]) - - return root - - -def _parse_lifecycle_config(payload: bytes) -> list: - """Parse lifecycle configuration from XML.""" - try: - root = _parse_xml_with_limit(payload) - except ParseError as exc: - raise ValueError(f"Unable to parse XML document: {exc}") from exc - - if _strip_ns(root.tag) != "LifecycleConfiguration": - raise ValueError("Root element must be LifecycleConfiguration") - - rules = [] - for rule_el in root.findall("{http://s3.amazonaws.com/doc/2006-03-01/}Rule") or root.findall("Rule"): - rule: dict = {} - - id_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ID") or rule_el.find("ID") - if id_el is not None and id_el.text: - rule["ID"] = id_el.text.strip() - - filter_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Filter") or rule_el.find("Filter") - if filter_el is not None: - prefix_el = filter_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Prefix") or filter_el.find("Prefix") - if prefix_el is not None and prefix_el.text: - rule["Prefix"] = prefix_el.text - - if "Prefix" not in rule: - prefix_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Prefix") or rule_el.find("Prefix") - if prefix_el is not None: - rule["Prefix"] = prefix_el.text or "" - - status_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Status") or rule_el.find("Status") - rule["Status"] = (status_el.text or "Enabled").strip() if status_el is not None else "Enabled" - - exp_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Expiration") or rule_el.find("Expiration") - if exp_el is not None: - expiration: dict = {} - days_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Days") or exp_el.find("Days") - if days_el is not None and days_el.text: - days_val = int(days_el.text.strip()) - if days_val <= 0: - raise ValueError("Expiration Days must be a positive integer") - expiration["Days"] = days_val - date_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Date") or exp_el.find("Date") - if date_el is 
not None and date_el.text: - expiration["Date"] = date_el.text.strip() - eodm_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ExpiredObjectDeleteMarker") or exp_el.find("ExpiredObjectDeleteMarker") - if eodm_el is not None and (eodm_el.text or "").strip().lower() in {"true", "1"}: - expiration["ExpiredObjectDeleteMarker"] = True - if expiration: - rule["Expiration"] = expiration - - nve_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}NoncurrentVersionExpiration") or rule_el.find("NoncurrentVersionExpiration") - if nve_el is not None: - nve: dict = {} - days_el = nve_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}NoncurrentDays") or nve_el.find("NoncurrentDays") - if days_el is not None and days_el.text: - noncurrent_days = int(days_el.text.strip()) - if noncurrent_days <= 0: - raise ValueError("NoncurrentDays must be a positive integer") - nve["NoncurrentDays"] = noncurrent_days - if nve: - rule["NoncurrentVersionExpiration"] = nve - - aimu_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}AbortIncompleteMultipartUpload") or rule_el.find("AbortIncompleteMultipartUpload") - if aimu_el is not None: - aimu: dict = {} - days_el = aimu_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}DaysAfterInitiation") or aimu_el.find("DaysAfterInitiation") - if days_el is not None and days_el.text: - days_after = int(days_el.text.strip()) - if days_after <= 0: - raise ValueError("DaysAfterInitiation must be a positive integer") - aimu["DaysAfterInitiation"] = days_after - if aimu: - rule["AbortIncompleteMultipartUpload"] = aimu - - rules.append(rule) - - return rules - - -def _bucket_quota_handler(bucket_name: str) -> Response: - """Handle bucket quota configuration (GET/PUT/DELETE /?quota).""" - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "quota") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - if request.method == "GET": - quota = storage.get_bucket_quota(bucket_name) - if not quota: - return _error_response("NoSuchQuotaConfiguration", "No quota configuration found", 404) - - stats = storage.bucket_stats(bucket_name) - return jsonify({ - "quota": quota, - "usage": { - "bytes": stats.get("bytes", 0), - "objects": stats.get("objects", 0), - } - }) - - if request.method == "DELETE": - try: - storage.set_bucket_quota(bucket_name, max_bytes=None, max_objects=None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket quota deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - payload = request.get_json(silent=True) - if not payload: - return _error_response("MalformedRequest", "Request body must be JSON with quota limits", 400) - - max_size_bytes = payload.get("max_size_bytes") - max_objects = payload.get("max_objects") - - if max_size_bytes is None and max_objects is None: - return _error_response("InvalidArgument", "At least one of max_size_bytes or max_objects is required", 400) - - if max_size_bytes is not None: - try: - max_size_bytes = int(max_size_bytes) - if max_size_bytes < 0: - raise ValueError("must be non-negative") - except (TypeError, ValueError) as exc: - return _error_response("InvalidArgument", f"max_size_bytes 
{exc}", 400) - - if max_objects is not None: - try: - max_objects = int(max_objects) - if max_objects < 0: - raise ValueError("must be non-negative") - except (TypeError, ValueError) as exc: - return _error_response("InvalidArgument", f"max_objects {exc}", 400) - - try: - storage.set_bucket_quota(bucket_name, max_bytes=max_size_bytes, max_objects=max_objects) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - - current_app.logger.info( - "Bucket quota updated", - extra={"bucket": bucket_name, "max_size_bytes": max_size_bytes, "max_objects": max_objects} - ) - return Response(status=204) - - -def _bucket_object_lock_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "object_lock") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - lock_service = _object_lock() - - if request.method == "GET": - config = lock_service.get_bucket_lock_config(bucket_name) - root = Element("ObjectLockConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - SubElement(root, "ObjectLockEnabled").text = "Enabled" if config.enabled else "Disabled" - return _xml_response(root) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - enabled_el = root.find("{http://s3.amazonaws.com/doc/2006-03-01/}ObjectLockEnabled") or root.find("ObjectLockEnabled") - enabled = (enabled_el.text or "").strip() == "Enabled" if enabled_el is not None else False - - config = ObjectLockConfig(enabled=enabled) - lock_service.set_bucket_lock_config(bucket_name, config) - - current_app.logger.info("Bucket object lock updated", extra={"bucket": bucket_name, "enabled": enabled}) - return Response(status=200) - - -def _bucket_notification_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "notification") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - notification_service = _notifications() - - if request.method == "GET": - configs = notification_service.get_bucket_notifications(bucket_name) - root = Element("NotificationConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - for config in configs: - webhook_el = SubElement(root, "WebhookConfiguration") - SubElement(webhook_el, "Id").text = config.id - for event in config.events: - SubElement(webhook_el, "Event").text = event - dest_el = SubElement(webhook_el, "Destination") - SubElement(dest_el, "Url").text = config.destination.url - if config.prefix_filter or config.suffix_filter: - filter_el = SubElement(webhook_el, "Filter") - key_el = SubElement(filter_el, 
"S3Key") - if config.prefix_filter: - rule_el = SubElement(key_el, "FilterRule") - SubElement(rule_el, "Name").text = "prefix" - SubElement(rule_el, "Value").text = config.prefix_filter - if config.suffix_filter: - rule_el = SubElement(key_el, "FilterRule") - SubElement(rule_el, "Name").text = "suffix" - SubElement(rule_el, "Value").text = config.suffix_filter - return _xml_response(root) - - if request.method == "DELETE": - notification_service.delete_bucket_notifications(bucket_name) - current_app.logger.info("Bucket notifications deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - notification_service.delete_bucket_notifications(bucket_name) - return Response(status=200) - - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - configs: list[NotificationConfiguration] = [] - for webhook_el in root.findall("{http://s3.amazonaws.com/doc/2006-03-01/}WebhookConfiguration") or root.findall("WebhookConfiguration"): - config_id = _find_element_text(webhook_el, "Id") or uuid.uuid4().hex - events = [el.text for el in webhook_el.findall("{http://s3.amazonaws.com/doc/2006-03-01/}Event") or webhook_el.findall("Event") if el.text] - - dest_el = _find_element(webhook_el, "Destination") - url = _find_element_text(dest_el, "Url") if dest_el else "" - if not url: - return _error_response("InvalidArgument", "Destination URL is required", 400) - - prefix = "" - suffix = "" - filter_el = _find_element(webhook_el, "Filter") - if filter_el: - key_el = _find_element(filter_el, "S3Key") - if key_el: - for rule_el in key_el.findall("{http://s3.amazonaws.com/doc/2006-03-01/}FilterRule") or key_el.findall("FilterRule"): - name = _find_element_text(rule_el, "Name") - value = _find_element_text(rule_el, "Value") - if name == "prefix": - prefix = value - elif name == "suffix": - suffix = value - - configs.append(NotificationConfiguration( - id=config_id, - events=events, - destination=WebhookDestination(url=url), - prefix_filter=prefix, - suffix_filter=suffix, - )) - - notification_service.set_bucket_notifications(bucket_name, configs) - current_app.logger.info("Bucket notifications updated", extra={"bucket": bucket_name, "configs": len(configs)}) - return Response(status=200) - - -def _bucket_logging_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "logging") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - logging_service = _access_logging() - - if request.method == "GET": - config = logging_service.get_bucket_logging(bucket_name) - root = Element("BucketLoggingStatus", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - if config and config.enabled: - logging_enabled = SubElement(root, "LoggingEnabled") - SubElement(logging_enabled, "TargetBucket").text = config.target_bucket - SubElement(logging_enabled, "TargetPrefix").text = config.target_prefix - return _xml_response(root) - - if request.method == "DELETE": - logging_service.delete_bucket_logging(bucket_name) 
- current_app.logger.info("Bucket logging deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - logging_service.delete_bucket_logging(bucket_name) - return Response(status=200) - - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - logging_enabled = _find_element(root, "LoggingEnabled") - if logging_enabled is None: - logging_service.delete_bucket_logging(bucket_name) - return Response(status=200) - - target_bucket = _find_element_text(logging_enabled, "TargetBucket") - if not target_bucket: - return _error_response("InvalidArgument", "TargetBucket is required", 400) - - if not storage.bucket_exists(target_bucket): - return _error_response("InvalidTargetBucketForLogging", "Target bucket does not exist", 400) - - target_prefix = _find_element_text(logging_enabled, "TargetPrefix") - - config = LoggingConfiguration( - target_bucket=target_bucket, - target_prefix=target_prefix, - enabled=True, - ) - logging_service.set_bucket_logging(bucket_name, config) - - current_app.logger.info( - "Bucket logging updated", - extra={"bucket": bucket_name, "target_bucket": target_bucket, "target_prefix": target_prefix} - ) - return Response(status=200) - - -def _bucket_uploads_handler(bucket_name: str) -> Response: - if request.method != "GET": - return _method_not_allowed(["GET"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "list") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - key_marker = request.args.get("key-marker", "") - upload_id_marker = request.args.get("upload-id-marker", "") - prefix = request.args.get("prefix", "") - delimiter = request.args.get("delimiter", "") - try: - max_uploads = int(request.args.get("max-uploads", 1000)) - if max_uploads < 1: - return _error_response("InvalidArgument", "max-uploads must be a positive integer", 400) - max_uploads = min(max_uploads, 1000) - except ValueError: - return _error_response("InvalidArgument", "max-uploads must be an integer", 400) - - uploads = storage.list_multipart_uploads(bucket_name, include_orphaned=True) - - if prefix: - uploads = [u for u in uploads if u["object_key"].startswith(prefix)] - if key_marker: - uploads = [u for u in uploads if u["object_key"] > key_marker or - (u["object_key"] == key_marker and upload_id_marker and u["upload_id"] > upload_id_marker)] - - uploads.sort(key=lambda u: (u["object_key"], u["upload_id"])) - - is_truncated = len(uploads) > max_uploads - if is_truncated: - uploads = uploads[:max_uploads] - - root = Element("ListMultipartUploadsResult", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "KeyMarker").text = key_marker - SubElement(root, "UploadIdMarker").text = upload_id_marker - if prefix: - SubElement(root, "Prefix").text = prefix - if delimiter: - SubElement(root, "Delimiter").text = delimiter - SubElement(root, "MaxUploads").text = str(max_uploads) - SubElement(root, "IsTruncated").text = "true" if is_truncated else "false" - - if is_truncated and uploads: - SubElement(root, "NextKeyMarker").text = uploads[-1]["object_key"] 
- SubElement(root, "NextUploadIdMarker").text = uploads[-1]["upload_id"] - - for upload in uploads: - upload_el = SubElement(root, "Upload") - SubElement(upload_el, "Key").text = upload["object_key"] - SubElement(upload_el, "UploadId").text = upload["upload_id"] - if upload.get("created_at"): - SubElement(upload_el, "Initiated").text = upload["created_at"] - if upload.get("orphaned"): - SubElement(upload_el, "StorageClass").text = "ORPHANED" - - return _xml_response(root) - - -def _object_retention_handler(bucket_name: str, object_key: str) -> Response: - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "object_lock", object_key=object_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - try: - storage.get_object_path(bucket_name, object_key) - except StorageError: - return _error_response("NoSuchKey", "Object does not exist", 404) - - lock_service = _object_lock() - - if request.method == "GET": - retention = lock_service.get_object_retention(bucket_name, object_key) - if not retention: - return _error_response("NoSuchObjectLockConfiguration", "No retention policy", 404) - - root = Element("Retention", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - SubElement(root, "Mode").text = retention.mode.value - SubElement(root, "RetainUntilDate").text = retention.retain_until_date.strftime("%Y-%m-%dT%H:%M:%S.000Z") - return _xml_response(root) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - mode_str = _find_element_text(root, "Mode") - retain_until_str = _find_element_text(root, "RetainUntilDate") - - if not mode_str or not retain_until_str: - return _error_response("InvalidArgument", "Mode and RetainUntilDate are required", 400) - - try: - mode = RetentionMode(mode_str) - except ValueError: - return _error_response("InvalidArgument", f"Invalid retention mode: {mode_str}", 400) - - try: - retain_until = datetime.fromisoformat(retain_until_str.replace("Z", "+00:00")) - except ValueError: - return _error_response("InvalidArgument", f"Invalid date format: {retain_until_str}", 400) - - bypass = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true" - - retention = ObjectLockRetention(mode=mode, retain_until_date=retain_until) - try: - lock_service.set_object_retention(bucket_name, object_key, retention, bypass_governance=bypass) - except ObjectLockError as exc: - return _error_response("AccessDenied", str(exc), 403) - - current_app.logger.info( - "Object retention set", - extra={"bucket": bucket_name, "key": object_key, "mode": mode_str, "until": retain_until_str} - ) - return Response(status=200) - - -def _object_legal_hold_handler(bucket_name: str, object_key: str) -> Response: - if request.method not in {"GET", "PUT"}: - return _method_not_allowed(["GET", "PUT"]) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "object_lock", 
object_key=object_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - - try: - storage.get_object_path(bucket_name, object_key) - except StorageError: - return _error_response("NoSuchKey", "Object does not exist", 404) - - lock_service = _object_lock() - - if request.method == "GET": - enabled = lock_service.get_legal_hold(bucket_name, object_key) - root = Element("LegalHold", xmlns="http://s3.amazonaws.com/doc/2006-03-01/") - SubElement(root, "Status").text = "ON" if enabled else "OFF" - return _xml_response(root) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - status = _find_element_text(root, "Status") - if status not in {"ON", "OFF"}: - return _error_response("InvalidArgument", "Status must be ON or OFF", 400) - - lock_service.set_legal_hold(bucket_name, object_key, status == "ON") - - current_app.logger.info( - "Object legal hold set", - extra={"bucket": bucket_name, "key": object_key, "status": status} - ) - return Response(status=200) - - -def _bulk_delete_handler(bucket_name: str) -> Response: - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "delete") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body must include a Delete specification", 400) - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - if _strip_ns(root.tag) != "Delete": - return _error_response("MalformedXML", "Root element must be Delete", 400) - - quiet = False - objects: list[dict[str, str | None]] = [] - for child in list(root): - name = _strip_ns(child.tag) - if name == "Quiet": - quiet = (child.text or "").strip().lower() in {"true", "1"} - continue - if name != "Object": - continue - key_text = "" - version_text: str | None = None - for entry in list(child): - entry_name = _strip_ns(entry.tag) - if entry_name == "Key": - key_text = (entry.text or "").strip() - elif entry_name == "VersionId": - version_text = (entry.text or "").strip() or None - if not key_text: - continue - objects.append({"Key": key_text, "VersionId": version_text}) - - if not objects: - return _error_response("MalformedXML", "At least one Object entry is required", 400) - if len(objects) > 1000: - return _error_response("MalformedXML", "A maximum of 1000 objects can be deleted per request", 400) - - storage = _storage() - deleted: list[dict[str, str | None]] = [] - errors: list[dict[str, str]] = [] - for entry in objects: - key = entry["Key"] or "" - version_id = entry.get("VersionId") - try: - if version_id: - storage.delete_object_version(bucket_name, key, version_id) - deleted.append({"Key": key, "VersionId": version_id}) - else: - storage.delete_object(bucket_name, key) - deleted.append({"Key": key, "VersionId": None}) - except StorageError as exc: 
- errors.append({"Key": key, "Code": "InvalidRequest", "Message": str(exc)}) - - result = Element("DeleteResult") - if not quiet: - for item in deleted: - deleted_el = SubElement(result, "Deleted") - SubElement(deleted_el, "Key").text = item["Key"] - if item.get("VersionId"): - SubElement(deleted_el, "VersionId").text = item["VersionId"] - for err in errors: - error_el = SubElement(result, "Error") - SubElement(error_el, "Key").text = err.get("Key", "") - SubElement(error_el, "Code").text = err.get("Code", "InvalidRequest") - SubElement(error_el, "Message").text = err.get("Message", "Request failed") - - current_app.logger.info( - "Bulk object delete", - extra={"bucket": bucket_name, "deleted": len(deleted), "errors": len(errors)}, - ) - return _xml_response(result, status=200) - - -def _post_object(bucket_name: str) -> Response: - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - object_key = request.form.get("key") - policy_b64 = request.form.get("policy") - signature = request.form.get("x-amz-signature") - credential = request.form.get("x-amz-credential") - algorithm = request.form.get("x-amz-algorithm") - amz_date = request.form.get("x-amz-date") - if not all([object_key, policy_b64, signature, credential, algorithm, amz_date]): - return _error_response("InvalidArgument", "Missing required form fields", 400) - if algorithm != "AWS4-HMAC-SHA256": - return _error_response("InvalidArgument", "Unsupported signing algorithm", 400) - try: - policy_json = base64.b64decode(policy_b64).decode("utf-8") - policy = __import__("json").loads(policy_json) - except (ValueError, __import__("json").JSONDecodeError) as exc: - return _error_response("InvalidPolicyDocument", f"Invalid policy: {exc}", 400) - expiration = policy.get("expiration") - if expiration: - try: - exp_time = datetime.fromisoformat(expiration.replace("Z", "+00:00")) - if datetime.now(timezone.utc) > exp_time: - return _error_response("AccessDenied", "Policy expired", 403) - except ValueError: - return _error_response("InvalidPolicyDocument", "Invalid expiration format", 400) - conditions = policy.get("conditions", []) - validation_error = _validate_post_policy_conditions(bucket_name, object_key, conditions, request.form, request.content_length or 0) - if validation_error: - return _error_response("AccessDenied", validation_error, 403) - try: - parts = credential.split("/") - if len(parts) != 5: - raise ValueError("Invalid credential format") - access_key, date_stamp, region, service, _ = parts - except ValueError: - return _error_response("InvalidArgument", "Invalid credential format", 400) - secret_key = _iam().get_secret_key(access_key) - if not secret_key: - return _error_response("AccessDenied", "Invalid access key", 403) - signing_key = _derive_signing_key(secret_key, date_stamp, region, service) - expected_signature = hmac.new(signing_key, policy_b64.encode("utf-8"), hashlib.sha256).hexdigest() - if not hmac.compare_digest(expected_signature, signature): - return _error_response("SignatureDoesNotMatch", "Signature verification failed", 403) - principal = _iam().get_principal(access_key) - if not principal: - return _error_response("AccessDenied", "Invalid access key", 403) - if "${filename}" in object_key: - temp_key = object_key.replace("${filename}", request.files.get("file").filename if request.files.get("file") else "upload") - else: - temp_key = object_key - try: - _authorize_action(principal, bucket_name, "write", object_key=temp_key) 
- except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - file = request.files.get("file") - if not file: - return _error_response("InvalidArgument", "Missing file field", 400) - if "${filename}" in object_key: - object_key = object_key.replace("${filename}", file.filename or "upload") - metadata = {} - for field_name, value in request.form.items(): - if field_name.lower().startswith("x-amz-meta-"): - key = field_name[11:] - if key and not (key.startswith("__") and key.endswith("__")): - metadata[key] = value - try: - meta = storage.put_object(bucket_name, object_key, file.stream, metadata=metadata or None) - except QuotaExceededError as exc: - return _error_response("QuotaExceeded", str(exc), 403) - except StorageError as exc: - return _error_response("InvalidArgument", str(exc), 400) - current_app.logger.info("Object uploaded via POST", extra={"bucket": bucket_name, "key": object_key, "size": meta.size}) - success_action_status = request.form.get("success_action_status", "204") - success_action_redirect = request.form.get("success_action_redirect") - if success_action_redirect: - allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", []) - if not allowed_hosts: - current_app.logger.warning( - "ALLOWED_REDIRECT_HOSTS not configured, falling back to request Host header. " - "Set ALLOWED_REDIRECT_HOSTS for production deployments." - ) - allowed_hosts = [request.host] - parsed = urlparse(success_action_redirect) - if parsed.scheme not in ("http", "https"): - return _error_response("InvalidArgument", "Redirect URL must use http or https", 400) - if parsed.netloc not in allowed_hosts: - return _error_response("InvalidArgument", "Redirect URL host not allowed", 400) - redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}" - return Response(status=303, headers={"Location": redirect_url}) - if success_action_status == "200": - root = Element("PostResponse") - SubElement(root, "Location").text = f"/{bucket_name}/{object_key}" - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "Key").text = object_key - SubElement(root, "ETag").text = f'"{meta.etag}"' - return _xml_response(root, status=200) - if success_action_status == "201": - root = Element("PostResponse") - SubElement(root, "Location").text = f"/{bucket_name}/{object_key}" - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "Key").text = object_key - SubElement(root, "ETag").text = f'"{meta.etag}"' - return _xml_response(root, status=201) - return Response(status=204) - - -def _validate_post_policy_conditions(bucket_name: str, object_key: str, conditions: list, form_data, content_length: int) -> Optional[str]: - for condition in conditions: - if isinstance(condition, dict): - for key, expected_value in condition.items(): - if key == "bucket": - if bucket_name != expected_value: - return f"Bucket must be {expected_value}" - elif key == "key": - if object_key != expected_value: - return f"Key must be {expected_value}" - else: - actual_value = form_data.get(key, "") - if actual_value != expected_value: - return f"Field {key} must be {expected_value}" - elif isinstance(condition, list) and len(condition) >= 2: - operator = condition[0].lower() if isinstance(condition[0], str) else "" - if operator == "starts-with" and len(condition) == 3: - field = condition[1].lstrip("$") - prefix = condition[2] - if field == "key": - if not object_key.startswith(prefix): - return f"Key must start with {prefix}" - else: - actual_value = 
form_data.get(field, "")
-                    if not actual_value.startswith(prefix):
-                        return f"Field {field} must start with {prefix}"
-            elif operator == "eq" and len(condition) == 3:
-                field = condition[1].lstrip("$")
-                expected = condition[2]
-                if field == "key":
-                    if object_key != expected:
-                        return f"Key must equal {expected}"
-                else:
-                    actual_value = form_data.get(field, "")
-                    if actual_value != expected:
-                        return f"Field {field} must equal {expected}"
-            elif operator == "content-length-range" and len(condition) == 3:
-                try:
-                    min_size, max_size = int(condition[1]), int(condition[2])
-                except (TypeError, ValueError):
-                    return "Invalid content-length-range values"
-                if content_length < min_size or content_length > max_size:
-                    return f"Content length must be between {min_size} and {max_size}"
-    return None
-
-
-@s3_api_bp.get("/")
-@limiter.limit(_get_list_buckets_limit)
-def list_buckets() -> Response:
-    principal, error = _require_principal()
-    if error:
-        return error
-    try:
-        _authorize_action(principal, None, "list")
-    except IamError as exc:
-        return _error_response("AccessDenied", str(exc), 403)
-    root = Element("ListAllMyBucketsResult")
-    owner = SubElement(root, "Owner")
-    SubElement(owner, "ID").text = principal.access_key
-    SubElement(owner, "DisplayName").text = principal.display_name
-    buckets_el = SubElement(root, "Buckets")
-
-    storage_buckets = _storage().list_buckets()
-    allowed = set(_iam().buckets_for_principal(principal, [b.name for b in storage_buckets]))
-    for bucket in storage_buckets:
-        if bucket.name not in allowed:
-            continue
-        bucket_el = SubElement(buckets_el, "Bucket")
-        SubElement(bucket_el, "Name").text = bucket.name
-        SubElement(bucket_el, "CreationDate").text = bucket.created_at.isoformat()
-
-    return _xml_response(root)
-
-
-@s3_api_bp.route("/<bucket_name>", methods=["PUT", "DELETE", "GET", "POST"], strict_slashes=False)
-@limiter.limit(_get_bucket_ops_limit)
-def bucket_handler(bucket_name: str) -> Response:
-    storage = _storage()
-    subresource_response = _maybe_handle_bucket_subresource(bucket_name)
-    if subresource_response is not None:
-        return subresource_response
-
-    if request.method == "POST":
-        if "delete" in request.args:
-            return _bulk_delete_handler(bucket_name)
-        content_type = request.headers.get("Content-Type", "")
-        if "multipart/form-data" in content_type:
-            return _post_object(bucket_name)
-        return _method_not_allowed(["GET", "PUT", "DELETE"])
-
-    if request.method == "PUT":
-        principal, error = _require_principal()
-        if error:
-            return error
-        try:
-            _authorize_action(principal, bucket_name, "create_bucket")
-        except IamError as exc:
-            return _error_response("AccessDenied", str(exc), 403)
-        try:
-            storage.create_bucket(bucket_name)
-        except FileExistsError:
-            return _error_response("BucketAlreadyExists", "Bucket exists", 409)
-        except StorageError as exc:
-            return _error_response("InvalidBucketName", str(exc), 400)
-        current_app.logger.info("Bucket created", extra={"bucket": bucket_name})
-        return Response(status=200)
-
-    if request.method == "DELETE":
-        principal, error = _require_principal()
-        if error:
-            return error
-        try:
-            _authorize_action(principal, bucket_name, "delete_bucket")
-        except IamError as exc:
-            return _error_response("AccessDenied", str(exc), 403)
-        try:
-            storage.delete_bucket(bucket_name)
-            _bucket_policies().delete_policy(bucket_name)
-            _replication_manager().delete_rule(bucket_name)
-        except StorageError as exc:
-            code = "BucketNotEmpty" if "not empty" in str(exc) else "NoSuchBucket"
-            status = 409 if code == "BucketNotEmpty" else 404
-
return _error_response(code, str(exc), status) - current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name}) - return Response(status=204) - - principal, error = _require_principal() - try: - _authorize_action(principal, bucket_name, "list") - except IamError as exc: - if error: - return error - return _error_response("AccessDenied", str(exc), 403) - - list_type = request.args.get("list-type") - prefix = request.args.get("prefix", "") - delimiter = request.args.get("delimiter", "") - try: - max_keys = int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])) - if max_keys < 1: - return _error_response("InvalidArgument", "max-keys must be a positive integer", 400) - max_keys = min(max_keys, 1000) - except ValueError: - return _error_response("InvalidArgument", "max-keys must be an integer", 400) - - marker = request.args.get("marker", "") # ListObjects v1 - continuation_token = request.args.get("continuation-token", "") # ListObjectsV2 - start_after = request.args.get("start-after", "") # ListObjectsV2 - - effective_start = "" - if list_type == "2": - if continuation_token: - try: - effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8") - except (ValueError, UnicodeDecodeError): - return _error_response("InvalidArgument", "Invalid continuation token", 400) - elif start_after: - effective_start = start_after - else: - effective_start = marker - - try: - if delimiter: - shallow_result = storage.list_objects_shallow( - bucket_name, - prefix=prefix, - delimiter=delimiter, - max_keys=max_keys, - continuation_token=effective_start or None, - ) - objects = shallow_result.objects - common_prefixes = shallow_result.common_prefixes - is_truncated = shallow_result.is_truncated - - next_marker = shallow_result.next_continuation_token or "" - next_continuation_token = "" - if is_truncated and next_marker and list_type == "2": - next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8") - else: - list_result = storage.list_objects( - bucket_name, - max_keys=max_keys, - continuation_token=effective_start or None, - prefix=prefix or None, - ) - objects = list_result.objects - common_prefixes = [] - is_truncated = list_result.is_truncated - - next_marker = "" - next_continuation_token = "" - if is_truncated: - if objects: - next_marker = objects[-1].key - if list_type == "2" and next_marker: - next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8") - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - - if list_type == "2": - root = Element("ListBucketResult") - SubElement(root, "Name").text = bucket_name - SubElement(root, "Prefix").text = prefix - SubElement(root, "MaxKeys").text = str(max_keys) - SubElement(root, "KeyCount").text = str(len(objects) + len(common_prefixes)) - SubElement(root, "IsTruncated").text = "true" if is_truncated else "false" - if delimiter: - SubElement(root, "Delimiter").text = delimiter - - continuation_token = request.args.get("continuation-token", "") - start_after = request.args.get("start-after", "") - if continuation_token: - SubElement(root, "ContinuationToken").text = continuation_token - if start_after: - SubElement(root, "StartAfter").text = start_after - - if is_truncated and next_continuation_token: - SubElement(root, "NextContinuationToken").text = next_continuation_token - - for meta in objects: - obj_el = SubElement(root, "Contents") - SubElement(obj_el, "Key").text = meta.key - SubElement(obj_el, 
"LastModified").text = meta.last_modified.isoformat() - if meta.etag: - SubElement(obj_el, "ETag").text = f'"{meta.etag}"' - SubElement(obj_el, "Size").text = str(meta.size) - SubElement(obj_el, "StorageClass").text = "STANDARD" - - for cp in common_prefixes: - cp_el = SubElement(root, "CommonPrefixes") - SubElement(cp_el, "Prefix").text = cp - else: - root = Element("ListBucketResult") - SubElement(root, "Name").text = bucket_name - SubElement(root, "Prefix").text = prefix - SubElement(root, "Marker").text = marker - SubElement(root, "MaxKeys").text = str(max_keys) - SubElement(root, "IsTruncated").text = "true" if is_truncated else "false" - if delimiter: - SubElement(root, "Delimiter").text = delimiter - - if is_truncated and delimiter and next_marker: - SubElement(root, "NextMarker").text = next_marker - - for meta in objects: - obj_el = SubElement(root, "Contents") - SubElement(obj_el, "Key").text = meta.key - SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat() - if meta.etag: - SubElement(obj_el, "ETag").text = f'"{meta.etag}"' - SubElement(obj_el, "Size").text = str(meta.size) - - for cp in common_prefixes: - cp_el = SubElement(root, "CommonPrefixes") - SubElement(cp_el, "Prefix").text = cp - - return _xml_response(root) - - -@s3_api_bp.route("//", methods=["PUT", "GET", "DELETE", "HEAD", "POST"], strict_slashes=False) -@limiter.limit(_get_object_ops_limit) -def object_handler(bucket_name: str, object_key: str): - storage = _storage() - - if "tagging" in request.args: - return _object_tagging_handler(bucket_name, object_key) - - if "retention" in request.args: - return _object_retention_handler(bucket_name, object_key) - - if "legal-hold" in request.args: - return _object_legal_hold_handler(bucket_name, object_key) - - if "acl" in request.args: - return _object_acl_handler(bucket_name, object_key) - - if "attributes" in request.args: - return _object_attributes_handler(bucket_name, object_key) - - if request.method == "POST": - if "uploads" in request.args: - return _initiate_multipart_upload(bucket_name, object_key) - if "uploadId" in request.args: - return _complete_multipart_upload(bucket_name, object_key) - if "select" in request.args: - return _select_object_content(bucket_name, object_key) - return _method_not_allowed(["GET", "PUT", "DELETE", "HEAD", "POST"]) - - if request.method == "PUT": - if "partNumber" in request.args and "uploadId" in request.args: - return _upload_part(bucket_name, object_key) - - copy_source = request.headers.get("x-amz-copy-source") - if copy_source: - return _copy_object(bucket_name, object_key, copy_source) - - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - - bypass_governance = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true" - lock_service = _object_lock() - can_overwrite, lock_reason = lock_service.can_overwrite_object(bucket_name, object_key, bypass_governance=bypass_governance) - if not can_overwrite: - return _error_response("AccessDenied", lock_reason, 403) - - stream = request.stream - content_encoding = request.headers.get("Content-Encoding", "").lower() - if "aws-chunked" in content_encoding: - stream = AwsChunkedDecoder(stream) - - metadata = _extract_request_metadata() - - content_type = request.headers.get("Content-Type") - validation_error = _validate_content_type(object_key, content_type) - if validation_error: - return _error_response("InvalidArgument", validation_error, 400) - - metadata["__content_type__"] = content_type or 
mimetypes.guess_type(object_key)[0] or "application/octet-stream" - - try: - meta = storage.put_object( - bucket_name, - object_key, - stream, - metadata=metadata or None, - ) - except QuotaExceededError as exc: - return _error_response("QuotaExceeded", str(exc), 403) - except StorageError as exc: - message = str(exc) - if "Bucket" in message: - return _error_response("NoSuchBucket", message, 404) - return _error_response("InvalidArgument", message, 400) - - content_md5 = request.headers.get("Content-MD5") - if content_md5 and meta.etag: - try: - expected_md5 = base64.b64decode(content_md5).hex() - except Exception: - storage.delete_object(bucket_name, object_key) - return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400) - if expected_md5 != meta.etag: - storage.delete_object(bucket_name, object_key) - return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400) - - if current_app.logger.isEnabledFor(logging.INFO): - current_app.logger.info( - "Object uploaded", - extra={"bucket": bucket_name, "key": object_key, "size": meta.size}, - ) - response = Response(status=200) - if meta.etag: - response.headers["ETag"] = f'"{meta.etag}"' - - _notifications().emit_object_created( - bucket_name, - object_key, - size=meta.size, - etag=meta.etag, - request_id=getattr(g, "request_id", ""), - source_ip=request.remote_addr or "", - user_identity=principal.access_key if principal else "", - operation="Put", - ) - - user_agent = request.headers.get("User-Agent", "") - if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent: - _replication_manager().trigger_replication(bucket_name, object_key, action="write") - - return response - - if request.method in {"GET", "HEAD"}: - if request.method == "GET" and "uploadId" in request.args: - return _list_parts(bucket_name, object_key) - - _, error = _object_principal("read", bucket_name, object_key) - if error: - return error - try: - path = storage.get_object_path(bucket_name, object_key) - except StorageError as exc: - return _error_response("NoSuchKey", str(exc), 404) - metadata = storage.get_object_metadata(bucket_name, object_key) - mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream" - - is_encrypted = "x-amz-server-side-encryption" in metadata - - cond_etag = metadata.get("__etag__") - _etag_was_healed = False - if not cond_etag and not is_encrypted: - try: - cond_etag = storage._compute_etag(path) - _etag_was_healed = True - storage.heal_missing_etag(bucket_name, object_key, cond_etag) - except OSError: - cond_etag = None - if cond_etag: - cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None - if cond_mtime is None: - try: - cond_mtime = path.stat().st_mtime - except OSError: - pass - cond_resp = _check_conditional_headers(cond_etag, cond_mtime) - if cond_resp: - return cond_resp - - if request.method == "GET": - range_header = request.headers.get("Range") - - if is_encrypted and hasattr(storage, 'get_object_data'): - try: - data, clean_metadata = storage.get_object_data(bucket_name, object_key) - file_size = len(data) - etag = hashlib.md5(data).hexdigest() - - if range_header: - try: - ranges = _parse_range_header(range_header, file_size) - except (ValueError, TypeError): - ranges = None - if ranges is None: - return _error_response("InvalidRange", "Range Not Satisfiable", 416) - start, end = ranges[0] - partial_data = data[start:end + 1] - response = 
Response(partial_data, status=206, mimetype=mimetype) - response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}" - response.headers["Content-Length"] = len(partial_data) - logged_bytes = len(partial_data) - else: - response = Response(data, mimetype=mimetype) - response.headers["Content-Length"] = file_size - logged_bytes = file_size - except StorageError as exc: - return _error_response("InternalError", str(exc), 500) - else: - try: - stat = path.stat() - file_size = stat.st_size - etag = cond_etag or storage._compute_etag(path) - except PermissionError: - return _error_response("AccessDenied", "Permission denied accessing object", 403) - except OSError as exc: - return _error_response("InternalError", f"Failed to access object: {exc}", 500) - - if range_header: - try: - ranges = _parse_range_header(range_header, file_size) - except (ValueError, TypeError): - ranges = None - if ranges is None: - return _error_response("InvalidRange", "Range Not Satisfiable", 416) - start, end = ranges[0] - length = end - start + 1 - - def stream_range(file_path, start_pos, length_to_read): - with open(file_path, "rb") as f: - f.seek(start_pos) - remaining = length_to_read - while remaining > 0: - chunk_size = min(262144, remaining) - chunk = f.read(chunk_size) - if not chunk: - break - remaining -= len(chunk) - yield chunk - - response = Response(stream_range(path, start, length), status=206, mimetype=mimetype, direct_passthrough=True) - response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}" - response.headers["Content-Length"] = length - logged_bytes = length - else: - response = Response(_stream_file(path), mimetype=mimetype, direct_passthrough=True) - logged_bytes = file_size - else: - if is_encrypted and hasattr(storage, 'get_object_data'): - try: - data, _ = storage.get_object_data(bucket_name, object_key) - response = Response(status=200) - response.headers["Content-Length"] = len(data) - etag = hashlib.md5(data).hexdigest() - except StorageError as exc: - return _error_response("InternalError", str(exc), 500) - else: - try: - stat = path.stat() - response = Response(status=200) - etag = cond_etag or storage._compute_etag(path) - except PermissionError: - return _error_response("AccessDenied", "Permission denied accessing object", 403) - except OSError as exc: - return _error_response("InternalError", f"Failed to access object: {exc}", 500) - response.headers["Content-Type"] = mimetype - logged_bytes = 0 - - file_stat = stat if not is_encrypted else None - _apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag) - - if request.method == "GET": - response_overrides = { - "response-content-type": "Content-Type", - "response-content-language": "Content-Language", - "response-expires": "Expires", - "response-cache-control": "Cache-Control", - "response-content-disposition": "Content-Disposition", - "response-content-encoding": "Content-Encoding", - } - for param, header in response_overrides.items(): - value = request.args.get(param) - if value: - response.headers[header] = _sanitize_header_value(value) - - if current_app.logger.isEnabledFor(logging.INFO): - action = "Object read" if request.method == "GET" else "Object head" - current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes}) - return response - - if "uploadId" in request.args: - return _abort_multipart_upload(bucket_name, object_key) - - _, error = _object_principal("delete", bucket_name, object_key) - if error: - return error - - 
bypass_governance = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true" - lock_service = _object_lock() - can_delete, lock_reason = lock_service.can_delete_object(bucket_name, object_key, bypass_governance=bypass_governance) - if not can_delete: - return _error_response("AccessDenied", lock_reason, 403) - - storage.delete_object(bucket_name, object_key) - lock_service.delete_object_lock_metadata(bucket_name, object_key) - if current_app.logger.isEnabledFor(logging.INFO): - current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key}) - - principal, _ = _require_principal() - _notifications().emit_object_removed( - bucket_name, - object_key, - request_id=getattr(g, "request_id", ""), - source_ip=request.remote_addr or "", - user_identity=principal.access_key if principal else "", - ) - - user_agent = request.headers.get("User-Agent", "") - if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent: - _replication_manager().trigger_replication(bucket_name, object_key, action="delete") - - return Response(status=204) - - -def _list_parts(bucket_name: str, object_key: str) -> Response: - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "read", object_key=object_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - upload_id = request.args.get("uploadId") - if not upload_id: - return _error_response("InvalidArgument", "uploadId is required", 400) - - try: - parts = _storage().list_multipart_parts(bucket_name, upload_id) - except StorageError as exc: - return _error_response("NoSuchUpload", str(exc), 404) - - root = Element("ListPartsResult") - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "Key").text = object_key - SubElement(root, "UploadId").text = upload_id - - initiator = SubElement(root, "Initiator") - SubElement(initiator, "ID").text = principal.access_key - SubElement(initiator, "DisplayName").text = principal.display_name - - owner = SubElement(root, "Owner") - SubElement(owner, "ID").text = principal.access_key - SubElement(owner, "DisplayName").text = principal.display_name - - SubElement(root, "StorageClass").text = "STANDARD" - SubElement(root, "PartNumberMarker").text = "0" - SubElement(root, "NextPartNumberMarker").text = str(parts[-1]["PartNumber"]) if parts else "0" - SubElement(root, "MaxParts").text = "1000" - SubElement(root, "IsTruncated").text = "false" - - for part in parts: - p = SubElement(root, "Part") - SubElement(p, "PartNumber").text = str(part["PartNumber"]) - SubElement(p, "LastModified").text = part["LastModified"].isoformat() - SubElement(p, "ETag").text = f'"{part["ETag"]}"' - SubElement(p, "Size").text = str(part["Size"]) - - return _xml_response(root) - - -def _bucket_policy_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "policy") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - store = _bucket_policies() - if request.method == "GET": - policy = store.get_policy(bucket_name) - if not policy: - return _error_response("NoSuchBucketPolicy", "No bucket policy attached", 404) - 
return jsonify(policy) - if request.method == "DELETE": - store.delete_policy(bucket_name) - current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name}) - return Response(status=204) - raw_body = request.get_data(cache=False) or b"" - try: - payload = json.loads(raw_body) - except (json.JSONDecodeError, ValueError): - return _error_response("MalformedPolicy", "Policy document must be JSON", 400) - if not payload: - return _error_response("MalformedPolicy", "Policy document must be JSON", 400) - try: - store.set_policy(bucket_name, payload) - current_app.logger.info("Bucket policy updated", extra={"bucket": bucket_name}) - except ValueError as exc: - return _error_response("MalformedPolicy", str(exc), 400) - return Response(status=204) - - -def _bucket_policy_status_handler(bucket_name: str) -> Response: - if request.method != "GET": - return _method_not_allowed(["GET"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "policy") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - store = _bucket_policies() - policy = store.get_policy(bucket_name) - is_public = False - if policy: - for statement in policy.get("Statement", []): - if statement.get("Effect") == "Allow" and statement.get("Principal") == "*": - is_public = True - break - root = Element("PolicyStatus") - SubElement(root, "IsPublic").text = "TRUE" if is_public else "FALSE" - return _xml_response(root) - - -def _bucket_replication_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "replication") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if not storage.bucket_exists(bucket_name): - return _error_response("NoSuchBucket", "Bucket does not exist", 404) - replication = _replication_manager() - if request.method == "GET": - rule = replication.get_rule(bucket_name) - if not rule: - return _error_response("ReplicationConfigurationNotFoundError", "Replication configuration not found", 404) - return _xml_response(_render_replication_config(rule)) - if request.method == "DELETE": - replication.delete_rule(bucket_name) - current_app.logger.info("Bucket replication removed", extra={"bucket": bucket_name}) - return Response(status=204) - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - try: - rule = _parse_replication_config(bucket_name, payload) - except ValueError as exc: - return _error_response("MalformedXML", str(exc), 400) - replication.set_rule(rule) - current_app.logger.info("Bucket replication updated", extra={"bucket": bucket_name}) - return Response(status=200) - - -def _parse_replication_config(bucket_name: str, payload: bytes): - from .replication import ReplicationRule, REPLICATION_MODE_ALL - root = _parse_xml_with_limit(payload) - if _strip_ns(root.tag) != "ReplicationConfiguration": - raise ValueError("Root element must be ReplicationConfiguration") - rule_el = None - for child in list(root): - if _strip_ns(child.tag) == "Rule": - rule_el = child - break - if rule_el is None: - raise ValueError("At least one Rule is 
required") - status_el = _find_element(rule_el, "Status") - status = status_el.text if status_el is not None and status_el.text else "Enabled" - enabled = status.lower() == "enabled" - filter_prefix = None - filter_el = _find_element(rule_el, "Filter") - if filter_el is not None: - prefix_el = _find_element(filter_el, "Prefix") - if prefix_el is not None and prefix_el.text: - filter_prefix = prefix_el.text - dest_el = _find_element(rule_el, "Destination") - if dest_el is None: - raise ValueError("Destination element is required") - bucket_el = _find_element(dest_el, "Bucket") - if bucket_el is None or not bucket_el.text: - raise ValueError("Destination Bucket is required") - target_bucket, target_connection_id = _parse_destination_arn(bucket_el.text) - sync_deletions = True - dm_el = _find_element(rule_el, "DeleteMarkerReplication") - if dm_el is not None: - dm_status_el = _find_element(dm_el, "Status") - if dm_status_el is not None and dm_status_el.text: - sync_deletions = dm_status_el.text.lower() == "enabled" - return ReplicationRule( - bucket_name=bucket_name, - target_connection_id=target_connection_id, - target_bucket=target_bucket, - enabled=enabled, - mode=REPLICATION_MODE_ALL, - sync_deletions=sync_deletions, - filter_prefix=filter_prefix, - ) - - -def _bucket_website_handler(bucket_name: str) -> Response: - if request.method not in {"GET", "PUT", "DELETE"}: - return _method_not_allowed(["GET", "PUT", "DELETE"]) - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - return _error_response("InvalidRequest", "Website hosting is not enabled", 400) - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, bucket_name, "website") - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - storage = _storage() - if request.method == "GET": - try: - config = storage.get_bucket_website(bucket_name) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - if not config: - return _error_response("NoSuchWebsiteConfiguration", "The specified bucket does not have a website configuration", 404) - root = Element("WebsiteConfiguration") - root.set("xmlns", S3_NS) - index_doc = config.get("index_document") - if index_doc: - idx_el = SubElement(root, "IndexDocument") - SubElement(idx_el, "Suffix").text = index_doc - error_doc = config.get("error_document") - if error_doc: - err_el = SubElement(root, "ErrorDocument") - SubElement(err_el, "Key").text = error_doc - return _xml_response(root) - if request.method == "DELETE": - try: - storage.set_bucket_website(bucket_name, None) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - current_app.logger.info("Bucket website config deleted", extra={"bucket": bucket_name}) - return Response(status=204) - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - if not payload.strip(): - return _error_response("MalformedXML", "Request body is required", 400) - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - if _strip_ns(root.tag) != "WebsiteConfiguration": - return _error_response("MalformedXML", "Root element must be WebsiteConfiguration", 400) - index_el = _find_element(root, "IndexDocument") - if index_el is None: - return _error_response("InvalidArgument", "IndexDocument is required", 400) - suffix_el = _find_element(index_el, "Suffix") - if 
suffix_el is None or not (suffix_el.text or "").strip():
-        return _error_response("InvalidArgument", "IndexDocument Suffix is required", 400)
-    index_suffix = suffix_el.text.strip()
-    if "/" in index_suffix:
-        return _error_response("InvalidArgument", "IndexDocument Suffix must not contain '/'", 400)
-    website_config: Dict[str, Any] = {"index_document": index_suffix}
-    error_el = _find_element(root, "ErrorDocument")
-    if error_el is not None:
-        key_el = _find_element(error_el, "Key")
-        if key_el is not None and (key_el.text or "").strip():
-            website_config["error_document"] = key_el.text.strip()
-    try:
-        storage.set_bucket_website(bucket_name, website_config)
-    except StorageError as exc:
-        return _error_response("NoSuchBucket", str(exc), 404)
-    current_app.logger.info("Bucket website config updated", extra={"bucket": bucket_name, "index": index_suffix})
-    return Response(status=200)
-
-
-def _parse_destination_arn(arn: str) -> tuple:
-    if not arn.startswith("arn:aws:s3:::"):
-        raise ValueError(f"Invalid ARN format: {arn}")
-    bucket_part = arn[13:]
-    if "/" in bucket_part:
-        connection_id, bucket_name = bucket_part.split("/", 1)
-    else:
-        connection_id = "local"
-        bucket_name = bucket_part
-    return bucket_name, connection_id
-
-
-def _render_replication_config(rule) -> Element:
-    root = Element("ReplicationConfiguration")
-    SubElement(root, "Role").text = "arn:aws:iam::000000000000:role/replication"
-    rule_el = SubElement(root, "Rule")
-    SubElement(rule_el, "ID").text = f"{rule.bucket_name}-replication"
-    SubElement(rule_el, "Status").text = "Enabled" if rule.enabled else "Disabled"
-    SubElement(rule_el, "Priority").text = "1"
-    filter_el = SubElement(rule_el, "Filter")
-    if rule.filter_prefix:
-        SubElement(filter_el, "Prefix").text = rule.filter_prefix
-    dest_el = SubElement(rule_el, "Destination")
-    if rule.target_connection_id == "local":
-        arn = f"arn:aws:s3:::{rule.target_bucket}"
-    else:
-        arn = f"arn:aws:s3:::{rule.target_connection_id}/{rule.target_bucket}"
-    SubElement(dest_el, "Bucket").text = arn
-    dm_el = SubElement(rule_el, "DeleteMarkerReplication")
-    SubElement(dm_el, "Status").text = "Enabled" if rule.sync_deletions else "Disabled"
-    return root
-
-
-@s3_api_bp.route("/<bucket_name>", methods=["HEAD"])
-@limiter.limit(_get_head_ops_limit)
-def head_bucket(bucket_name: str) -> Response:
-    principal, error = _require_principal()
-    if error:
-        return error
-    try:
-        _authorize_action(principal, bucket_name, "list")
-        if not _storage().bucket_exists(bucket_name):
-            return _error_response("NoSuchBucket", "Bucket not found", 404)
-        return Response(status=200)
-    except IamError as exc:
-        return _error_response("AccessDenied", str(exc), 403)
-
-
-@s3_api_bp.route("/<bucket_name>/<path:object_key>", methods=["HEAD"])
-@limiter.limit(_get_head_ops_limit)
-def head_object(bucket_name: str, object_key: str) -> Response:
-    principal, error = _require_principal()
-    if error:
-        return error
-    try:
-        _authorize_action(principal, bucket_name, "read", object_key=object_key)
-        storage = _storage()
-        path = storage.get_object_path(bucket_name, object_key)
-        metadata = storage.get_object_metadata(bucket_name, object_key)
-        etag = metadata.get("__etag__")
-        if not etag:
-            etag = storage._compute_etag(path)
-            storage.heal_missing_etag(bucket_name, object_key, etag)
-
-        head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
-        if head_mtime is None:
-            try:
-                head_mtime = path.stat().st_mtime
-            except OSError:
-                pass
-        cond_resp = _check_conditional_headers(etag, head_mtime)
-        if cond_resp:
-            return
cond_resp - - cached_size = metadata.get("__size__") - cached_mtime = metadata.get("__last_modified__") - if cached_size is not None and cached_mtime is not None: - size_val = int(cached_size) - mtime_val = float(cached_mtime) - response = Response(status=200) - _apply_object_headers(response, file_stat=None, metadata=metadata, etag=etag, size_override=size_val, mtime_override=mtime_val) - else: - stat = path.stat() - response = Response(status=200) - _apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag) - response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream" - return response - except (StorageError, FileNotFoundError): - return _error_response("NoSuchKey", "Object not found", 404) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - -def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response: - """Handle S3 CopyObject operation.""" - from urllib.parse import unquote - copy_source = unquote(copy_source) - if copy_source.startswith("/"): - copy_source = copy_source[1:] - - parts = copy_source.split("/", 1) - if len(parts) != 2: - return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400) - - source_bucket, source_key = parts - if not source_bucket or not source_key: - return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400) - - principal, error = _require_principal() - if error: - return error - try: - _authorize_action(principal, source_bucket, "read", object_key=source_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - try: - _authorize_action(principal, dest_bucket, "write", object_key=dest_key) - except IamError as exc: - return _error_response("AccessDenied", str(exc), 403) - - storage = _storage() - - try: - source_path = storage.get_object_path(source_bucket, source_key) - except StorageError: - return _error_response("NoSuchKey", "Source object not found", 404) - - source_stat = source_path.stat() - source_etag = storage._compute_etag(source_path) - source_mtime = datetime.fromtimestamp(source_stat.st_mtime, timezone.utc) - - copy_source_if_match = request.headers.get("x-amz-copy-source-if-match") - if copy_source_if_match: - expected_etag = copy_source_if_match.strip('"') - if source_etag != expected_etag: - return _error_response("PreconditionFailed", "Source ETag does not match", 412) - - copy_source_if_none_match = request.headers.get("x-amz-copy-source-if-none-match") - if copy_source_if_none_match: - not_expected_etag = copy_source_if_none_match.strip('"') - if source_etag == not_expected_etag: - return _error_response("PreconditionFailed", "Source ETag matches", 412) - - copy_source_if_modified_since = request.headers.get("x-amz-copy-source-if-modified-since") - if copy_source_if_modified_since: - from email.utils import parsedate_to_datetime - try: - if_modified = parsedate_to_datetime(copy_source_if_modified_since) - if source_mtime <= if_modified: - return _error_response("PreconditionFailed", "Source not modified since specified date", 412) - except (TypeError, ValueError): - pass - - copy_source_if_unmodified_since = request.headers.get("x-amz-copy-source-if-unmodified-since") - if copy_source_if_unmodified_since: - from email.utils import parsedate_to_datetime - try: - if_unmodified = parsedate_to_datetime(copy_source_if_unmodified_since) - if source_mtime > if_unmodified: - return _error_response("PreconditionFailed", 
"Source modified since specified date", 412) - except (TypeError, ValueError): - pass - - source_metadata = storage.get_object_metadata(source_bucket, source_key) - - metadata_directive = request.headers.get("x-amz-metadata-directive", "COPY").upper() - if metadata_directive == "REPLACE": - metadata = _extract_request_metadata() - content_type = request.headers.get("Content-Type") - validation_error = _validate_content_type(dest_key, content_type) - if validation_error: - return _error_response("InvalidArgument", validation_error, 400) - else: - metadata = {k: v for k, v in source_metadata.items() if not (k.startswith("__") and k.endswith("__"))} - - try: - with source_path.open("rb") as stream: - meta = storage.put_object( - dest_bucket, - dest_key, - stream, - metadata=metadata or None, - ) - except StorageError as exc: - message = str(exc) - if "Bucket" in message: - return _error_response("NoSuchBucket", message, 404) - return _error_response("InvalidArgument", message, 400) - - current_app.logger.info( - "Object copied", - extra={ - "source_bucket": source_bucket, - "source_key": source_key, - "dest_bucket": dest_bucket, - "dest_key": dest_key, - "size": meta.size, - }, - ) - - user_agent = request.headers.get("User-Agent", "") - if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent: - _replication_manager().trigger_replication(dest_bucket, dest_key, action="write") - - root = Element("CopyObjectResult") - SubElement(root, "LastModified").text = meta.last_modified.isoformat() - if meta.etag: - SubElement(root, "ETag").text = f'"{meta.etag}"' - return _xml_response(root) - - -class AwsChunkedDecoder: - """Decodes aws-chunked encoded streams. - - Performance optimized with buffered line reading instead of byte-by-byte. - """ - - def __init__(self, stream): - self.stream = stream - self._read_buffer = bytearray() - self.chunk_remaining = 0 - self.finished = False - - def _read_line(self) -> bytes: - """Read until CRLF using buffered reads instead of byte-by-byte. - - Performance: Reads in batches of 64-256 bytes instead of 1 byte at a time. 
- """ - line = bytearray() - while True: - if self._read_buffer: - idx = self._read_buffer.find(b"\r\n") - if idx != -1: - line.extend(self._read_buffer[: idx + 2]) - del self._read_buffer[: idx + 2] - return bytes(line) - line.extend(self._read_buffer) - self._read_buffer.clear() - - chunk = self.stream.read(64) - if not chunk: - return bytes(line) if line else b"" - self._read_buffer.extend(chunk) - - def _read_exact(self, n: int) -> bytes: - """Read exactly n bytes, using buffer first.""" - result = bytearray() - if self._read_buffer: - take = min(len(self._read_buffer), n) - result.extend(self._read_buffer[:take]) - del self._read_buffer[:take] - n -= take - if n > 0: - data = self.stream.read(n) - if data: - result.extend(data) - - return bytes(result) - - def read(self, size=-1): - if self.finished: - return b"" - - result = bytearray() - while size == -1 or len(result) < size: - if self.chunk_remaining > 0: - to_read = self.chunk_remaining - if size != -1: - to_read = min(to_read, size - len(result)) - - chunk = self._read_exact(to_read) - if not chunk: - raise IOError("Unexpected EOF in chunk data") - - result.extend(chunk) - self.chunk_remaining -= len(chunk) - - if self.chunk_remaining == 0: - crlf = self._read_exact(2) - if crlf != b"\r\n": - raise IOError("Malformed chunk: missing CRLF") - else: - line = self._read_line() - if not line: - self.finished = True - return bytes(result) - - try: - line_str = line.decode("ascii").strip() - if ";" in line_str: - line_str = line_str.split(";")[0] - chunk_size = int(line_str, 16) - except ValueError: - raise IOError(f"Invalid chunk size: {line}") - - if chunk_size == 0: - self.finished = True - while True: - trailer = self._read_line() - if trailer == b"\r\n" or not trailer: - break - return bytes(result) - - self.chunk_remaining = chunk_size - - return bytes(result) - - -def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response: - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - - metadata = _extract_request_metadata() - content_type = request.headers.get("Content-Type") - metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream" - try: - upload_id = _storage().initiate_multipart_upload( - bucket_name, - object_key, - metadata=metadata or None - ) - except StorageError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - - root = Element("InitiateMultipartUploadResult") - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "Key").text = object_key - SubElement(root, "UploadId").text = upload_id - return _xml_response(root) - - -def _upload_part(bucket_name: str, object_key: str) -> Response: - copy_source = request.headers.get("x-amz-copy-source") - if copy_source: - return _upload_part_copy(bucket_name, object_key, copy_source) - - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - - upload_id = request.args.get("uploadId") - part_number_str = request.args.get("partNumber") - if not upload_id or not part_number_str: - return _error_response("InvalidArgument", "uploadId and partNumber are required", 400) - - try: - part_number = int(part_number_str) - except ValueError: - return _error_response("InvalidArgument", "partNumber must be an integer", 400) - - if part_number < 1 or part_number > 10000: - return _error_response("InvalidArgument", "partNumber must be between 1 and 10000", 400) - - stream = request.stream - content_encoding = 
request.headers.get("Content-Encoding", "").lower() - if "aws-chunked" in content_encoding: - stream = AwsChunkedDecoder(stream) - - try: - etag = _storage().upload_multipart_part(bucket_name, upload_id, part_number, stream) - except StorageError as exc: - if "NoSuchBucket" in str(exc): - return _error_response("NoSuchBucket", str(exc), 404) - if "Multipart upload not found" in str(exc): - return _error_response("NoSuchUpload", str(exc), 404) - return _error_response("InvalidArgument", str(exc), 400) - - content_md5 = request.headers.get("Content-MD5") - if content_md5 and etag: - try: - expected_md5 = base64.b64decode(content_md5).hex() - except Exception: - return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400) - if expected_md5 != etag: - return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400) - - response = Response(status=200) - response.headers["ETag"] = f'"{etag}"' - return response - - -def _upload_part_copy(bucket_name: str, object_key: str, copy_source: str) -> Response: - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - - upload_id = request.args.get("uploadId") - part_number_str = request.args.get("partNumber") - if not upload_id or not part_number_str: - return _error_response("InvalidArgument", "uploadId and partNumber are required", 400) - - try: - part_number = int(part_number_str) - except ValueError: - return _error_response("InvalidArgument", "partNumber must be an integer", 400) - - if part_number < 1 or part_number > 10000: - return _error_response("InvalidArgument", "partNumber must be between 1 and 10000", 400) - - copy_source = unquote(copy_source) - if copy_source.startswith("/"): - copy_source = copy_source[1:] - parts = copy_source.split("/", 1) - if len(parts) != 2: - return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400) - source_bucket, source_key = parts - if not source_bucket or not source_key: - return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400) - - _, read_error = _object_principal("read", source_bucket, source_key) - if read_error: - return read_error - - copy_source_range = request.headers.get("x-amz-copy-source-range") - start_byte, end_byte = None, None - if copy_source_range: - match = re.match(r"bytes=(\d+)-(\d+)", copy_source_range) - if not match: - return _error_response("InvalidArgument", "Invalid x-amz-copy-source-range format", 400) - start_byte, end_byte = int(match.group(1)), int(match.group(2)) - - try: - result = _storage().upload_part_copy( - bucket_name, upload_id, part_number, - source_bucket, source_key, - start_byte, end_byte - ) - except ObjectNotFoundError: - return _error_response("NoSuchKey", "Source object not found", 404) - except StorageError as exc: - if "Multipart upload not found" in str(exc): - return _error_response("NoSuchUpload", str(exc), 404) - if "Invalid byte range" in str(exc): - return _error_response("InvalidRange", str(exc), 416) - return _error_response("InvalidArgument", str(exc), 400) - - root = Element("CopyPartResult") - SubElement(root, "LastModified").text = result["last_modified"].strftime("%Y-%m-%dT%H:%M:%S.000Z") - SubElement(root, "ETag").text = f'"{result["etag"]}"' - return _xml_response(root) - - -def _complete_multipart_upload(bucket_name: str, object_key: str) -> Response: - principal, error = _object_principal("write", bucket_name, object_key) - if error: - return error - - upload_id = 
request.args.get("uploadId") - if not upload_id: - return _error_response("InvalidArgument", "uploadId is required", 400) - - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - - if _strip_ns(root.tag) != "CompleteMultipartUpload": - return _error_response("MalformedXML", "Root element must be CompleteMultipartUpload", 400) - - parts = [] - for part_el in list(root): - if _strip_ns(part_el.tag) != "Part": - continue - part_number_el = part_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}PartNumber") - if part_number_el is None: - part_number_el = part_el.find("PartNumber") - - etag_el = part_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ETag") - if etag_el is None: - etag_el = part_el.find("ETag") - - if part_number_el is not None and etag_el is not None: - try: - part_num = int(part_number_el.text or 0) - except ValueError: - return _error_response("InvalidArgument", "PartNumber must be an integer", 400) - if part_num < 1 or part_num > 10000: - return _error_response("InvalidArgument", f"PartNumber {part_num} must be between 1 and 10000", 400) - parts.append({ - "PartNumber": part_num, - "ETag": (etag_el.text or "").strip('"') - }) - - try: - meta = _storage().complete_multipart_upload(bucket_name, upload_id, parts) - except QuotaExceededError as exc: - return _error_response("QuotaExceeded", str(exc), 403) - except StorageError as exc: - if "NoSuchBucket" in str(exc): - return _error_response("NoSuchBucket", str(exc), 404) - if "Multipart upload not found" in str(exc): - return _error_response("NoSuchUpload", str(exc), 404) - return _error_response("InvalidPart", str(exc), 400) - - user_agent = request.headers.get("User-Agent", "") - if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent: - _replication_manager().trigger_replication(bucket_name, object_key, action="write") - - root = Element("CompleteMultipartUploadResult") - location = f"{request.host_url}{bucket_name}/{object_key}" - SubElement(root, "Location").text = location - SubElement(root, "Bucket").text = bucket_name - SubElement(root, "Key").text = object_key - if meta.etag: - SubElement(root, "ETag").text = f'"{meta.etag}"' - - return _xml_response(root) - - -def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response: - principal, error = _object_principal("delete", bucket_name, object_key) - if error: - return error - - upload_id = request.args.get("uploadId") - if not upload_id: - return _error_response("InvalidArgument", "uploadId is required", 400) - - try: - _storage().abort_multipart_upload(bucket_name, upload_id) - except BucketNotFoundError as exc: - return _error_response("NoSuchBucket", str(exc), 404) - except StorageError as exc: - current_app.logger.warning(f"Error aborting multipart upload: {exc}") - - return Response(status=204) - - -def _select_object_content(bucket_name: str, object_key: str) -> Response: - _, error = _object_principal("read", bucket_name, object_key) - if error: - return error - ct_error = _require_xml_content_type() - if ct_error: - return ct_error - payload = request.get_data(cache=False) or b"" - try: - root = _parse_xml_with_limit(payload) - except ParseError: - return _error_response("MalformedXML", "Unable to parse XML document", 400) - if _strip_ns(root.tag) != "SelectObjectContentRequest": - return 
_error_response("MalformedXML", "Root element must be SelectObjectContentRequest", 400) - expression_el = _find_element(root, "Expression") - if expression_el is None or not expression_el.text: - return _error_response("InvalidRequest", "Expression is required", 400) - expression = expression_el.text - expression_type_el = _find_element(root, "ExpressionType") - expression_type = expression_type_el.text if expression_type_el is not None and expression_type_el.text else "SQL" - if expression_type.upper() != "SQL": - return _error_response("InvalidRequest", "Only SQL expression type is supported", 400) - input_el = _find_element(root, "InputSerialization") - if input_el is None: - return _error_response("InvalidRequest", "InputSerialization is required", 400) - try: - input_format, input_config = _parse_select_input_serialization(input_el) - except ValueError as exc: - return _error_response("InvalidRequest", str(exc), 400) - output_el = _find_element(root, "OutputSerialization") - if output_el is None: - return _error_response("InvalidRequest", "OutputSerialization is required", 400) - try: - output_format, output_config = _parse_select_output_serialization(output_el) - except ValueError as exc: - return _error_response("InvalidRequest", str(exc), 400) - storage = _storage() - try: - path = storage.get_object_path(bucket_name, object_key) - except ObjectNotFoundError: - return _error_response("NoSuchKey", "Object not found", 404) - except StorageError: - return _error_response("NoSuchKey", "Object not found", 404) - from .select_content import execute_select_query, SelectError - try: - result_stream = execute_select_query( - file_path=path, - expression=expression, - input_format=input_format, - input_config=input_config, - output_format=output_format, - output_config=output_config, - ) - except SelectError as exc: - return _error_response("InvalidRequest", str(exc), 400) - - def generate_events(): - bytes_scanned = 0 - bytes_returned = 0 - for chunk in result_stream: - bytes_returned += len(chunk) - yield _encode_select_event("Records", chunk) - stats_payload = _build_stats_xml(bytes_scanned, bytes_returned) - yield _encode_select_event("Stats", stats_payload) - yield _encode_select_event("End", b"") - - return Response(generate_events(), mimetype="application/octet-stream", headers={"x-amz-request-charged": "requester"}) - - -def _parse_select_input_serialization(el: Element) -> tuple: - csv_el = _find_element(el, "CSV") - if csv_el is not None: - file_header_el = _find_element(csv_el, "FileHeaderInfo") - config = { - "file_header_info": file_header_el.text.upper() if file_header_el is not None and file_header_el.text else "NONE", - "comments": _find_element_text(csv_el, "Comments", "#"), - "field_delimiter": _find_element_text(csv_el, "FieldDelimiter", ","), - "record_delimiter": _find_element_text(csv_el, "RecordDelimiter", "\n"), - "quote_character": _find_element_text(csv_el, "QuoteCharacter", '"'), - "quote_escape_character": _find_element_text(csv_el, "QuoteEscapeCharacter", '"'), - } - return "CSV", config - json_el = _find_element(el, "JSON") - if json_el is not None: - type_el = _find_element(json_el, "Type") - config = { - "type": type_el.text.upper() if type_el is not None and type_el.text else "DOCUMENT", - } - return "JSON", config - parquet_el = _find_element(el, "Parquet") - if parquet_el is not None: - return "Parquet", {} - raise ValueError("InputSerialization must specify CSV, JSON, or Parquet") - - -def _parse_select_output_serialization(el: Element) -> tuple: - csv_el 
= _find_element(el, "CSV") - if csv_el is not None: - config = { - "field_delimiter": _find_element_text(csv_el, "FieldDelimiter", ","), - "record_delimiter": _find_element_text(csv_el, "RecordDelimiter", "\n"), - "quote_character": _find_element_text(csv_el, "QuoteCharacter", '"'), - "quote_fields": _find_element_text(csv_el, "QuoteFields", "ASNEEDED").upper(), - } - return "CSV", config - json_el = _find_element(el, "JSON") - if json_el is not None: - config = { - "record_delimiter": _find_element_text(json_el, "RecordDelimiter", "\n"), - } - return "JSON", config - raise ValueError("OutputSerialization must specify CSV or JSON") - - -def _encode_select_event(event_type: str, payload: bytes) -> bytes: - import struct - import binascii - headers = _build_event_headers(event_type) - headers_length = len(headers) - total_length = 4 + 4 + 4 + headers_length + len(payload) + 4 - prelude = struct.pack(">I", total_length) + struct.pack(">I", headers_length) - prelude_crc = binascii.crc32(prelude) & 0xffffffff - prelude += struct.pack(">I", prelude_crc) - message = prelude + headers + payload - message_crc = binascii.crc32(message) & 0xffffffff - message += struct.pack(">I", message_crc) - return message - - -def _build_event_headers(event_type: str) -> bytes: - headers = b"" - headers += _encode_select_header(":event-type", event_type) - if event_type == "Records": - headers += _encode_select_header(":content-type", "application/octet-stream") - elif event_type == "Stats": - headers += _encode_select_header(":content-type", "text/xml") - headers += _encode_select_header(":message-type", "event") - return headers - - -def _encode_select_header(name: str, value: str) -> bytes: - import struct - name_bytes = name.encode("utf-8") - value_bytes = value.encode("utf-8") - header = struct.pack("B", len(name_bytes)) + name_bytes - header += struct.pack("B", 7) - header += struct.pack(">H", len(value_bytes)) + value_bytes - return header - - -def _build_stats_xml(bytes_scanned: int, bytes_returned: int) -> bytes: - stats = Element("Stats") - SubElement(stats, "BytesScanned").text = str(bytes_scanned) - SubElement(stats, "BytesProcessed").text = str(bytes_scanned) - SubElement(stats, "BytesReturned").text = str(bytes_returned) - return tostring(stats, encoding="utf-8") - - -@s3_api_bp.before_request -def resolve_principal(): - g.principal = None - try: - if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \ - (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"): - g.principal = _verify_sigv4(request) - return - except IamError as exc: - logger.debug(f"SigV4 authentication failed: {exc}") - except (ValueError, KeyError) as exc: - logger.debug(f"SigV4 parsing error: {exc}") - - access_key = request.headers.get("X-Access-Key") - secret_key = request.headers.get("X-Secret-Key") - if access_key and secret_key: - try: - g.principal = _iam().authenticate(access_key, secret_key) - except IamError as exc: - logger.debug(f"Header authentication failed: {exc}") diff --git a/app/s3_client.py b/app/s3_client.py deleted file mode 100644 index 916cd2a..0000000 --- a/app/s3_client.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import annotations - -import json -import logging -import threading -import time -from typing import Any, Generator, Optional - -import boto3 -from botocore.config import Config -from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError -from flask import current_app, session - -logger = 
logging.getLogger(__name__) - -UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0" - -_BOTO_ERROR_MAP = { - "NoSuchBucket": 404, - "NoSuchKey": 404, - "NoSuchUpload": 404, - "BucketAlreadyExists": 409, - "BucketAlreadyOwnedByYou": 409, - "BucketNotEmpty": 409, - "AccessDenied": 403, - "InvalidAccessKeyId": 403, - "SignatureDoesNotMatch": 403, - "InvalidBucketName": 400, - "InvalidArgument": 400, - "MalformedXML": 400, - "EntityTooLarge": 400, - "QuotaExceeded": 403, -} - -_UPLOAD_REGISTRY_MAX_AGE = 86400 -_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600 - - -class UploadRegistry: - def __init__(self) -> None: - self._entries: dict[str, tuple[str, str, float]] = {} - self._lock = threading.Lock() - self._last_cleanup = time.monotonic() - - def register(self, upload_id: str, bucket_name: str, object_key: str) -> None: - with self._lock: - self._entries[upload_id] = (bucket_name, object_key, time.monotonic()) - self._maybe_cleanup() - - def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]: - with self._lock: - entry = self._entries.get(upload_id) - if entry is None: - return None - stored_bucket, key, created_at = entry - if stored_bucket != bucket_name: - return None - if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE: - del self._entries[upload_id] - return None - return key - - def remove(self, upload_id: str) -> None: - with self._lock: - self._entries.pop(upload_id, None) - - def _maybe_cleanup(self) -> None: - now = time.monotonic() - if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL: - return - self._last_cleanup = now - cutoff = now - _UPLOAD_REGISTRY_MAX_AGE - stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff] - for uid in stale: - del self._entries[uid] - - -class S3ProxyClient: - def __init__(self, api_base_url: str, region: str = "us-east-1") -> None: - if not api_base_url: - raise ValueError("api_base_url is required for S3ProxyClient") - self._api_base_url = api_base_url.rstrip("/") - self._region = region - self.upload_registry = UploadRegistry() - - @property - def api_base_url(self) -> str: - return self._api_base_url - - def get_client(self, access_key: str, secret_key: str) -> Any: - if not access_key or not secret_key: - raise ValueError("Both access_key and secret_key are required") - config = Config( - user_agent_extra=UI_PROXY_USER_AGENT, - connect_timeout=5, - read_timeout=30, - retries={"max_attempts": 0}, - signature_version="s3v4", - s3={"addressing_style": "path"}, - request_checksum_calculation="when_required", - response_checksum_validation="when_required", - ) - return boto3.client( - "s3", - endpoint_url=self._api_base_url, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - region_name=self._region, - config=config, - ) - - -def _get_proxy() -> S3ProxyClient: - proxy = current_app.extensions.get("s3_proxy") - if proxy is None: - raise RuntimeError( - "S3 proxy not configured. Set API_BASE_URL or run both API and UI servers." 
- ) - return proxy - - -def _get_session_creds() -> tuple[str, str]: - secret_store = current_app.extensions["secret_store"] - secret_store.purge_expired() - token = session.get("cred_token") - if not token: - raise PermissionError("Not authenticated") - creds = secret_store.peek(token) - if not creds: - raise PermissionError("Session expired") - access_key = creds.get("access_key", "") - secret_key = creds.get("secret_key", "") - if not access_key or not secret_key: - raise PermissionError("Invalid session credentials") - return access_key, secret_key - - -def get_session_s3_client() -> Any: - proxy = _get_proxy() - access_key, secret_key = _get_session_creds() - return proxy.get_client(access_key, secret_key) - - -def get_upload_registry() -> UploadRegistry: - return _get_proxy().upload_registry - - -def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]: - error_info = exc.response.get("Error", {}) - code = error_info.get("Code", "InternalError") - message = error_info.get("Message") or "S3 operation failed" - http_status = _BOTO_ERROR_MAP.get(code) - if http_status is None: - http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500) - return {"error": message}, http_status - - -def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]: - logger.error("S3 API connection failed: %s", exc) - return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502 - - -def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str: - from .ui import _format_datetime_display - return _format_datetime_display(dt, display_tz) - - -def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str: - from .ui import _format_datetime_iso - return _format_datetime_iso(dt, display_tz) - - -def build_url_templates(bucket_name: str) -> dict[str, str]: - from flask import url_for - preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - restore_t = url_for( - "ui.restore_object_version", - bucket_name=bucket_name, - object_key="KEY_PLACEHOLDER", - version_id="VERSION_ID_PLACEHOLDER", - ) - tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER") - return { - "preview": preview_t, - "download": preview_t + "?download=1", - "presign": presign_t, - "delete": delete_t, - "versions": versions_t, - "restore": restore_t, - "tags": tags_t, - "copy": copy_t, - "move": move_t, - "metadata": metadata_t, - } - - -def translate_list_objects( - boto3_response: dict[str, Any], - url_templates: dict[str, str], - display_tz: str = "UTC", - versioning_enabled: bool = False, -) -> dict[str, Any]: - objects_data = [] - for obj in boto3_response.get("Contents", []): - last_mod = obj["LastModified"] - objects_data.append({ - "key": obj["Key"], - "size": obj["Size"], - "last_modified": last_mod.isoformat(), - "last_modified_display": format_datetime_display(last_mod, display_tz), - "last_modified_iso": 
format_datetime_iso(last_mod, display_tz), - "etag": obj.get("ETag", "").strip('"'), - }) - return { - "objects": objects_data, - "is_truncated": boto3_response.get("IsTruncated", False), - "next_continuation_token": boto3_response.get("NextContinuationToken"), - "total_count": boto3_response.get("KeyCount", len(objects_data)), - "versioning_enabled": versioning_enabled, - "url_templates": url_templates, - } - - -def get_versioning_via_s3(client: Any, bucket_name: str) -> bool: - try: - resp = client.get_bucket_versioning(Bucket=bucket_name) - return resp.get("Status") == "Enabled" - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - if code != "NoSuchBucket": - logger.warning("Failed to check versioning for %s: %s", bucket_name, code) - return False - - -def stream_objects_ndjson( - client: Any, - bucket_name: str, - prefix: Optional[str], - url_templates: dict[str, str], - display_tz: str = "UTC", - versioning_enabled: bool = False, - delimiter: Optional[str] = None, -) -> Generator[str, None, None]: - meta_line = json.dumps({ - "type": "meta", - "versioning_enabled": versioning_enabled, - "url_templates": url_templates, - }) + "\n" - yield meta_line - - yield json.dumps({"type": "count", "total_count": 0}) + "\n" - - kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000} - if prefix: - kwargs["Prefix"] = prefix - if delimiter: - kwargs["Delimiter"] = delimiter - - running_count = 0 - try: - paginator = client.get_paginator("list_objects_v2") - for page in paginator.paginate(**kwargs): - for cp in page.get("CommonPrefixes", []): - yield json.dumps({ - "type": "folder", - "prefix": cp["Prefix"], - }) + "\n" - page_contents = page.get("Contents", []) - for obj in page_contents: - last_mod = obj["LastModified"] - yield json.dumps({ - "type": "object", - "key": obj["Key"], - "size": obj["Size"], - "last_modified": last_mod.isoformat(), - "last_modified_display": format_datetime_display(last_mod, display_tz), - "last_modified_iso": format_datetime_iso(last_mod, display_tz), - "etag": obj.get("ETag", "").strip('"'), - }) + "\n" - running_count += len(page_contents) - yield json.dumps({"type": "count", "total_count": running_count}) + "\n" - except ClientError as exc: - error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed") - yield json.dumps({"type": "error", "error": error_msg}) + "\n" - return - except (EndpointConnectionError, ConnectionClosedError): - yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n" - return - - yield json.dumps({"type": "done"}) + "\n" diff --git a/app/secret_store.py b/app/secret_store.py deleted file mode 100644 index 903c2fa..0000000 --- a/app/secret_store.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import annotations - -import secrets -import time -from typing import Any, Dict, Optional - - -class EphemeralSecretStore: - """Keeps values in-memory for a short period and returns them once.""" - - def __init__(self, default_ttl: int = 300) -> None: - self._default_ttl = max(default_ttl, 1) - self._store: Dict[str, tuple[Any, float]] = {} - - def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str: - token = secrets.token_urlsafe(16) - expires_at = time.time() + (ttl or self._default_ttl) - self._store[token] = (payload, expires_at) - return token - - def peek(self, token: str | None) -> Any | None: - if not token: - return None - entry = self._store.get(token) - if not entry: - return None - payload, expires_at = entry - if expires_at < 
time.time(): - self._store.pop(token, None) - return None - return payload - - def pop(self, token: str | None) -> Any | None: - if not token: - return None - entry = self._store.pop(token, None) - if not entry: - return None - payload, expires_at = entry - if expires_at < time.time(): - return None - return payload - - def purge_expired(self) -> None: - now = time.time() - stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now] - for token in stale: - self._store.pop(token, None) diff --git a/app/select_content.py b/app/select_content.py deleted file mode 100644 index 57a3362..0000000 --- a/app/select_content.py +++ /dev/null @@ -1,171 +0,0 @@ -"""S3 SelectObjectContent SQL query execution using DuckDB.""" -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any, Dict, Generator, Optional - -try: - import duckdb - DUCKDB_AVAILABLE = True -except ImportError: - DUCKDB_AVAILABLE = False - - -class SelectError(Exception): - """Error during SELECT query execution.""" - pass - - -def execute_select_query( - file_path: Path, - expression: str, - input_format: str, - input_config: Dict[str, Any], - output_format: str, - output_config: Dict[str, Any], - chunk_size: int = 65536, -) -> Generator[bytes, None, None]: - """Execute SQL query on object content.""" - if not DUCKDB_AVAILABLE: - raise SelectError("DuckDB is not installed. Install with: pip install duckdb") - - conn = duckdb.connect(":memory:") - - try: - if input_format == "CSV": - _load_csv(conn, file_path, input_config) - elif input_format == "JSON": - _load_json(conn, file_path, input_config) - elif input_format == "Parquet": - _load_parquet(conn, file_path) - else: - raise SelectError(f"Unsupported input format: {input_format}") - - normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data") - - try: - result = conn.execute(normalized_expression) - except duckdb.Error as exc: - raise SelectError(f"SQL execution error: {exc}") - - if output_format == "CSV": - yield from _output_csv(result, output_config, chunk_size) - elif output_format == "JSON": - yield from _output_json(result, output_config, chunk_size) - else: - raise SelectError(f"Unsupported output format: {output_format}") - - finally: - conn.close() - - -def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None: - """Load CSV file into DuckDB.""" - file_header_info = config.get("file_header_info", "NONE") - delimiter = config.get("field_delimiter", ",") - quote = config.get("quote_character", '"') - - header = file_header_info in ("USE", "IGNORE") - path_str = str(file_path).replace("\\", "/") - - conn.execute(f""" - CREATE TABLE data AS - SELECT * FROM read_csv('{path_str}', - header={header}, - delim='{delimiter}', - quote='{quote}' - ) - """) - - -def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None: - """Load JSON file into DuckDB.""" - json_type = config.get("type", "DOCUMENT") - path_str = str(file_path).replace("\\", "/") - - if json_type == "LINES": - conn.execute(f""" - CREATE TABLE data AS - SELECT * FROM read_json_auto('{path_str}', format='newline_delimited') - """) - else: - conn.execute(f""" - CREATE TABLE data AS - SELECT * FROM read_json_auto('{path_str}', format='array') - """) - - -def _load_parquet(conn, file_path: Path) -> None: - """Load Parquet file into DuckDB.""" - path_str = str(file_path).replace("\\", "/") - conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')") - - -def _output_csv( - result, - 
config: Dict[str, Any], - chunk_size: int, -) -> Generator[bytes, None, None]: - """Output query results as CSV.""" - delimiter = config.get("field_delimiter", ",") - record_delimiter = config.get("record_delimiter", "\n") - quote = config.get("quote_character", '"') - - buffer = "" - - while True: - rows = result.fetchmany(1000) - if not rows: - break - - for row in rows: - fields = [] - for value in row: - if value is None: - fields.append("") - elif isinstance(value, str): - if delimiter in value or quote in value or record_delimiter in value: - escaped = value.replace(quote, quote + quote) - fields.append(f'{quote}{escaped}{quote}') - else: - fields.append(value) - else: - fields.append(str(value)) - - buffer += delimiter.join(fields) + record_delimiter - - while len(buffer) >= chunk_size: - yield buffer[:chunk_size].encode("utf-8") - buffer = buffer[chunk_size:] - - if buffer: - yield buffer.encode("utf-8") - - -def _output_json( - result, - config: Dict[str, Any], - chunk_size: int, -) -> Generator[bytes, None, None]: - """Output query results as JSON Lines.""" - record_delimiter = config.get("record_delimiter", "\n") - columns = [desc[0] for desc in result.description] - - buffer = "" - - while True: - rows = result.fetchmany(1000) - if not rows: - break - - for row in rows: - record = dict(zip(columns, row)) - buffer += json.dumps(record, default=str) + record_delimiter - - while len(buffer) >= chunk_size: - yield buffer[:chunk_size].encode("utf-8") - buffer = buffer[chunk_size:] - - if buffer: - yield buffer.encode("utf-8") diff --git a/app/site_registry.py b/app/site_registry.py deleted file mode 100644 index b257326..0000000 --- a/app/site_registry.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import annotations - -import json -import time -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional - - -@dataclass -class SiteInfo: - site_id: str - endpoint: str - region: str = "us-east-1" - priority: int = 100 - display_name: str = "" - created_at: Optional[float] = None - updated_at: Optional[float] = None - - def __post_init__(self) -> None: - if not self.display_name: - self.display_name = self.site_id - if self.created_at is None: - self.created_at = time.time() - - def to_dict(self) -> Dict[str, Any]: - return { - "site_id": self.site_id, - "endpoint": self.endpoint, - "region": self.region, - "priority": self.priority, - "display_name": self.display_name, - "created_at": self.created_at, - "updated_at": self.updated_at, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> SiteInfo: - return cls( - site_id=data["site_id"], - endpoint=data.get("endpoint", ""), - region=data.get("region", "us-east-1"), - priority=data.get("priority", 100), - display_name=data.get("display_name", ""), - created_at=data.get("created_at"), - updated_at=data.get("updated_at"), - ) - - -@dataclass -class PeerSite: - site_id: str - endpoint: str - region: str = "us-east-1" - priority: int = 100 - display_name: str = "" - created_at: Optional[float] = None - updated_at: Optional[float] = None - connection_id: Optional[str] = None - is_healthy: Optional[bool] = None - last_health_check: Optional[float] = None - - def __post_init__(self) -> None: - if not self.display_name: - self.display_name = self.site_id - if self.created_at is None: - self.created_at = time.time() - - def to_dict(self) -> Dict[str, Any]: - return { - "site_id": self.site_id, - "endpoint": self.endpoint, - "region": self.region, - "priority": self.priority, 
- "display_name": self.display_name, - "created_at": self.created_at, - "updated_at": self.updated_at, - "connection_id": self.connection_id, - "is_healthy": self.is_healthy, - "last_health_check": self.last_health_check, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> PeerSite: - return cls( - site_id=data["site_id"], - endpoint=data.get("endpoint", ""), - region=data.get("region", "us-east-1"), - priority=data.get("priority", 100), - display_name=data.get("display_name", ""), - created_at=data.get("created_at"), - updated_at=data.get("updated_at"), - connection_id=data.get("connection_id"), - is_healthy=data.get("is_healthy"), - last_health_check=data.get("last_health_check"), - ) - - -class SiteRegistry: - def __init__(self, config_path: Path) -> None: - self.config_path = config_path - self._local_site: Optional[SiteInfo] = None - self._peers: Dict[str, PeerSite] = {} - self.reload() - - def reload(self) -> None: - if not self.config_path.exists(): - self._local_site = None - self._peers = {} - return - - try: - with open(self.config_path, "r", encoding="utf-8") as f: - data = json.load(f) - - if data.get("local"): - self._local_site = SiteInfo.from_dict(data["local"]) - else: - self._local_site = None - - self._peers = {} - for peer_data in data.get("peers", []): - peer = PeerSite.from_dict(peer_data) - self._peers[peer.site_id] = peer - - except (OSError, json.JSONDecodeError, KeyError): - self._local_site = None - self._peers = {} - - def save(self) -> None: - self.config_path.parent.mkdir(parents=True, exist_ok=True) - data = { - "local": self._local_site.to_dict() if self._local_site else None, - "peers": [peer.to_dict() for peer in self._peers.values()], - } - with open(self.config_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - - def get_local_site(self) -> Optional[SiteInfo]: - return self._local_site - - def set_local_site(self, site: SiteInfo) -> None: - site.updated_at = time.time() - self._local_site = site - self.save() - - def list_peers(self) -> List[PeerSite]: - return list(self._peers.values()) - - def get_peer(self, site_id: str) -> Optional[PeerSite]: - return self._peers.get(site_id) - - def add_peer(self, peer: PeerSite) -> None: - peer.created_at = peer.created_at or time.time() - self._peers[peer.site_id] = peer - self.save() - - def update_peer(self, peer: PeerSite) -> None: - if peer.site_id not in self._peers: - raise ValueError(f"Peer {peer.site_id} not found") - peer.updated_at = time.time() - self._peers[peer.site_id] = peer - self.save() - - def delete_peer(self, site_id: str) -> bool: - if site_id in self._peers: - del self._peers[site_id] - self.save() - return True - return False - - def update_health(self, site_id: str, is_healthy: bool) -> None: - peer = self._peers.get(site_id) - if peer: - peer.is_healthy = is_healthy - peer.last_health_check = time.time() - self.save() diff --git a/app/site_sync.py b/app/site_sync.py deleted file mode 100644 index 57cf185..0000000 --- a/app/site_sync.py +++ /dev/null @@ -1,416 +0,0 @@ -from __future__ import annotations - -import json -import logging -import tempfile -import threading -import time -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, TYPE_CHECKING - -import boto3 -from botocore.config import Config -from botocore.exceptions import ClientError - -if TYPE_CHECKING: - from .connections import ConnectionStore, RemoteConnection - from .replication import 
ReplicationManager, ReplicationRule - from .storage import ObjectStorage - -logger = logging.getLogger(__name__) - -SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0" - - -@dataclass -class SyncedObjectInfo: - last_synced_at: float - remote_etag: str - source: str - - def to_dict(self) -> Dict[str, Any]: - return { - "last_synced_at": self.last_synced_at, - "remote_etag": self.remote_etag, - "source": self.source, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo": - return cls( - last_synced_at=data["last_synced_at"], - remote_etag=data["remote_etag"], - source=data["source"], - ) - - -@dataclass -class SyncState: - synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict) - last_full_sync: Optional[float] = None - - def to_dict(self) -> Dict[str, Any]: - return { - "synced_objects": {k: v.to_dict() for k, v in self.synced_objects.items()}, - "last_full_sync": self.last_full_sync, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "SyncState": - synced_objects = {} - for k, v in data.get("synced_objects", {}).items(): - synced_objects[k] = SyncedObjectInfo.from_dict(v) - return cls( - synced_objects=synced_objects, - last_full_sync=data.get("last_full_sync"), - ) - - -@dataclass -class SiteSyncStats: - last_sync_at: Optional[float] = None - objects_pulled: int = 0 - objects_skipped: int = 0 - conflicts_resolved: int = 0 - deletions_applied: int = 0 - errors: int = 0 - - def to_dict(self) -> Dict[str, Any]: - return { - "last_sync_at": self.last_sync_at, - "objects_pulled": self.objects_pulled, - "objects_skipped": self.objects_skipped, - "conflicts_resolved": self.conflicts_resolved, - "deletions_applied": self.deletions_applied, - "errors": self.errors, - } - - -@dataclass -class RemoteObjectMeta: - key: str - size: int - last_modified: datetime - etag: str - - @classmethod - def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta": - return cls( - key=obj["Key"], - size=obj.get("Size", 0), - last_modified=obj["LastModified"], - etag=obj.get("ETag", "").strip('"'), - ) - - -def _create_sync_client( - connection: "RemoteConnection", - *, - connect_timeout: int = 10, - read_timeout: int = 120, - max_retries: int = 2, -) -> Any: - config = Config( - user_agent_extra=SITE_SYNC_USER_AGENT, - connect_timeout=connect_timeout, - read_timeout=read_timeout, - retries={"max_attempts": max_retries}, - signature_version="s3v4", - s3={"addressing_style": "path"}, - request_checksum_calculation="when_required", - response_checksum_validation="when_required", - ) - return boto3.client( - "s3", - endpoint_url=connection.endpoint_url, - aws_access_key_id=connection.access_key, - aws_secret_access_key=connection.secret_key, - region_name=connection.region or "us-east-1", - config=config, - ) - - -class SiteSyncWorker: - def __init__( - self, - storage: "ObjectStorage", - connections: "ConnectionStore", - replication_manager: "ReplicationManager", - storage_root: Path, - interval_seconds: int = 60, - batch_size: int = 100, - connect_timeout: int = 10, - read_timeout: int = 120, - max_retries: int = 2, - clock_skew_tolerance_seconds: float = 1.0, - ): - self.storage = storage - self.connections = connections - self.replication_manager = replication_manager - self.storage_root = storage_root - self.interval_seconds = interval_seconds - self.batch_size = batch_size - self.connect_timeout = connect_timeout - self.read_timeout = read_timeout - self.max_retries = max_retries - self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds - 
self._lock = threading.Lock() - self._shutdown = threading.Event() - self._sync_thread: Optional[threading.Thread] = None - self._bucket_stats: Dict[str, SiteSyncStats] = {} - - def _create_client(self, connection: "RemoteConnection") -> Any: - """Create an S3 client with the worker's configured timeouts.""" - return _create_sync_client( - connection, - connect_timeout=self.connect_timeout, - read_timeout=self.read_timeout, - max_retries=self.max_retries, - ) - - def start(self) -> None: - if self._sync_thread is not None and self._sync_thread.is_alive(): - return - self._shutdown.clear() - self._sync_thread = threading.Thread( - target=self._sync_loop, name="site-sync-worker", daemon=True - ) - self._sync_thread.start() - logger.info("Site sync worker started (interval=%ds)", self.interval_seconds) - - def shutdown(self) -> None: - self._shutdown.set() - if self._sync_thread is not None: - self._sync_thread.join(timeout=10.0) - logger.info("Site sync worker shut down") - - def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]: - from .replication import REPLICATION_MODE_BIDIRECTIONAL - rule = self.replication_manager.get_rule(bucket_name) - if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled: - return None - return self._sync_bucket(rule) - - def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]: - with self._lock: - return self._bucket_stats.get(bucket_name) - - def _sync_loop(self) -> None: - while not self._shutdown.is_set(): - self._shutdown.wait(timeout=self.interval_seconds) - if self._shutdown.is_set(): - break - self._run_sync_cycle() - - def _run_sync_cycle(self) -> None: - from .replication import REPLICATION_MODE_BIDIRECTIONAL - for bucket_name, rule in list(self.replication_manager._rules.items()): - if self._shutdown.is_set(): - break - if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled: - continue - try: - stats = self._sync_bucket(rule) - with self._lock: - self._bucket_stats[bucket_name] = stats - except Exception as e: - logger.exception("Site sync failed for bucket %s: %s", bucket_name, e) - - def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats: - stats = SiteSyncStats() - connection = self.connections.get(rule.target_connection_id) - if not connection: - logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name) - stats.errors += 1 - return stats - - try: - local_objects = self._list_local_objects(rule.bucket_name) - except Exception as e: - logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e) - stats.errors += 1 - return stats - - try: - remote_objects = self._list_remote_objects(rule, connection) - except Exception as e: - logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e) - stats.errors += 1 - return stats - - sync_state = self._load_sync_state(rule.bucket_name) - local_keys = set(local_objects.keys()) - remote_keys = set(remote_objects.keys()) - - to_pull = [] - for key in remote_keys: - remote_meta = remote_objects[key] - local_meta = local_objects.get(key) - if local_meta is None: - to_pull.append(key) - else: - resolution = self._resolve_conflict(local_meta, remote_meta) - if resolution == "pull": - to_pull.append(key) - stats.conflicts_resolved += 1 - else: - stats.objects_skipped += 1 - - pulled_count = 0 - for key in to_pull: - if self._shutdown.is_set(): - break - if pulled_count >= self.batch_size: - break - remote_meta = remote_objects[key] - success = self._pull_object(rule, key, 
connection, remote_meta) - if success: - stats.objects_pulled += 1 - pulled_count += 1 - sync_state.synced_objects[key] = SyncedObjectInfo( - last_synced_at=time.time(), - remote_etag=remote_meta.etag, - source="remote", - ) - else: - stats.errors += 1 - - if rule.sync_deletions: - for key in list(sync_state.synced_objects.keys()): - if key not in remote_keys and key in local_keys: - tracked = sync_state.synced_objects[key] - if tracked.source == "remote": - local_meta = local_objects.get(key) - if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at: - success = self._apply_remote_deletion(rule.bucket_name, key) - if success: - stats.deletions_applied += 1 - del sync_state.synced_objects[key] - - sync_state.last_full_sync = time.time() - self._save_sync_state(rule.bucket_name, sync_state) - - with self.replication_manager._stats_lock: - rule.last_pull_at = time.time() - self.replication_manager.save_rules() - - stats.last_sync_at = time.time() - logger.info( - "Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d", - rule.bucket_name, - stats.objects_pulled, - stats.objects_skipped, - stats.conflicts_resolved, - stats.deletions_applied, - stats.errors, - ) - return stats - - def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]: - from .storage import ObjectMeta - objects = self.storage.list_objects_all(bucket_name) - return {obj.key: obj for obj in objects} - - def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]: - s3 = self._create_client(connection) - result: Dict[str, RemoteObjectMeta] = {} - paginator = s3.get_paginator("list_objects_v2") - try: - for page in paginator.paginate(Bucket=rule.target_bucket): - for obj in page.get("Contents", []): - meta = RemoteObjectMeta.from_s3_object(obj) - result[meta.key] = meta - except ClientError as e: - if e.response["Error"]["Code"] == "NoSuchBucket": - return {} - raise - return result - - def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str: - local_ts = local_meta.last_modified.timestamp() - remote_ts = remote_meta.last_modified.timestamp() - - if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds: - local_etag = local_meta.etag or "" - if remote_meta.etag == local_etag: - return "skip" - return "pull" if remote_meta.etag > local_etag else "keep" - - return "pull" if remote_ts > local_ts else "keep" - - def _pull_object( - self, - rule: "ReplicationRule", - object_key: str, - connection: "RemoteConnection", - remote_meta: RemoteObjectMeta, - ) -> bool: - s3 = self._create_client(connection) - tmp_path = None - try: - tmp_dir = self.storage_root / ".myfsio.sys" / "tmp" - tmp_dir.mkdir(parents=True, exist_ok=True) - with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file: - tmp_path = Path(tmp_file.name) - - s3.download_file(rule.target_bucket, object_key, str(tmp_path)) - - head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key) - user_metadata = head_response.get("Metadata", {}) - - with open(tmp_path, "rb") as f: - self.storage.put_object( - rule.bucket_name, - object_key, - f, - metadata=user_metadata if user_metadata else None, - ) - - logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key) - return True - - except ClientError as e: - logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e) - return False - except Exception as e: - logger.error("Failed to store pulled object %s/%s: %s", 
rule.bucket_name, object_key, e) - return False - finally: - if tmp_path and tmp_path.exists(): - try: - tmp_path.unlink() - except OSError: - pass - - def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool: - try: - self.storage.delete_object(bucket_name, object_key) - logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key) - return True - except Exception as e: - logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e) - return False - - def _sync_state_path(self, bucket_name: str) -> Path: - return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json" - - def _load_sync_state(self, bucket_name: str) -> SyncState: - path = self._sync_state_path(bucket_name) - if not path.exists(): - return SyncState() - try: - data = json.loads(path.read_text(encoding="utf-8")) - return SyncState.from_dict(data) - except (json.JSONDecodeError, OSError, KeyError) as e: - logger.warning("Failed to load sync state for %s: %s", bucket_name, e) - return SyncState() - - def _save_sync_state(self, bucket_name: str, state: SyncState) -> None: - path = self._sync_state_path(bucket_name) - path.parent.mkdir(parents=True, exist_ok=True) - try: - path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8") - except OSError as e: - logger.warning("Failed to save sync state for %s: %s", bucket_name, e) diff --git a/app/storage.py b/app/storage.py deleted file mode 100644 index d2469e9..0000000 --- a/app/storage.py +++ /dev/null @@ -1,2904 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import logging -import os -import re -import shutil -import stat -import threading -import time -import unicodedata -import uuid -from collections import OrderedDict -from concurrent.futures import ThreadPoolExecutor -from contextlib import contextmanager -from dataclasses import dataclass -from datetime import datetime, timezone -from pathlib import Path, PurePosixPath -from typing import Any, BinaryIO, Dict, Generator, List, Optional - -try: - import myfsio_core as _rc - if not all(hasattr(_rc, f) for f in ( - "validate_bucket_name", "validate_object_key", "md5_file", - "shallow_scan", "bucket_stats_scan", "search_objects_scan", - "stream_to_file_with_md5", "assemble_parts_with_md5", - "build_object_cache", "read_index_entry", "write_index_entry", - "delete_index_entry", "check_bucket_contents", - )): - raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release") - _HAS_RUST = True -except ImportError: - _rc = None - _HAS_RUST = False - -logger = logging.getLogger(__name__) - -if os.name == "nt": - import msvcrt - - @contextmanager - def _file_lock(file_handle) -> Generator[None, None, None]: - """Acquire an exclusive lock on a file (Windows).""" - try: - msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1) - yield - finally: - try: - file_handle.seek(0) - msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1) - except OSError: - pass -else: - import fcntl # type: ignore - - @contextmanager - def _file_lock(file_handle) -> Generator[None, None, None]: - """Acquire an exclusive lock on a file (Unix).""" - try: - fcntl.flock(file_handle.fileno(), fcntl.LOCK_EX) - yield - finally: - fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN) - - -@contextmanager -def _atomic_lock_file(lock_path: Path, max_retries: int = 10, base_delay: float = 0.1) -> Generator[None, None, None]: - """Atomically acquire a lock file with exponential backoff. 
- - Uses O_EXCL to ensure atomic creation of the lock file. - """ - lock_path.parent.mkdir(parents=True, exist_ok=True) - fd = None - for attempt in range(max_retries): - try: - fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY) - break - except FileExistsError: - if attempt == max_retries - 1: - raise BlockingIOError("Another upload to this key is in progress") - delay = base_delay * (2 ** attempt) - time.sleep(min(delay, 2.0)) - try: - yield - finally: - if fd is not None: - os.close(fd) - try: - lock_path.unlink(missing_ok=True) - except OSError: - pass - - -WINDOWS_RESERVED_NAMES = { - "CON", - "PRN", - "AUX", - "NUL", - "COM1", - "COM2", - "COM3", - "COM4", - "COM5", - "COM6", - "COM7", - "COM8", - "COM9", - "LPT1", - "LPT2", - "LPT3", - "LPT4", - "LPT5", - "LPT6", - "LPT7", - "LPT8", - "LPT9", -} - - -class StorageError(RuntimeError): - """Raised when the storage layer encounters an unrecoverable problem.""" - - -class BucketNotFoundError(StorageError): - """Raised when the bucket does not exist.""" - - -class ObjectNotFoundError(StorageError): - """Raised when the object does not exist.""" - - -class QuotaExceededError(StorageError): - """Raised when an operation would exceed bucket quota limits.""" - - def __init__(self, message: str, quota: Dict[str, Any], usage: Dict[str, int]): - super().__init__(message) - self.quota = quota - self.usage = usage - - -@dataclass -class ObjectMeta: - key: str - size: int - last_modified: datetime - etag: Optional[str] = None - metadata: Optional[Dict[str, str]] = None - - -@dataclass -class BucketMeta: - name: str - created_at: datetime - - -@dataclass -class ListObjectsResult: - """Paginated result for object listing.""" - objects: List[ObjectMeta] - is_truncated: bool - next_continuation_token: Optional[str] - total_count: Optional[int] = None - - -@dataclass -class ShallowListResult: - """Result for delimiter-aware directory-level listing.""" - objects: List[ObjectMeta] - common_prefixes: List[str] - is_truncated: bool - next_continuation_token: Optional[str] - - -def _utcnow() -> datetime: - return datetime.now(timezone.utc) - - -def _utc_isoformat() -> str: - return _utcnow().isoformat().replace("+00:00", "Z") - - -class ObjectStorage: - """Very small filesystem wrapper implementing the bare S3 primitives.""" - - INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"} - SYSTEM_ROOT = ".myfsio.sys" - SYSTEM_BUCKETS_DIR = "buckets" - SYSTEM_MULTIPART_DIR = "multipart" - SYSTEM_TMP_DIR = "tmp" - BUCKET_META_DIR = "meta" - BUCKET_VERSIONS_DIR = "versions" - MULTIPART_MANIFEST = "manifest.json" - BUCKET_CONFIG_FILE = ".bucket.json" - - def __init__( - self, - root: Path, - cache_ttl: int = 5, - object_cache_max_size: int = 100, - bucket_config_cache_ttl: float = 30.0, - object_key_max_length_bytes: int = 1024, - meta_read_cache_max: int = 2048, - ) -> None: - self.root = Path(root) - self.root.mkdir(parents=True, exist_ok=True) - self._ensure_system_roots() - self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict() - self._obj_cache_lock = threading.Lock() - self._meta_cache_lock = threading.Lock() - self._registry_lock = threading.Lock() - self._bucket_locks: Dict[str, threading.Lock] = {} - self._cache_version: Dict[str, int] = {} - self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {} - self._bucket_config_cache_ttl = bucket_config_cache_ttl - self._cache_ttl = cache_ttl - self._object_cache_max_size = object_cache_max_size - self._object_key_max_length_bytes = 
object_key_max_length_bytes - self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {} - self._meta_index_locks: Dict[str, threading.Lock] = {} - self._meta_read_cache: OrderedDict[tuple, Optional[Dict[str, Any]]] = OrderedDict() - self._meta_read_cache_max = meta_read_cache_max - self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup") - self._stats_mem: Dict[str, Dict[str, int]] = {} - self._stats_serial: Dict[str, int] = {} - self._stats_mem_time: Dict[str, float] = {} - self._stats_lock = threading.Lock() - self._stats_dirty: set[str] = set() - self._stats_flush_timer: Optional[threading.Timer] = None - self._etag_index_dirty: set[str] = set() - self._etag_index_flush_timer: Optional[threading.Timer] = None - self._etag_index_mem: Dict[str, tuple[Dict[str, str], float]] = {} - - def _get_bucket_lock(self, bucket_id: str) -> threading.Lock: - with self._registry_lock: - if bucket_id not in self._bucket_locks: - self._bucket_locks[bucket_id] = threading.Lock() - return self._bucket_locks[bucket_id] - - def list_buckets(self) -> List[BucketMeta]: - buckets: List[BucketMeta] = [] - for bucket in sorted(self.root.iterdir()): - if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT: - stat = bucket.stat() - buckets.append( - BucketMeta( - name=bucket.name, - created_at=datetime.fromtimestamp(stat.st_ctime, timezone.utc), - ) - ) - return buckets - - def bucket_exists(self, bucket_name: str) -> bool: - return self._bucket_path(bucket_name).exists() - - def _require_bucket_exists(self, bucket_path: Path) -> None: - """Raise BucketNotFoundError if bucket does not exist.""" - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - - def _validate_bucket_name(self, bucket_name: str) -> None: - if _HAS_RUST: - error = _rc.validate_bucket_name(bucket_name) - if error: - raise StorageError(error) - return - if len(bucket_name) < 3 or len(bucket_name) > 63: - raise StorageError("Bucket name must be between 3 and 63 characters") - if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name): - raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number") - if ".." 
in bucket_name: - raise StorageError("Bucket name must not contain consecutive periods") - if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name): - raise StorageError("Bucket name must not be formatted as an IP address") - - def create_bucket(self, bucket_name: str) -> None: - self._validate_bucket_name(bucket_name) - bucket_path = self._bucket_path(bucket_name) - bucket_path.mkdir(parents=True, exist_ok=False) - self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True) - - def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - - with self._stats_lock: - if bucket_name in self._stats_mem: - cached_at = self._stats_mem_time.get(bucket_name, 0.0) - if (time.monotonic() - cached_at) < cache_ttl: - return dict(self._stats_mem[bucket_name]) - self._stats_mem.pop(bucket_name, None) - self._stats_mem_time.pop(bucket_name, None) - - cache_path = self._system_bucket_root(bucket_name) / "stats.json" - cached_stats = None - - if cache_path.exists(): - try: - cached_stats = json.loads(cache_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - - object_count = 0 - total_bytes = 0 - version_count = 0 - version_bytes = 0 - - internal = self.INTERNAL_FOLDERS - bucket_str = str(bucket_path) - - try: - if _HAS_RUST: - versions_root = str(self._bucket_versions_root(bucket_name)) - object_count, total_bytes, version_count, version_bytes = _rc.bucket_stats_scan( - bucket_str, versions_root - ) - else: - stack = [bucket_str] - while stack: - current = stack.pop() - try: - with os.scandir(current) as it: - for entry in it: - if current == bucket_str and entry.name in internal: - continue - if entry.is_dir(follow_symlinks=False): - stack.append(entry.path) - elif entry.is_file(follow_symlinks=False): - object_count += 1 - total_bytes += entry.stat(follow_symlinks=False).st_size - except PermissionError: - continue - - versions_root = self._bucket_versions_root(bucket_name) - if versions_root.exists(): - v_stack = [str(versions_root)] - while v_stack: - v_current = v_stack.pop() - try: - with os.scandir(v_current) as it: - for entry in it: - if entry.is_dir(follow_symlinks=False): - v_stack.append(entry.path) - elif entry.is_file(follow_symlinks=False) and entry.name.endswith(".bin"): - version_count += 1 - version_bytes += entry.stat(follow_symlinks=False).st_size - except PermissionError: - continue - except OSError: - if cached_stats is not None: - return cached_stats - raise - - existing_serial = 0 - if cached_stats is not None: - existing_serial = cached_stats.get("_cache_serial", 0) - - stats = { - "objects": object_count, - "bytes": total_bytes, - "version_count": version_count, - "version_bytes": version_bytes, - "total_objects": object_count + version_count, - "total_bytes": total_bytes + version_bytes, - "_cache_serial": existing_serial, - } - - with self._stats_lock: - self._stats_mem[bucket_name] = stats - self._stats_mem_time[bucket_name] = time.monotonic() - self._stats_serial[bucket_name] = existing_serial - - try: - cache_path.parent.mkdir(parents=True, exist_ok=True) - self._atomic_write_json(cache_path, stats) - except OSError: - pass - - return stats - - def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None: - with self._stats_lock: - self._stats_mem.pop(bucket_id, None) - self._stats_mem_time.pop(bucket_id, None) - self._stats_serial[bucket_id] = 
self._stats_serial.get(bucket_id, 0) + 1 - self._stats_dirty.discard(bucket_id) - cache_path = self._system_bucket_root(bucket_id) / "stats.json" - try: - cache_path.unlink(missing_ok=True) - except OSError: - pass - - def _update_bucket_stats_cache( - self, - bucket_id: str, - *, - bytes_delta: int = 0, - objects_delta: int = 0, - version_bytes_delta: int = 0, - version_count_delta: int = 0, - ) -> None: - with self._stats_lock: - if bucket_id not in self._stats_mem: - self._stats_mem[bucket_id] = { - "objects": 0, "bytes": 0, "version_count": 0, - "version_bytes": 0, "total_objects": 0, "total_bytes": 0, - "_cache_serial": 0, - } - data = self._stats_mem[bucket_id] - data["objects"] = max(0, data["objects"] + objects_delta) - data["bytes"] = max(0, data["bytes"] + bytes_delta) - data["version_count"] = max(0, data["version_count"] + version_count_delta) - data["version_bytes"] = max(0, data["version_bytes"] + version_bytes_delta) - data["total_objects"] = max(0, data["total_objects"] + objects_delta + version_count_delta) - data["total_bytes"] = max(0, data["total_bytes"] + bytes_delta + version_bytes_delta) - data["_cache_serial"] = data["_cache_serial"] + 1 - self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1 - self._stats_mem_time[bucket_id] = time.monotonic() - self._stats_dirty.add(bucket_id) - needs_immediate = data["objects"] == 0 and objects_delta < 0 - if needs_immediate: - self._flush_stats() - else: - self._schedule_stats_flush() - - def _schedule_stats_flush(self) -> None: - if self._stats_flush_timer is None or not self._stats_flush_timer.is_alive(): - self._stats_flush_timer = threading.Timer(3.0, self._flush_stats) - self._stats_flush_timer.daemon = True - self._stats_flush_timer.start() - - def _flush_stats(self) -> None: - with self._stats_lock: - dirty = list(self._stats_dirty) - self._stats_dirty.clear() - snapshots = {b: dict(self._stats_mem[b]) for b in dirty if b in self._stats_mem} - for bucket_id, data in snapshots.items(): - cache_path = self._system_bucket_root(bucket_id) / "stats.json" - try: - cache_path.parent.mkdir(parents=True, exist_ok=True) - self._atomic_write_json(cache_path, data, sync=False) - except OSError: - pass - - def shutdown_stats(self) -> None: - if self._stats_flush_timer is not None: - self._stats_flush_timer.cancel() - self._flush_stats() - if self._etag_index_flush_timer is not None: - self._etag_index_flush_timer.cancel() - self._flush_etag_indexes() - - def delete_bucket(self, bucket_name: str) -> None: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path) - if has_objects: - raise StorageError("Bucket not empty") - if has_versions: - raise StorageError("Bucket contains archived object versions") - if has_multipart: - raise StorageError("Bucket has active multipart uploads") - bucket_id = bucket_path.name - self._remove_tree(bucket_path) - self._remove_tree(self._system_bucket_root(bucket_id)) - self._remove_tree(self._multipart_bucket_root(bucket_id)) - self._bucket_config_cache.pop(bucket_id, None) - with self._obj_cache_lock: - self._object_cache.pop(bucket_id, None) - self._cache_version.pop(bucket_id, None) - self._sorted_key_cache.pop(bucket_id, None) - with self._meta_cache_lock: - stale = [k for k in self._meta_read_cache if k[0] == bucket_id] - for k in stale: - del self._meta_read_cache[k] - with self._stats_lock: - 
self._stats_mem.pop(bucket_id, None) - self._stats_mem_time.pop(bucket_id, None) - self._stats_serial.pop(bucket_id, None) - self._stats_dirty.discard(bucket_id) - self._etag_index_dirty.discard(bucket_id) - - def list_objects( - self, - bucket_name: str, - *, - max_keys: int = 1000, - continuation_token: Optional[str] = None, - prefix: Optional[str] = None, - ) -> ListObjectsResult: - """List objects in a bucket with pagination support. - - Args: - bucket_name: Name of the bucket - max_keys: Maximum number of objects to return (default 1000) - continuation_token: Token from previous request for pagination - prefix: Filter objects by key prefix - - Returns: - ListObjectsResult with objects, truncation status, and continuation token - """ - import bisect - - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - - object_cache = self._get_object_cache(bucket_id, bucket_path) - - cache_version = self._cache_version.get(bucket_id, 0) - cached_entry = self._sorted_key_cache.get(bucket_id) - if cached_entry and cached_entry[1] == cache_version: - all_keys = cached_entry[0] - else: - all_keys = sorted(object_cache.keys()) - self._sorted_key_cache[bucket_id] = (all_keys, cache_version) - - if prefix: - lo = bisect.bisect_left(all_keys, prefix) - hi = len(all_keys) - for i in range(lo, len(all_keys)): - if not all_keys[i].startswith(prefix): - hi = i - break - all_keys = all_keys[lo:hi] - - total_count = len(all_keys) - start_index = 0 - if continuation_token: - start_index = bisect.bisect_right(all_keys, continuation_token) - if start_index >= total_count: - return ListObjectsResult( - objects=[], - is_truncated=False, - next_continuation_token=None, - total_count=total_count, - ) - - end_index = start_index + max_keys - keys_slice = all_keys[start_index:end_index] - is_truncated = end_index < total_count - - objects: List[ObjectMeta] = [] - for key in keys_slice: - obj = object_cache.get(key) - if obj: - objects.append(obj) - - next_token = keys_slice[-1] if is_truncated and keys_slice else None - - return ListObjectsResult( - objects=objects, - is_truncated=is_truncated, - next_continuation_token=next_token, - total_count=total_count, - ) - - def list_objects_all(self, bucket_name: str) -> List[ObjectMeta]: - """List all objects in a bucket (no pagination). Use with caution for large buckets.""" - result = self.list_objects(bucket_name, max_keys=100000) - return result.objects - - def list_objects_shallow( - self, - bucket_name: str, - *, - prefix: str = "", - delimiter: str = "/", - max_keys: int = 1000, - continuation_token: Optional[str] = None, - ) -> ShallowListResult: - import bisect - - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - - if delimiter != "/" or (prefix and not prefix.endswith(delimiter)): - return self._shallow_via_full_scan( - bucket_name, prefix=prefix, delimiter=delimiter, - max_keys=max_keys, continuation_token=continuation_token, - ) - - target_dir = bucket_path - if prefix: - safe_prefix_path = Path(prefix.rstrip("/")) - if ".." 
in safe_prefix_path.parts: - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - target_dir = bucket_path / safe_prefix_path - try: - resolved = target_dir.resolve() - bucket_resolved = bucket_path.resolve() - if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved: - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - except (OSError, ValueError): - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - - if not target_dir.exists() or not target_dir.is_dir(): - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - - meta_cache: Dict[str, str] = self._get_etag_index(bucket_id) - - entries_files: list[tuple[str, int, float, Optional[str]]] = [] - entries_dirs: list[str] = [] - - if _HAS_RUST: - try: - raw = _rc.shallow_scan(str(target_dir), prefix, json.dumps(meta_cache)) - entries_files = [] - for key, size, mtime, etag in raw["files"]: - if etag is None: - safe_key = PurePosixPath(key) - meta = self._read_metadata(bucket_id, Path(safe_key)) - etag = meta.get("__etag__") if meta else None - entries_files.append((key, size, mtime, etag)) - entries_dirs = raw["dirs"] - all_items = raw["merged_keys"] - except OSError: - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - else: - try: - with os.scandir(str(target_dir)) as it: - for entry in it: - name = entry.name - if name in self.INTERNAL_FOLDERS: - continue - if entry.is_dir(follow_symlinks=False): - cp = prefix + name + delimiter - entries_dirs.append(cp) - elif entry.is_file(follow_symlinks=False): - key = prefix + name - try: - st = entry.stat() - etag = meta_cache.get(key) - if etag is None: - safe_key = PurePosixPath(key) - meta = self._read_metadata(bucket_id, Path(safe_key)) - etag = meta.get("__etag__") if meta else None - entries_files.append((key, st.st_size, st.st_mtime, etag)) - except OSError: - pass - except OSError: - return ShallowListResult( - objects=[], common_prefixes=[], - is_truncated=False, next_continuation_token=None, - ) - - entries_dirs.sort() - entries_files.sort(key=lambda x: x[0]) - - all_items: list[tuple[str, bool]] = [] - fi, di = 0, 0 - while fi < len(entries_files) and di < len(entries_dirs): - if entries_files[fi][0] <= entries_dirs[di]: - all_items.append((entries_files[fi][0], False)) - fi += 1 - else: - all_items.append((entries_dirs[di], True)) - di += 1 - while fi < len(entries_files): - all_items.append((entries_files[fi][0], False)) - fi += 1 - while di < len(entries_dirs): - all_items.append((entries_dirs[di], True)) - di += 1 - - files_map = {e[0]: e for e in entries_files} - - start_index = 0 - if continuation_token: - all_keys = [item[0] for item in all_items] - start_index = bisect.bisect_right(all_keys, continuation_token) - - selected = all_items[start_index:start_index + max_keys] - is_truncated = (start_index + max_keys) < len(all_items) - - result_objects: list[ObjectMeta] = [] - result_prefixes: list[str] = [] - for item_key, is_dir in selected: - if is_dir: - result_prefixes.append(item_key) - else: - fdata = files_map[item_key] - result_objects.append(ObjectMeta( - key=fdata[0], - size=fdata[1], - last_modified=datetime.fromtimestamp(fdata[2], timezone.utc), - etag=fdata[3], - metadata=None, - )) - - next_token = None - if 
is_truncated and selected: - next_token = selected[-1][0] - - return ShallowListResult( - objects=result_objects, - common_prefixes=result_prefixes, - is_truncated=is_truncated, - next_continuation_token=next_token, - ) - - def iter_objects_shallow( - self, - bucket_name: str, - *, - prefix: str = "", - delimiter: str = "/", - ) -> Generator[tuple[str, ObjectMeta | str], None, None]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - - target_dir = bucket_path - if prefix: - safe_prefix_path = Path(prefix.rstrip("/")) - if ".." in safe_prefix_path.parts: - return - target_dir = bucket_path / safe_prefix_path - try: - resolved = target_dir.resolve() - bucket_resolved = bucket_path.resolve() - if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved: - return - except (OSError, ValueError): - return - - if not target_dir.exists() or not target_dir.is_dir(): - return - - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - meta_cache: Dict[str, str] = {} - if etag_index_path.exists(): - try: - with open(etag_index_path, 'r', encoding='utf-8') as f: - meta_cache = json.load(f) - except (OSError, json.JSONDecodeError): - pass - - try: - with os.scandir(str(target_dir)) as it: - for entry in it: - name = entry.name - if name in self.INTERNAL_FOLDERS: - continue - if entry.is_dir(follow_symlinks=False): - yield ("folder", prefix + name + delimiter) - elif entry.is_file(follow_symlinks=False): - key = prefix + name - try: - st = entry.stat() - etag = meta_cache.get(key) - if etag is None: - safe_key = PurePosixPath(key) - meta = self._read_metadata(bucket_id, Path(safe_key)) - etag = meta.get("__etag__") if meta else None - yield ("object", ObjectMeta( - key=key, - size=st.st_size, - last_modified=datetime.fromtimestamp(st.st_mtime, timezone.utc), - etag=etag, - metadata=None, - )) - except OSError: - pass - except OSError: - return - - def _shallow_via_full_scan( - self, - bucket_name: str, - *, - prefix: str = "", - delimiter: str = "/", - max_keys: int = 1000, - continuation_token: Optional[str] = None, - ) -> ShallowListResult: - list_result = self.list_objects( - bucket_name, - max_keys=max_keys * 10, - continuation_token=continuation_token, - prefix=prefix or None, - ) - - common_prefixes: list[str] = [] - filtered_objects: list[ObjectMeta] = [] - seen_prefixes: set[str] = set() - - for obj in list_result.objects: - key_after_prefix = obj.key[len(prefix):] if prefix else obj.key - if delimiter in key_after_prefix: - cp = prefix + key_after_prefix.split(delimiter)[0] + delimiter - if cp not in seen_prefixes: - seen_prefixes.add(cp) - common_prefixes.append(cp) - else: - filtered_objects.append(obj) - - common_prefixes.sort() - total_items = len(filtered_objects) + len(common_prefixes) - is_truncated = total_items > max_keys or list_result.is_truncated - - if len(filtered_objects) >= max_keys: - filtered_objects = filtered_objects[:max_keys] - common_prefixes = [] - else: - remaining = max_keys - len(filtered_objects) - common_prefixes = common_prefixes[:remaining] - - next_token = None - if is_truncated: - if filtered_objects: - next_token = filtered_objects[-1].key - elif common_prefixes: - next_token = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1] - - return ShallowListResult( - objects=filtered_objects, - common_prefixes=common_prefixes, - is_truncated=is_truncated, - 
next_continuation_token=next_token, - ) - - def search_objects( - self, - bucket_name: str, - query: str, - *, - prefix: str = "", - limit: int = 500, - ) -> Dict[str, Any]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.is_dir(): - raise BucketNotFoundError("Bucket does not exist") - - if prefix: - search_root = bucket_path / prefix.replace("/", os.sep) - if not search_root.is_dir(): - return {"results": [], "truncated": False} - resolved = search_root.resolve() - if not str(resolved).startswith(str(bucket_path.resolve())): - return {"results": [], "truncated": False} - else: - search_root = bucket_path - - if _HAS_RUST: - raw = _rc.search_objects_scan( - str(bucket_path), str(search_root), query, limit - ) - results = [ - { - "key": k, - "size": s, - "last_modified": datetime.fromtimestamp( - m, tz=timezone.utc - ).strftime("%Y-%m-%dT%H:%M:%S.000Z"), - } - for k, s, m in raw["results"] - ] - return {"results": results, "truncated": raw["truncated"]} - - query_lower = query.lower() - results: list[Dict[str, Any]] = [] - internal = self.INTERNAL_FOLDERS - bucket_str = str(bucket_path) - bucket_len = len(bucket_str) + 1 - meta_root = self._bucket_meta_root(bucket_name) - scan_limit = limit * 4 - - matched = 0 - scanned = 0 - search_str = str(search_root) - stack = [search_str] - while stack: - current = stack.pop() - try: - with os.scandir(current) as it: - for entry in it: - if current == bucket_str and entry.name in internal: - continue - if entry.is_dir(follow_symlinks=False): - stack.append(entry.path) - elif entry.is_file(follow_symlinks=False): - scanned += 1 - key = entry.path[bucket_len:].replace(os.sep, "/") - if query_lower in key.lower(): - st = entry.stat(follow_symlinks=False) - meta_path = meta_root / (key + ".meta.json") - last_modified = "" - try: - if meta_path.exists(): - md = json.loads(meta_path.read_text(encoding="utf-8")) - last_modified = md.get("last_modified", "") - except (OSError, json.JSONDecodeError): - pass - if not last_modified: - last_modified = datetime.fromtimestamp( - st.st_mtime, tz=timezone.utc - ).strftime("%Y-%m-%dT%H:%M:%S.000Z") - results.append({ - "key": key, - "size": st.st_size, - "last_modified": last_modified, - }) - matched += 1 - if matched >= scan_limit: - break - except PermissionError: - continue - if matched >= scan_limit: - break - - results.sort(key=lambda r: r["key"]) - truncated = len(results) > limit - return {"results": results[:limit], "truncated": truncated} - - def put_object( - self, - bucket_name: str, - object_key: str, - stream: BinaryIO, - *, - metadata: Optional[Dict[str, str]] = None, - enforce_quota: bool = True, - ) -> ObjectMeta: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - destination = bucket_path / safe_key - destination.parent.mkdir(parents=True, exist_ok=True) - - is_overwrite = destination.exists() - existing_size = destination.stat().st_size if is_overwrite else 0 - - tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR - tmp_dir.mkdir(parents=True, exist_ok=True) - - if _HAS_RUST: - tmp_path = None - try: - tmp_path_str, etag, new_size = _rc.stream_to_file_with_md5( - stream, str(tmp_dir) - ) - tmp_path = Path(tmp_path_str) - - size_delta = new_size - existing_size - object_delta = 0 if is_overwrite else 1 - - if enforce_quota: - quota_check = self.check_quota( - bucket_name, - 
additional_bytes=max(0, size_delta), - additional_objects=object_delta, - ) - if not quota_check["allowed"]: - raise QuotaExceededError( - quota_check["message"] or "Quota exceeded", - quota_check["quota"], - quota_check["usage"], - ) - except BaseException: - if tmp_path: - try: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - raise - else: - tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp" - try: - checksum = hashlib.md5() - with tmp_path.open("wb") as target: - shutil.copyfileobj(_HashingReader(stream, checksum), target) - target.flush() - os.fsync(target.fileno()) - - new_size = tmp_path.stat().st_size - size_delta = new_size - existing_size - object_delta = 0 if is_overwrite else 1 - - if enforce_quota: - quota_check = self.check_quota( - bucket_name, - additional_bytes=max(0, size_delta), - additional_objects=object_delta, - ) - if not quota_check["allowed"]: - raise QuotaExceededError( - quota_check["message"] or "Quota exceeded", - quota_check["quota"], - quota_check["usage"], - ) - - etag = checksum.hexdigest() - except BaseException: - try: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - raise - - lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock" - try: - with _atomic_lock_file(lock_file_path): - archived_version_size = 0 - if self._is_versioning_enabled(bucket_path) and is_overwrite: - archived_version_size = existing_size - self._archive_current_version(bucket_id, safe_key, reason="overwrite") - - shutil.move(str(tmp_path), str(destination)) - tmp_path = None - - stat = destination.stat() - - internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)} - combined_meta = {**internal_meta, **(metadata or {})} - self._write_metadata(bucket_id, safe_key, combined_meta) - except BlockingIOError: - try: - if tmp_path: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - raise StorageError("Another upload to this key is in progress") - finally: - if tmp_path: - try: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - - self._update_bucket_stats_cache( - bucket_id, - bytes_delta=size_delta, - objects_delta=object_delta, - version_bytes_delta=archived_version_size, - version_count_delta=1 if archived_version_size > 0 else 0, - ) - - obj_meta = ObjectMeta( - key=safe_key.as_posix(), - size=stat.st_size, - last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc), - etag=etag, - metadata=metadata, - ) - self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta) - - return obj_meta - - def get_object_path(self, bucket_name: str, object_key: str) -> Path: - path = self._object_path(bucket_name, object_key) - if not path.is_file(): - raise ObjectNotFoundError("Object not found") - return path - - def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - return {} - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - return self._read_metadata(bucket_path.name, safe_key) or {} - - def heal_missing_etag(self, bucket_name: str, object_key: str, etag: str) -> None: - """Persist a computed ETag back to metadata (self-heal on read).""" - try: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - return - bucket_id = bucket_path.name - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - existing = self._read_metadata(bucket_id, 
safe_key) or {} - if existing.get("__etag__"): - return - existing["__etag__"] = etag - self._write_metadata(bucket_id, safe_key, existing) - with self._obj_cache_lock: - cached = self._object_cache.get(bucket_id) - if cached: - obj = cached[0].get(safe_key.as_posix()) - if obj and not obj.etag: - obj.etag = etag - self._etag_index_dirty.add(bucket_id) - self._schedule_etag_index_flush() - except Exception: - logger.warning("Failed to heal missing ETag for %s/%s", bucket_name, object_key) - - def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None: - """Remove empty parent directories in a background thread. - - On Windows/OneDrive, directories may be locked briefly after file deletion. - Running this in the background avoids blocking the request thread with retries. - """ - self._cleanup_executor.submit(self._do_cleanup_empty_parents, path, stop_at) - - def _do_cleanup_empty_parents(self, path: Path, stop_at: Path) -> None: - for parent in path.parents: - if parent == stop_at: - break - for attempt in range(3): - try: - if parent.exists() and not any(parent.iterdir()): - parent.rmdir() - break - except OSError: - if attempt < 2: - time.sleep(0.1) - break - - def delete_object(self, bucket_name: str, object_key: str) -> None: - bucket_path = self._bucket_path(bucket_name) - path = self._object_path(bucket_name, object_key) - if not path.exists(): - return - deleted_size = path.stat().st_size - safe_key = path.relative_to(bucket_path) - bucket_id = bucket_path.name - archived_version_size = 0 - if self._is_versioning_enabled(bucket_path): - archived_version_size = deleted_size - self._archive_current_version(bucket_id, safe_key, reason="delete") - rel = path.relative_to(bucket_path) - self._safe_unlink(path) - self._delete_metadata(bucket_id, rel) - - self._update_bucket_stats_cache( - bucket_id, - bytes_delta=-deleted_size, - objects_delta=-1, - version_bytes_delta=archived_version_size, - version_count_delta=1 if archived_version_size > 0 else 0, - ) - self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None) - self._cleanup_empty_parents(path, bucket_path) - - def purge_object(self, bucket_name: str, object_key: str) -> None: - bucket_path = self._bucket_path(bucket_name) - target = self._object_path(bucket_name, object_key) - bucket_id = bucket_path.name - if target.exists(): - rel = target.relative_to(bucket_path) - self._safe_unlink(target) - self._delete_metadata(bucket_id, rel) - else: - rel = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - self._delete_metadata(bucket_id, rel) - version_dir = self._version_dir(bucket_id, rel) - if version_dir.exists(): - shutil.rmtree(version_dir, ignore_errors=True) - legacy_version_dir = self._legacy_version_dir(bucket_id, rel) - if legacy_version_dir.exists(): - shutil.rmtree(legacy_version_dir, ignore_errors=True) - - self._invalidate_bucket_stats_cache(bucket_id) - self._update_object_cache_entry(bucket_id, rel.as_posix(), None) - self._cleanup_empty_parents(target, bucket_path) - - def is_versioning_enabled(self, bucket_name: str) -> bool: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - return self._is_versioning_enabled(bucket_path) - - def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None: - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - config["versioning_enabled"] = bool(enabled) - self._write_bucket_config(bucket_path.name, 
config) - - def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]: - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - raw_tags = config.get("tags") - if not isinstance(raw_tags, list): - return [] - tags: List[Dict[str, str]] = [] - for entry in raw_tags: - if not isinstance(entry, dict): - continue - key = str(entry.get("Key", "")).strip() - if not key: - continue - value = str(entry.get("Value", "")) - tags.append({"Key": key, "Value": value}) - return tags - - def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None: - bucket_path = self._require_bucket_path(bucket_name) - if not tags: - self._set_bucket_config_entry(bucket_path.name, "tags", None) - return - clean: List[Dict[str, str]] = [] - for entry in tags: - if not isinstance(entry, dict): - continue - key = str(entry.get("Key", "")).strip() - if not key: - continue - clean.append({"Key": key, "Value": str(entry.get("Value", ""))}) - self._set_bucket_config_entry(bucket_path.name, "tags", clean or None) - - def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]: - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - cors_rules = config.get("cors") - return cors_rules if isinstance(cors_rules, list) else [] - - def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None: - bucket_path = self._require_bucket_path(bucket_name) - self._set_bucket_config_entry(bucket_path.name, "cors", rules or None) - - def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]: - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - payload = config.get("encryption") - return payload if isinstance(payload, dict) else {} - - def set_bucket_encryption(self, bucket_name: str, config_payload: Optional[Dict[str, Any]]) -> None: - bucket_path = self._require_bucket_path(bucket_name) - self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None) - - def get_bucket_lifecycle(self, bucket_name: str) -> Optional[List[Dict[str, Any]]]: - """Get lifecycle configuration for bucket.""" - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - lifecycle = config.get("lifecycle") - return lifecycle if isinstance(lifecycle, list) else None - - def set_bucket_lifecycle(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None: - bucket_path = self._require_bucket_path(bucket_name) - self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules) - - def get_bucket_website(self, bucket_name: str) -> Optional[Dict[str, Any]]: - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - website = config.get("website") - return website if isinstance(website, dict) else None - - def set_bucket_website(self, bucket_name: str, website_config: Optional[Dict[str, Any]]) -> None: - bucket_path = self._require_bucket_path(bucket_name) - self._set_bucket_config_entry(bucket_path.name, "website", website_config) - - def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]: - """Get quota configuration for bucket. - - Returns: - Dict with 'max_bytes' and 'max_objects' (None if unlimited). 
- """ - bucket_path = self._require_bucket_path(bucket_name) - config = self._read_bucket_config(bucket_path.name) - quota = config.get("quota") - if isinstance(quota, dict): - return { - "max_bytes": quota.get("max_bytes"), - "max_objects": quota.get("max_objects"), - } - return {"max_bytes": None, "max_objects": None} - - def set_bucket_quota( - self, - bucket_name: str, - *, - max_bytes: Optional[int] = None, - max_objects: Optional[int] = None, - ) -> None: - """Set quota limits for a bucket. - - Args: - bucket_name: Name of the bucket - max_bytes: Maximum total size in bytes (None to remove limit) - max_objects: Maximum number of objects (None to remove limit) - """ - bucket_path = self._require_bucket_path(bucket_name) - - if max_bytes is None and max_objects is None: - self._set_bucket_config_entry(bucket_path.name, "quota", None) - return - - quota: Dict[str, Any] = {} - if max_bytes is not None: - if max_bytes < 0: - raise StorageError("max_bytes must be non-negative") - quota["max_bytes"] = max_bytes - if max_objects is not None: - if max_objects < 0: - raise StorageError("max_objects must be non-negative") - quota["max_objects"] = max_objects - - self._set_bucket_config_entry(bucket_path.name, "quota", quota) - - def check_quota( - self, - bucket_name: str, - additional_bytes: int = 0, - additional_objects: int = 0, - ) -> Dict[str, Any]: - """Check if an operation would exceed bucket quota. - - Args: - bucket_name: Name of the bucket - additional_bytes: Bytes that would be added - additional_objects: Objects that would be added - - Returns: - Dict with 'allowed' (bool), 'quota' (current limits), - 'usage' (current usage), and 'message' (if not allowed). - """ - quota = self.get_bucket_quota(bucket_name) - if not quota: - return { - "allowed": True, - "quota": None, - "usage": None, - "message": None, - } - - stats = self.bucket_stats(bucket_name) - current_bytes = stats.get("total_bytes", stats.get("bytes", 0)) - current_objects = stats.get("total_objects", stats.get("objects", 0)) - - result = { - "allowed": True, - "quota": quota, - "usage": { - "bytes": current_bytes, - "objects": current_objects, - "version_count": stats.get("version_count", 0), - "version_bytes": stats.get("version_bytes", 0), - }, - "message": None, - } - - max_bytes_limit = quota.get("max_bytes") - max_objects = quota.get("max_objects") - - if max_bytes_limit is not None: - projected_bytes = current_bytes + additional_bytes - if projected_bytes > max_bytes_limit: - result["allowed"] = False - result["message"] = ( - f"Quota exceeded: adding {additional_bytes} bytes would result in " - f"{projected_bytes} bytes, exceeding limit of {max_bytes_limit} bytes" - ) - return result - - if max_objects is not None: - projected_objects = current_objects + additional_objects - if projected_objects > max_objects: - result["allowed"] = False - result["message"] = ( - f"Quota exceeded: adding {additional_objects} objects would result in " - f"{projected_objects} objects, exceeding limit of {max_objects} objects" - ) - return result - - return result - - def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]: - """Get tags for an object.""" - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - object_path = bucket_path / safe_key - if not object_path.exists(): - raise ObjectNotFoundError("Object does not exist") - - entry = 
self._read_index_entry(bucket_path.name, safe_key) - if entry is not None: - tags = entry.get("tags") - return tags if isinstance(tags, list) else [] - for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)): - if not meta_file.exists(): - continue - try: - payload = json.loads(meta_file.read_text(encoding="utf-8")) - tags = payload.get("tags") - if isinstance(tags, list): - return tags - return [] - except (OSError, json.JSONDecodeError): - return [] - return [] - - def set_object_tags(self, bucket_name: str, object_key: str, tags: Optional[List[Dict[str, str]]]) -> None: - """Set tags for an object.""" - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - object_path = bucket_path / safe_key - if not object_path.exists(): - raise ObjectNotFoundError("Object does not exist") - - bucket_id = bucket_path.name - existing_entry = self._read_index_entry(bucket_id, safe_key) or {} - if not existing_entry: - meta_file = self._metadata_file(bucket_id, safe_key) - if meta_file.exists(): - try: - existing_entry = json.loads(meta_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - - if tags: - existing_entry["tags"] = tags - else: - existing_entry.pop("tags", None) - - if existing_entry.get("metadata") or existing_entry.get("tags"): - self._write_index_entry(bucket_id, safe_key, existing_entry) - else: - self._delete_index_entry(bucket_id, safe_key) - old_meta = self._metadata_file(bucket_id, safe_key) - try: - if old_meta.exists(): - old_meta.unlink() - except OSError: - pass - - def delete_object_tags(self, bucket_name: str, object_key: str) -> None: - """Delete all tags from an object.""" - self.set_object_tags(bucket_name, object_key, None) - - def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - version_dir = self._version_dir(bucket_id, safe_key) - if not version_dir.exists(): - version_dir = self._legacy_version_dir(bucket_id, safe_key) - if not version_dir.exists(): - return [] - versions: List[Dict[str, Any]] = [] - for meta_file in version_dir.glob("*.json"): - try: - payload = json.loads(meta_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - continue - if not isinstance(payload, dict): - continue - payload.setdefault("version_id", meta_file.stem) - versions.append(payload) - versions.sort(key=lambda item: item.get("archived_at") or "1970-01-01T00:00:00Z", reverse=True) - return versions - - def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - version_dir = self._version_dir(bucket_id, safe_key) - data_path = version_dir / f"{version_id}.bin" - meta_path = version_dir / f"{version_id}.json" - if not data_path.exists() or not meta_path.exists(): - raise StorageError("Version not found") - try: - payload = 
json.loads(meta_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - payload = {} - metadata = payload.get("metadata") if isinstance(payload, dict) else {} - if not isinstance(metadata, dict): - metadata = {} - destination = bucket_path / safe_key - restored_size = data_path.stat().st_size - is_overwrite = destination.exists() - existing_size = destination.stat().st_size if is_overwrite else 0 - archived_version_size = 0 - if self._is_versioning_enabled(bucket_path) and is_overwrite: - archived_version_size = existing_size - self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite") - destination.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(data_path, destination) - if metadata: - self._write_metadata(bucket_id, safe_key, metadata) - else: - self._delete_metadata(bucket_id, safe_key) - stat = destination.stat() - self._update_bucket_stats_cache( - bucket_id, - bytes_delta=restored_size - existing_size, - objects_delta=0 if is_overwrite else 1, - version_bytes_delta=archived_version_size, - version_count_delta=1 if archived_version_size > 0 else 0, - ) - etag = self._compute_etag(destination) - internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)} - combined_meta = {**internal_meta, **(metadata or {})} - self._write_metadata(bucket_id, safe_key, combined_meta) - obj_meta = ObjectMeta( - key=safe_key.as_posix(), - size=stat.st_size, - last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc), - etag=etag, - metadata=metadata or None, - ) - self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta) - return obj_meta - - def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - version_dir = self._version_dir(bucket_id, safe_key) - data_path = version_dir / f"{version_id}.bin" - meta_path = version_dir / f"{version_id}.json" - if not data_path.exists() and not meta_path.exists(): - legacy_version_dir = self._legacy_version_dir(bucket_id, safe_key) - data_path = legacy_version_dir / f"{version_id}.bin" - meta_path = legacy_version_dir / f"{version_id}.json" - if not data_path.exists() and not meta_path.exists(): - raise StorageError(f"Version {version_id} not found") - deleted_version_size = data_path.stat().st_size if data_path.exists() else 0 - if data_path.exists(): - data_path.unlink() - if meta_path.exists(): - meta_path.unlink() - parent = data_path.parent - if parent.exists() and not any(parent.iterdir()): - parent.rmdir() - if deleted_version_size > 0: - self._update_bucket_stats_cache( - bucket_id, - version_bytes_delta=-deleted_version_size, - version_count_delta=-1, - ) - - def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)] - if not any(root.exists() for root in version_roots): - return [] - aggregated: Dict[str, Dict[str, Any]] = {} - skipped: set[str] = set() - for version_root in version_roots: - if not version_root.exists(): - continue - for meta_file in version_root.glob("**/*.json"): - if not meta_file.is_file(): - continue - rel 
= meta_file.parent.relative_to(version_root) - rel_key = rel.as_posix() - if rel_key in skipped: - continue - object_path = bucket_path / rel - if object_path.exists(): - skipped.add(rel_key) - continue - try: - payload = json.loads(meta_file.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - payload = {} - version_id = payload.get("version_id") or meta_file.stem - archived_at = payload.get("archived_at") or "1970-01-01T00:00:00Z" - size = int(payload.get("size") or 0) - reason = payload.get("reason") or "update" - record = aggregated.setdefault( - rel_key, - { - "key": rel_key, - "versions": 0, - "total_size": 0, - "latest": None, - "_latest_sort": None, - }, - ) - record["versions"] += 1 - record["total_size"] += size - candidate = { - "version_id": version_id, - "archived_at": archived_at, - "size": size, - "reason": reason, - } - sort_key = ( - archived_at, - meta_file.stat().st_mtime, - ) - current_sort = record.get("_latest_sort") - if current_sort is None or sort_key > current_sort: - record["_latest_sort"] = sort_key - record["latest"] = candidate - for record in aggregated.values(): - record.pop("_latest_sort", None) - return sorted(aggregated.values(), key=lambda item: item["key"]) - - def initiate_multipart_upload( - self, - bucket_name: str, - object_key: str, - *, - metadata: Optional[Dict[str, str]] = None, - ) -> str: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - upload_id = uuid.uuid4().hex - upload_root = self._multipart_dir(bucket_id, upload_id) - upload_root.mkdir(parents=True, exist_ok=False) - manifest = { - "upload_id": upload_id, - "object_key": safe_key.as_posix(), - "metadata": self._normalize_metadata(metadata), - "parts": {}, - "created_at": _utc_isoformat(), - } - self._write_multipart_manifest(upload_root, manifest) - return upload_id - - def upload_multipart_part( - self, - bucket_name: str, - upload_id: str, - part_number: int, - stream: BinaryIO, - ) -> str: - """Upload a part for a multipart upload. - - Uses file locking to safely update the manifest and handle concurrent uploads. 
- """ - if part_number < 1 or part_number > 10000: - raise StorageError("part_number must be between 1 and 10000") - bucket_path = self._bucket_path(bucket_name) - - upload_root = self._multipart_dir(bucket_path.name, upload_id) - if not upload_root.exists(): - upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id) - if not upload_root.exists(): - raise StorageError("Multipart upload not found") - - part_filename = f"part-{part_number:05d}.part" - part_path = upload_root / part_filename - temp_path = upload_root / f".{part_filename}.tmp" - - try: - if _HAS_RUST: - with temp_path.open("wb") as target: - shutil.copyfileobj(stream, target) - part_etag = _rc.md5_file(str(temp_path)) - else: - checksum = hashlib.md5() - with temp_path.open("wb") as target: - shutil.copyfileobj(_HashingReader(stream, checksum), target) - target.flush() - os.fsync(target.fileno()) - part_etag = checksum.hexdigest() - temp_path.replace(part_path) - except OSError: - try: - temp_path.unlink(missing_ok=True) - except OSError: - pass - raise - - record = { - "etag": part_etag, - "size": part_path.stat().st_size, - "filename": part_filename, - } - - manifest_path = upload_root / self.MULTIPART_MANIFEST - lock_path = upload_root / ".manifest.lock" - - max_retries = 3 - for attempt in range(max_retries): - try: - with lock_path.open("w") as lock_file: - with _file_lock(lock_file): - try: - manifest = json.loads(manifest_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError) as exc: - if attempt < max_retries - 1: - time.sleep(0.1 * (attempt + 1)) - continue - raise StorageError("Multipart manifest unreadable") from exc - - parts = manifest.setdefault("parts", {}) - parts[str(part_number)] = record - self._atomic_write_json(manifest_path, manifest) - break - except OSError as exc: - if attempt < max_retries - 1: - time.sleep(0.1 * (attempt + 1)) - continue - raise StorageError(f"Failed to update multipart manifest: {exc}") from exc - - return record["etag"] - - def upload_part_copy( - self, - bucket_name: str, - upload_id: str, - part_number: int, - source_bucket: str, - source_key: str, - start_byte: Optional[int] = None, - end_byte: Optional[int] = None, - ) -> Dict[str, Any]: - """Copy a range from an existing object as a multipart part.""" - if part_number < 1 or part_number > 10000: - raise StorageError("part_number must be between 1 and 10000") - - source_path = self.get_object_path(source_bucket, source_key) - source_size = source_path.stat().st_size - - if start_byte is None: - start_byte = 0 - if end_byte is None: - end_byte = source_size - 1 - - if start_byte < 0 or end_byte >= source_size or start_byte > end_byte: - raise StorageError("Invalid byte range") - - bucket_path = self._bucket_path(bucket_name) - upload_root = self._multipart_dir(bucket_path.name, upload_id) - if not upload_root.exists(): - upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id) - if not upload_root.exists(): - raise StorageError("Multipart upload not found") - - checksum = hashlib.md5() - part_filename = f"part-{part_number:05d}.part" - part_path = upload_root / part_filename - temp_path = upload_root / f".{part_filename}.tmp" - - try: - with source_path.open("rb") as src: - src.seek(start_byte) - bytes_to_copy = end_byte - start_byte + 1 - with temp_path.open("wb") as target: - remaining = bytes_to_copy - while remaining > 0: - chunk_size = min(65536, remaining) - chunk = src.read(chunk_size) - if not chunk: - break - checksum.update(chunk) - target.write(chunk) - remaining -= 
len(chunk) - temp_path.replace(part_path) - except OSError: - try: - temp_path.unlink(missing_ok=True) - except OSError: - pass - raise - - record = { - "etag": checksum.hexdigest(), - "size": part_path.stat().st_size, - "filename": part_filename, - } - - manifest_path = upload_root / self.MULTIPART_MANIFEST - lock_path = upload_root / ".manifest.lock" - - max_retries = 3 - for attempt in range(max_retries): - try: - with lock_path.open("w") as lock_file: - with _file_lock(lock_file): - try: - manifest = json.loads(manifest_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError) as exc: - if attempt < max_retries - 1: - time.sleep(0.1 * (attempt + 1)) - continue - raise StorageError("Multipart manifest unreadable") from exc - - parts = manifest.setdefault("parts", {}) - parts[str(part_number)] = record - self._atomic_write_json(manifest_path, manifest) - break - except OSError as exc: - if attempt < max_retries - 1: - time.sleep(0.1 * (attempt + 1)) - continue - raise StorageError(f"Failed to update multipart manifest: {exc}") from exc - - return { - "etag": record["etag"], - "last_modified": datetime.fromtimestamp(part_path.stat().st_mtime, timezone.utc), - } - - def complete_multipart_upload( - self, - bucket_name: str, - upload_id: str, - ordered_parts: List[Dict[str, Any]], - enforce_quota: bool = True, - ) -> ObjectMeta: - if not ordered_parts: - raise StorageError("parts list required") - bucket_path = self._bucket_path(bucket_name) - bucket_id = bucket_path.name - manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id) - parts_map = manifest.get("parts") or {} - if not parts_map: - raise StorageError("No uploaded parts found") - validated: List[tuple[int, Dict[str, Any]]] = [] - total_size = 0 - for part in ordered_parts: - raw_number = part.get("part_number") - if raw_number is None: - raw_number = part.get("PartNumber") - try: - number = int(raw_number) - except (TypeError, ValueError) as exc: - raise StorageError("Each part must include part_number") from exc - if number < 1: - raise StorageError("part numbers must be >= 1") - key = str(number) - record = parts_map.get(key) - if not record: - raise StorageError(f"Part {number} missing from upload") - raw_etag = part.get("etag", part.get("ETag", "")) - supplied_etag = str(raw_etag).strip() or record.get("etag") - if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]: - raise StorageError(f"ETag mismatch for part {number}") - validated.append((number, record)) - total_size += record.get("size", 0) - validated.sort(key=lambda entry: entry[0]) - - safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes) - destination = bucket_path / safe_key - - is_overwrite = destination.exists() - existing_size = destination.stat().st_size if is_overwrite else 0 - size_delta = total_size - existing_size - object_delta = 0 if is_overwrite else 1 - versioning_enabled = self._is_versioning_enabled(bucket_path) - - if enforce_quota: - quota_check = self.check_quota( - bucket_name, - additional_bytes=max(0, size_delta), - additional_objects=object_delta, - ) - if not quota_check["allowed"]: - raise QuotaExceededError( - quota_check["message"] or "Quota exceeded", - quota_check["quota"], - quota_check["usage"], - ) - - destination.parent.mkdir(parents=True, exist_ok=True) - - lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock" - - archived_version_size = 0 - try: - with 
_atomic_lock_file(lock_file_path): - if versioning_enabled and destination.exists(): - archived_version_size = destination.stat().st_size - self._archive_current_version(bucket_id, safe_key, reason="overwrite") - if _HAS_RUST: - part_paths = [] - for _, record in validated: - pp = upload_root / record["filename"] - if not pp.exists(): - raise StorageError(f"Missing part file {record['filename']}") - part_paths.append(str(pp)) - checksum_hex = _rc.assemble_parts_with_md5(part_paths, str(destination)) - else: - checksum = hashlib.md5() - with destination.open("wb") as target: - for _, record in validated: - part_path = upload_root / record["filename"] - if not part_path.exists(): - raise StorageError(f"Missing part file {record['filename']}") - with part_path.open("rb") as chunk: - while True: - data = chunk.read(1024 * 1024) - if not data: - break - checksum.update(data) - target.write(data) - target.flush() - os.fsync(target.fileno()) - checksum_hex = checksum.hexdigest() - except BlockingIOError: - raise StorageError("Another upload to this key is in progress") - - shutil.rmtree(upload_root, ignore_errors=True) - - self._update_bucket_stats_cache( - bucket_id, - bytes_delta=size_delta, - objects_delta=object_delta, - version_bytes_delta=archived_version_size, - version_count_delta=1 if archived_version_size > 0 else 0, - ) - - stat = destination.stat() - etag = checksum_hex - metadata = manifest.get("metadata") - - internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)} - combined_meta = {**internal_meta, **(metadata or {})} - self._write_metadata(bucket_id, safe_key, combined_meta) - - obj_meta = ObjectMeta( - key=safe_key.as_posix(), - size=stat.st_size, - last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc), - etag=etag, - metadata=metadata, - ) - self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta) - - return obj_meta - - def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None: - bucket_path = self._bucket_path(bucket_name) - upload_root = self._multipart_dir(bucket_path.name, upload_id) - if upload_root.exists(): - shutil.rmtree(upload_root, ignore_errors=True) - return - legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id) - if legacy_root.exists(): - shutil.rmtree(legacy_root, ignore_errors=True) - - def list_multipart_parts(self, bucket_name: str, upload_id: str) -> List[Dict[str, Any]]: - """List uploaded parts for a multipart upload.""" - bucket_path = self._bucket_path(bucket_name) - manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id) - - parts = [] - parts_map = manifest.get("parts", {}) - for part_num_str, record in parts_map.items(): - part_num = int(part_num_str) - part_filename = record.get("filename") - if not part_filename: - continue - part_path = upload_root / part_filename - if not part_path.exists(): - continue - - stat = part_path.stat() - parts.append({ - "PartNumber": part_num, - "Size": stat.st_size, - "ETag": record.get("etag"), - "LastModified": datetime.fromtimestamp(stat.st_mtime, timezone.utc) - }) - - parts.sort(key=lambda x: x["PartNumber"]) - return parts - - def list_multipart_uploads(self, bucket_name: str, include_orphaned: bool = False) -> List[Dict[str, Any]]: - """List all active multipart uploads for a bucket. - - Args: - bucket_name: The bucket to list uploads for. 
- include_orphaned: If True, also include upload directories that have - files but no valid manifest.json (orphaned/interrupted uploads). - """ - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise BucketNotFoundError("Bucket does not exist") - bucket_id = bucket_path.name - uploads = [] - - for multipart_root in ( - self._multipart_bucket_root(bucket_id), - self._legacy_multipart_bucket_root(bucket_id), - ): - if not multipart_root.exists(): - continue - for upload_dir in multipart_root.iterdir(): - if not upload_dir.is_dir(): - continue - manifest_path = upload_dir / "manifest.json" - if manifest_path.exists(): - try: - manifest = json.loads(manifest_path.read_text(encoding="utf-8")) - uploads.append({ - "upload_id": manifest.get("upload_id", upload_dir.name), - "object_key": manifest.get("object_key", ""), - "created_at": manifest.get("created_at", ""), - }) - except (OSError, json.JSONDecodeError): - if include_orphaned: - has_files = any(upload_dir.rglob("*")) - if has_files: - uploads.append({ - "upload_id": upload_dir.name, - "object_key": "(unknown)", - "created_at": "", - "orphaned": True, - }) - elif include_orphaned: - has_files = any(f.is_file() for f in upload_dir.rglob("*")) - if has_files: - uploads.append({ - "upload_id": upload_dir.name, - "object_key": "(unknown)", - "created_at": "", - "orphaned": True, - }) - return uploads - - def _bucket_path(self, bucket_name: str) -> Path: - safe_name = self._sanitize_bucket_name(bucket_name) - return self.root / safe_name - - def _require_bucket_path(self, bucket_name: str) -> Path: - bucket_path = self._bucket_path(bucket_name) - if not bucket_path.exists(): - raise StorageError("Bucket does not exist") - return bucket_path - - def _object_path(self, bucket_name: str, object_key: str) -> Path: - bucket_path = self._bucket_path(bucket_name) - safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes) - return bucket_path / safe_key - - def _system_root_path(self) -> Path: - return self.root / self.SYSTEM_ROOT - - def _system_buckets_root(self) -> Path: - return self._system_root_path() / self.SYSTEM_BUCKETS_DIR - - def _system_bucket_root(self, bucket_name: str) -> Path: - return self._system_buckets_root() / bucket_name - - def _bucket_meta_root(self, bucket_name: str) -> Path: - return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR - - def _bucket_versions_root(self, bucket_name: str) -> Path: - return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR - - def _multipart_root(self) -> Path: - return self._system_root_path() / self.SYSTEM_MULTIPART_DIR - - def _multipart_bucket_root(self, bucket_name: str) -> Path: - return self._multipart_root() / bucket_name - - def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path: - meta_root = self._legacy_meta_root(bucket_name) - meta_rel = Path(key.as_posix() + ".meta.json") - return meta_root / meta_rel - - def _legacy_meta_root(self, bucket_name: str) -> Path: - return self._bucket_path(bucket_name) / ".meta" - - def _legacy_versions_root(self, bucket_name: str) -> Path: - return self._bucket_path(bucket_name) / ".versions" - - def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path: - return self._legacy_versions_root(bucket_name) / key - - def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path: - return self._bucket_path(bucket_name) / ".multipart" - - def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path: - return 
self._legacy_multipart_bucket_root(bucket_name) / upload_id - - def _fast_list_keys(self, bucket_path: Path) -> List[str]: - """Fast directory walk using os.scandir instead of pathlib.rglob. - - This is significantly faster for large directories (10K+ files). - Returns just the keys (for backward compatibility). - """ - return list(self._build_object_cache(bucket_path).keys()) - - def _build_object_cache(self, bucket_path: Path) -> Dict[str, ObjectMeta]: - from concurrent.futures import ThreadPoolExecutor - - bucket_id = bucket_path.name - objects: Dict[str, ObjectMeta] = {} - bucket_str = str(bucket_path) - bucket_len = len(bucket_str) + 1 - - if _HAS_RUST: - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - raw = _rc.build_object_cache( - bucket_str, - str(self._bucket_meta_root(bucket_id)), - str(etag_index_path), - ) - if raw["etag_cache_changed"] and raw["etag_cache"]: - try: - etag_index_path.parent.mkdir(parents=True, exist_ok=True) - with open(etag_index_path, 'w', encoding='utf-8') as f: - json.dump(raw["etag_cache"], f) - self._etag_index_mem[bucket_id] = (dict(raw["etag_cache"]), etag_index_path.stat().st_mtime) - except OSError: - pass - for key, size, mtime, etag in raw["objects"]: - objects[key] = ObjectMeta( - key=key, - size=size, - last_modified=datetime.fromtimestamp(mtime, timezone.utc), - etag=etag, - metadata=None, - ) - return objects - - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - meta_cache: Dict[str, str] = {} - index_mtime: float = 0 - - if etag_index_path.exists(): - try: - index_mtime = etag_index_path.stat().st_mtime - with open(etag_index_path, 'r', encoding='utf-8') as f: - meta_cache = json.load(f) - except (OSError, json.JSONDecodeError): - meta_cache = {} - - meta_root = self._bucket_meta_root(bucket_id) - needs_rebuild = False - - if meta_root.exists() and index_mtime > 0: - def check_newer(dir_path: str) -> bool: - try: - with os.scandir(dir_path) as it: - for entry in it: - if entry.is_dir(follow_symlinks=False): - if check_newer(entry.path): - return True - elif entry.is_file(follow_symlinks=False) and (entry.name.endswith('.meta.json') or entry.name == '_index.json'): - if entry.stat().st_mtime > index_mtime: - return True - except OSError: - pass - return False - needs_rebuild = check_newer(str(meta_root)) - elif not meta_cache: - needs_rebuild = True - - if needs_rebuild and meta_root.exists(): - meta_str = str(meta_root) - meta_len = len(meta_str) + 1 - meta_files: list[tuple[str, str]] = [] - index_files: list[str] = [] - - def collect_meta_files(dir_path: str) -> None: - try: - with os.scandir(dir_path) as it: - for entry in it: - if entry.is_dir(follow_symlinks=False): - collect_meta_files(entry.path) - elif entry.is_file(follow_symlinks=False): - if entry.name == '_index.json': - index_files.append(entry.path) - elif entry.name.endswith('.meta.json'): - rel = entry.path[meta_len:] - key = rel[:-10].replace(os.sep, '/') - meta_files.append((key, entry.path)) - except OSError: - pass - - collect_meta_files(meta_str) - - meta_cache = {} - - for idx_path in index_files: - try: - with open(idx_path, 'r', encoding='utf-8') as f: - idx_data = json.load(f) - rel_dir = idx_path[meta_len:] - rel_dir = rel_dir.replace(os.sep, '/') - if rel_dir.endswith('/_index.json'): - dir_prefix = rel_dir[:-len('/_index.json')] - else: - dir_prefix = '' - for entry_name, entry_data in idx_data.items(): - if dir_prefix: - key = f"{dir_prefix}/{entry_name}" - else: - key = entry_name - meta = 
entry_data.get("metadata", {}) - etag = meta.get("__etag__") - if etag: - meta_cache[key] = etag - except (OSError, json.JSONDecodeError): - pass - - def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]: - key, path = item - try: - with open(path, 'rb') as f: - content = f.read() - etag_marker = b'"__etag__"' - idx = content.find(etag_marker) - if idx != -1: - start = content.find(b'"', idx + len(etag_marker) + 1) - if start != -1: - end = content.find(b'"', start + 1) - if end != -1: - return key, content[start+1:end].decode('utf-8') - return key, None - except (OSError, UnicodeDecodeError): - return key, None - - legacy_meta_files = [(k, p) for k, p in meta_files if k not in meta_cache] - if legacy_meta_files: - max_workers = min((os.cpu_count() or 4) * 2, len(legacy_meta_files), 16) - with ThreadPoolExecutor(max_workers=max_workers) as executor: - for key, etag in executor.map(read_meta_file, legacy_meta_files): - if etag: - meta_cache[key] = etag - - if meta_cache: - try: - etag_index_path.parent.mkdir(parents=True, exist_ok=True) - with open(etag_index_path, 'w', encoding='utf-8') as f: - json.dump(meta_cache, f) - self._etag_index_mem[bucket_id] = (dict(meta_cache), etag_index_path.stat().st_mtime) - except OSError: - pass - - def scan_dir(dir_path: str) -> None: - try: - with os.scandir(dir_path) as it: - for entry in it: - if entry.is_dir(follow_symlinks=False): - rel_start = entry.path[bucket_len:].split(os.sep)[0] if len(entry.path) > bucket_len else entry.name - if rel_start in self.INTERNAL_FOLDERS: - continue - scan_dir(entry.path) - elif entry.is_file(follow_symlinks=False): - rel = entry.path[bucket_len:] - first_part = rel.split(os.sep)[0] if os.sep in rel else rel - if first_part in self.INTERNAL_FOLDERS: - continue - - key = rel.replace(os.sep, '/') - try: - stat = entry.stat() - - etag = meta_cache.get(key) - - objects[key] = ObjectMeta( - key=key, - size=stat.st_size, - last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc), - etag=etag, - metadata=None, - ) - except OSError: - pass - except OSError: - pass - - scan_dir(bucket_str) - return objects - - def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]: - """Get cached object metadata for a bucket, refreshing if stale. - - Uses LRU eviction to prevent unbounded cache growth. - Thread-safe with per-bucket locks to reduce contention. - Checks stats.json for cross-process cache invalidation. 
- """ - now = time.time() - current_stats_mtime = self._get_cache_marker_mtime(bucket_id) - - with self._obj_cache_lock: - cached = self._object_cache.get(bucket_id) - if cached: - objects, timestamp, cached_stats_mtime = cached - if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime: - self._object_cache.move_to_end(bucket_id) - return objects - - bucket_lock = self._get_bucket_lock(bucket_id) - with bucket_lock: - now = time.time() - current_stats_mtime = self._get_cache_marker_mtime(bucket_id) - with self._obj_cache_lock: - cached = self._object_cache.get(bucket_id) - if cached: - objects, timestamp, cached_stats_mtime = cached - if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime: - self._object_cache.move_to_end(bucket_id) - return objects - - objects = self._build_object_cache(bucket_path) - new_stats_mtime = self._get_cache_marker_mtime(bucket_id) - - with self._obj_cache_lock: - while len(self._object_cache) >= self._object_cache_max_size: - self._object_cache.popitem(last=False) - - self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime) - self._object_cache.move_to_end(bucket_id) - self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 - self._sorted_key_cache.pop(bucket_id, None) - - return objects - - def _invalidate_object_cache(self, bucket_id: str) -> None: - with self._obj_cache_lock: - self._object_cache.pop(bucket_id, None) - self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 - - self._etag_index_dirty.discard(bucket_id) - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - try: - etag_index_path.unlink(missing_ok=True) - except OSError: - pass - - def _get_cache_marker_mtime(self, bucket_id: str) -> float: - return float(self._stats_serial.get(bucket_id, 0)) - - def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None: - with self._obj_cache_lock: - cached = self._object_cache.get(bucket_id) - if cached: - objects, timestamp, stats_mtime = cached - if meta is None: - objects.pop(key, None) - else: - objects[key] = meta - self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 - self._sorted_key_cache.pop(bucket_id, None) - - self._etag_index_dirty.add(bucket_id) - self._schedule_etag_index_flush() - - def _get_etag_index(self, bucket_id: str) -> Dict[str, str]: - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - try: - current_mtime = etag_index_path.stat().st_mtime - except OSError: - return {} - cached = self._etag_index_mem.get(bucket_id) - if cached: - cache_dict, cached_mtime = cached - if current_mtime == cached_mtime: - return cache_dict - try: - with open(etag_index_path, 'r', encoding='utf-8') as f: - data = json.load(f) - self._etag_index_mem[bucket_id] = (data, current_mtime) - return data - except (OSError, json.JSONDecodeError): - return {} - - def _schedule_etag_index_flush(self) -> None: - if self._etag_index_flush_timer is None or not self._etag_index_flush_timer.is_alive(): - self._etag_index_flush_timer = threading.Timer(5.0, self._flush_etag_indexes) - self._etag_index_flush_timer.daemon = True - self._etag_index_flush_timer.start() - - def _flush_etag_indexes(self) -> None: - dirty = set(self._etag_index_dirty) - self._etag_index_dirty.clear() - for bucket_id in dirty: - with self._obj_cache_lock: - cached = self._object_cache.get(bucket_id) - if not cached: - continue - objects = cached[0] - index = {k: v.etag for k, v in 
objects.items() if v.etag} - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" - try: - self._atomic_write_json(etag_index_path, index, sync=False) - self._etag_index_mem[bucket_id] = (index, etag_index_path.stat().st_mtime) - except OSError: - logger.warning("Failed to flush etag index for bucket %s", bucket_id) - - def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None: - """Pre-warm the object cache for specified buckets or all buckets. - - This is called on startup to ensure the first request is fast. - """ - if bucket_names is None: - bucket_names = [b.name for b in self.list_buckets()] - - for bucket_name in bucket_names: - try: - bucket_path = self._bucket_path(bucket_name) - if bucket_path.exists(): - self._get_object_cache(bucket_path.name, bucket_path) - except Exception: - pass - - def warm_cache_async(self, bucket_names: Optional[List[str]] = None) -> threading.Thread: - """Start cache warming in a background thread. - - Returns the thread object so caller can optionally wait for it. - """ - thread = threading.Thread( - target=self.warm_cache, - args=(bucket_names,), - daemon=True, - name="cache-warmer", - ) - thread.start() - return thread - - def _ensure_system_roots(self) -> None: - for path in ( - self._system_root_path(), - self._system_buckets_root(), - self._multipart_root(), - self._system_root_path() / self.SYSTEM_TMP_DIR, - ): - path.mkdir(parents=True, exist_ok=True) - - @staticmethod - def _atomic_write_json(path: Path, data: Any, *, sync: bool = True) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - tmp_path = path.with_suffix(".tmp") - try: - with tmp_path.open("w", encoding="utf-8") as f: - json.dump(data, f) - if sync: - f.flush() - os.fsync(f.fileno()) - tmp_path.replace(path) - except BaseException: - try: - tmp_path.unlink(missing_ok=True) - except OSError: - pass - raise - - def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path: - return self._multipart_bucket_root(bucket_name) / upload_id - - def _version_dir(self, bucket_name: str, key: Path) -> Path: - return self._bucket_versions_root(bucket_name) / key - - def _bucket_config_path(self, bucket_name: str) -> Path: - return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE - - def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]: - now = time.time() - config_path = self._bucket_config_path(bucket_name) - cached = self._bucket_config_cache.get(bucket_name) - if cached: - config, cached_time, cached_mtime = cached - if now - cached_time < self._bucket_config_cache_ttl: - return config.copy() - - if not config_path.exists(): - self._bucket_config_cache[bucket_name] = ({}, now, 0.0) - return {} - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - config = data if isinstance(data, dict) else {} - mtime = config_path.stat().st_mtime - self._bucket_config_cache[bucket_name] = (config, now, mtime) - return config.copy() - except (OSError, json.JSONDecodeError): - self._bucket_config_cache[bucket_name] = ({}, now, 0.0) - return {} - - def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None: - config_path = self._bucket_config_path(bucket_name) - config_path.parent.mkdir(parents=True, exist_ok=True) - self._atomic_write_json(config_path, payload) - try: - mtime = config_path.stat().st_mtime - except OSError: - mtime = 0.0 - self._bucket_config_cache[bucket_name] = (payload.copy(), time.time(), mtime) - - def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) 
-> None: - config = self._read_bucket_config(bucket_name) - if value is None: - config.pop(key, None) - else: - config[key] = value - self._write_bucket_config(bucket_name, config) - - def _is_versioning_enabled(self, bucket_path: Path) -> bool: - config = self._read_bucket_config(bucket_path.name) - return bool(config.get("versioning_enabled")) - - def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]: - upload_root = self._multipart_dir(bucket_name, upload_id) - if not upload_root.exists(): - upload_root = self._legacy_multipart_dir(bucket_name, upload_id) - manifest_path = upload_root / self.MULTIPART_MANIFEST - if not manifest_path.exists(): - raise StorageError("Multipart upload not found") - try: - manifest = json.loads(manifest_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError) as exc: - raise StorageError("Multipart manifest unreadable") from exc - return manifest, upload_root - - def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None: - manifest_path = upload_root / self.MULTIPART_MANIFEST - self._atomic_write_json(manifest_path, manifest) - - def _metadata_file(self, bucket_name: str, key: Path) -> Path: - meta_root = self._bucket_meta_root(bucket_name) - meta_rel = Path(key.as_posix() + ".meta.json") - return meta_root / meta_rel - - def _index_file_for_key(self, bucket_name: str, key: Path) -> tuple[Path, str]: - meta_root = self._bucket_meta_root(bucket_name) - parent = key.parent - entry_name = key.name - if parent == Path("."): - return meta_root / "_index.json", entry_name - return meta_root / parent / "_index.json", entry_name - - def _get_meta_index_lock(self, index_path: str) -> threading.Lock: - with self._registry_lock: - if index_path not in self._meta_index_locks: - self._meta_index_locks[index_path] = threading.Lock() - return self._meta_index_locks[index_path] - - def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]: - cache_key = (bucket_name, str(key)) - with self._meta_cache_lock: - hit = self._meta_read_cache.get(cache_key) - if hit is not None: - self._meta_read_cache.move_to_end(cache_key) - cached = hit[0] - return dict(cached) if cached is not None else None - - index_path, entry_name = self._index_file_for_key(bucket_name, key) - if _HAS_RUST: - result = _rc.read_index_entry(str(index_path), entry_name) - else: - if not index_path.exists(): - result = None - else: - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - result = index_data.get(entry_name) - except (OSError, json.JSONDecodeError): - result = None - - with self._meta_cache_lock: - while len(self._meta_read_cache) >= self._meta_read_cache_max: - self._meta_read_cache.popitem(last=False) - self._meta_read_cache[cache_key] = (dict(result) if result is not None else None,) - - return result - - def _invalidate_meta_read_cache(self, bucket_name: str, key: Path) -> None: - cache_key = (bucket_name, str(key)) - with self._meta_cache_lock: - self._meta_read_cache.pop(cache_key, None) - - def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None: - index_path, entry_name = self._index_file_for_key(bucket_name, key) - lock = self._get_meta_index_lock(str(index_path)) - with lock: - if _HAS_RUST: - _rc.write_index_entry(str(index_path), entry_name, json.dumps(entry)) - else: - index_path.parent.mkdir(parents=True, exist_ok=True) - index_data: Dict[str, Any] = {} - if index_path.exists(): - try: - index_data = 
json.loads(index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - pass - index_data[entry_name] = entry - self._atomic_write_json(index_path, index_data) - self._invalidate_meta_read_cache(bucket_name, key) - - def _delete_index_entry(self, bucket_name: str, key: Path) -> None: - index_path, entry_name = self._index_file_for_key(bucket_name, key) - if not index_path.exists(): - self._invalidate_meta_read_cache(bucket_name, key) - return - lock = self._get_meta_index_lock(str(index_path)) - with lock: - if _HAS_RUST: - _rc.delete_index_entry(str(index_path), entry_name) - else: - try: - index_data = json.loads(index_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - self._invalidate_meta_read_cache(bucket_name, key) - return - if entry_name in index_data: - del index_data[entry_name] - if index_data: - self._atomic_write_json(index_path, index_data) - else: - try: - index_path.unlink() - except OSError: - pass - self._invalidate_meta_read_cache(bucket_name, key) - - def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: - if not metadata: - return None - clean = {str(k).strip(): str(v) for k, v in metadata.items() if str(k).strip()} - return clean or None - - def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None: - clean = self._normalize_metadata(metadata) - if not clean: - self._delete_metadata(bucket_name, key) - return - self._write_index_entry(bucket_name, key, {"metadata": clean}) - old_meta = self._metadata_file(bucket_name, key) - try: - if old_meta.exists(): - old_meta.unlink() - except OSError: - pass - - def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None: - bucket_path = self._bucket_path(bucket_name) - source = bucket_path / key - if not source.exists(): - return - version_dir = self._version_dir(bucket_name, key) - version_dir.mkdir(parents=True, exist_ok=True) - now = _utcnow() - version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}" - data_path = version_dir / f"{version_id}.bin" - shutil.copy2(source, data_path) - metadata = self._read_metadata(bucket_name, key) - record = { - "version_id": version_id, - "key": key.as_posix(), - "size": source.stat().st_size, - "archived_at": now.isoformat().replace("+00:00", "Z"), - "etag": self._compute_etag(source), - "metadata": metadata or {}, - "reason": reason, - } - manifest_path = version_dir / f"{version_id}.json" - self._atomic_write_json(manifest_path, record) - - def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]: - entry = self._read_index_entry(bucket_name, key) - if entry is not None: - data = entry.get("metadata") - return data if isinstance(data, dict) else {} - for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)): - if not meta_file.exists(): - continue - try: - payload = json.loads(meta_file.read_text(encoding="utf-8")) - data = payload.get("metadata") - return data if isinstance(data, dict) else {} - except (OSError, json.JSONDecodeError): - return {} - return {} - - def _safe_unlink(self, path: Path) -> None: - attempts = 3 - last_error: PermissionError | None = None - for attempt in range(attempts): - try: - path.unlink() - return - except FileNotFoundError: - return - except PermissionError as exc: - last_error = exc - if os.name == "nt": - time.sleep(0.15 * (attempt + 1)) - except OSError as exc: - raise StorageError(f"Unable to delete object: {exc}") from exc - 
message = "Object file is currently in use. Close active previews or wait and try again." - raise StorageError(message) from last_error - - def _delete_metadata(self, bucket_name: str, key: Path) -> None: - self._delete_index_entry(bucket_name, key) - locations = ( - (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)), - (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)), - ) - for meta_file, meta_root in locations: - try: - if meta_file.exists(): - meta_file.unlink() - parent = meta_file.parent - while parent != meta_root and parent.exists() and not any(parent.iterdir()): - parent.rmdir() - parent = parent.parent - except OSError: - continue - - def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]: - bucket_name = bucket_path.name - - if _HAS_RUST: - return _rc.check_bucket_contents( - str(bucket_path), - [ - str(self._bucket_versions_root(bucket_name)), - str(self._legacy_versions_root(bucket_name)), - ], - [ - str(self._multipart_bucket_root(bucket_name)), - str(self._legacy_multipart_bucket_root(bucket_name)), - ], - ) - - has_objects = False - has_versions = False - has_multipart = False - - for path in bucket_path.rglob("*"): - if has_objects: - break - if not path.is_file(): - continue - rel = path.relative_to(bucket_path) - if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: - continue - has_objects = True - - for version_root in ( - self._bucket_versions_root(bucket_name), - self._legacy_versions_root(bucket_name), - ): - if has_versions: - break - if version_root.exists(): - for path in version_root.rglob("*"): - if path.is_file(): - has_versions = True - break - - for uploads_root in ( - self._multipart_bucket_root(bucket_name), - self._legacy_multipart_bucket_root(bucket_name), - ): - if has_multipart: - break - if uploads_root.exists(): - for path in uploads_root.rglob("*"): - if path.is_file(): - has_multipart = True - break - - return has_objects, has_versions, has_multipart - - def _has_visible_objects(self, bucket_path: Path) -> bool: - has_objects, _, _ = self._check_bucket_contents(bucket_path) - return has_objects - - def _has_archived_versions(self, bucket_path: Path) -> bool: - _, has_versions, _ = self._check_bucket_contents(bucket_path) - return has_versions - - def _has_active_multipart_uploads(self, bucket_path: Path) -> bool: - _, _, has_multipart = self._check_bucket_contents(bucket_path) - return has_multipart - - def _remove_tree(self, path: Path) -> None: - if not path.exists(): - return - def _handle_error(func, target_path, exc_info): - try: - os.chmod(target_path, stat.S_IRWXU) - func(target_path) - except Exception as exc: - raise StorageError(f"Unable to delete bucket contents: {exc}") from exc - - try: - shutil.rmtree(path, onerror=_handle_error) - except FileNotFoundError: - return - except PermissionError as exc: - raise StorageError("Bucket in use. Close open files and try again") from exc - - @staticmethod - def _sanitize_bucket_name(bucket_name: str) -> str: - if not bucket_name: - raise StorageError("Bucket name required") - - name = bucket_name.lower() - if len(name) < 3 or len(name) > 63: - raise StorageError("Bucket name must be between 3 and 63 characters") - - if name.startswith("-") or name.endswith("-"): - raise StorageError("Bucket name cannot start or end with a hyphen") - - if ".." 
in name: - raise StorageError("Bucket name cannot contain consecutive periods") - - if name.startswith("xn--"): - raise StorageError("Bucket name cannot start with 'xn--'") - - if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name): - raise StorageError("Bucket name cannot be formatted like an IP address") - - if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name): - raise StorageError("Bucket name can contain lowercase letters, numbers, dots, and hyphens") - - return name - - @staticmethod - def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path: - if _HAS_RUST: - error = _rc.validate_object_key(object_key, max_length_bytes, os.name == "nt") - if error: - raise StorageError(error) - normalized = unicodedata.normalize("NFC", object_key) - candidate = Path(normalized) - if candidate.is_absolute(): - raise StorageError("Absolute object keys are not allowed") - if getattr(candidate, "drive", ""): - raise StorageError("Object key cannot include a drive letter") - return Path(*candidate.parts) if candidate.parts else candidate - - if not object_key: - raise StorageError("Object key required") - if "\x00" in object_key: - raise StorageError("Object key contains null bytes") - object_key = unicodedata.normalize("NFC", object_key) - if len(object_key.encode("utf-8")) > max_length_bytes: - raise StorageError(f"Object key exceeds maximum length of {max_length_bytes} bytes") - if object_key.startswith(("/", "\\")): - raise StorageError("Object key cannot start with a slash") - - candidate = Path(object_key) - if ".." in candidate.parts: - raise StorageError("Object key contains parent directory references") - - if candidate.is_absolute(): - raise StorageError("Absolute object keys are not allowed") - if getattr(candidate, "drive", ""): - raise StorageError("Object key cannot include a drive letter") - parts = [] - for part in candidate.parts: - if part in ("", ".", ".."): - raise StorageError("Object key contains invalid segments") - if any(ord(ch) < 32 for ch in part): - raise StorageError("Object key contains control characters") - if os.name == "nt": - if any(ch in part for ch in "<>:\"/\\|?*"): - raise StorageError("Object key contains characters not supported on Windows filesystems") - if part.endswith((" ", ".")): - raise StorageError("Object key segments cannot end with spaces or periods on Windows") - trimmed = part.upper().rstrip(". 
") - if trimmed in WINDOWS_RESERVED_NAMES: - raise StorageError(f"Invalid filename segment: {part}") - parts.append(part) - if parts: - top_level = parts[0] - if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT: - raise StorageError("Object key uses a reserved prefix") - return Path(*parts) - - @staticmethod - def _compute_etag(path: Path) -> str: - if _HAS_RUST: - return _rc.md5_file(str(path)) - checksum = hashlib.md5() - with path.open("rb") as handle: - for chunk in iter(lambda: handle.read(8192), b""): - checksum.update(chunk) - return checksum.hexdigest() - - -class _HashingReader: - """Wraps a binary stream, updating the checksum as it is read.""" - - def __init__(self, stream: BinaryIO, checksum: Any) -> None: - self.stream = stream - self.checksum = checksum - - def read(self, size: int = -1) -> bytes: - data = self.stream.read(size) - if data: - self.checksum.update(data) - return data diff --git a/app/system_metrics.py b/app/system_metrics.py deleted file mode 100644 index 235710b..0000000 --- a/app/system_metrics.py +++ /dev/null @@ -1,215 +0,0 @@ -from __future__ import annotations - -import json -import logging -import threading -import time -from dataclasses import dataclass -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, TYPE_CHECKING - -import psutil - -if TYPE_CHECKING: - from .storage import ObjectStorage - -logger = logging.getLogger(__name__) - - -@dataclass -class SystemMetricsSnapshot: - timestamp: datetime - cpu_percent: float - memory_percent: float - disk_percent: float - storage_bytes: int - - def to_dict(self) -> Dict[str, Any]: - return { - "timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"), - "cpu_percent": round(self.cpu_percent, 2), - "memory_percent": round(self.memory_percent, 2), - "disk_percent": round(self.disk_percent, 2), - "storage_bytes": self.storage_bytes, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot": - timestamp_str = data["timestamp"] - if timestamp_str.endswith("Z"): - timestamp_str = timestamp_str[:-1] + "+00:00" - return cls( - timestamp=datetime.fromisoformat(timestamp_str), - cpu_percent=data.get("cpu_percent", 0.0), - memory_percent=data.get("memory_percent", 0.0), - disk_percent=data.get("disk_percent", 0.0), - storage_bytes=data.get("storage_bytes", 0), - ) - - -class SystemMetricsCollector: - def __init__( - self, - storage_root: Path, - interval_minutes: int = 5, - retention_hours: int = 24, - ): - self.storage_root = storage_root - self.interval_seconds = interval_minutes * 60 - self.retention_hours = retention_hours - self._lock = threading.Lock() - self._shutdown = threading.Event() - self._snapshots: List[SystemMetricsSnapshot] = [] - self._storage_ref: Optional["ObjectStorage"] = None - - self._load_history() - - self._snapshot_thread = threading.Thread( - target=self._snapshot_loop, - name="system-metrics-snapshot", - daemon=True, - ) - self._snapshot_thread.start() - - def set_storage(self, storage: "ObjectStorage") -> None: - with self._lock: - self._storage_ref = storage - - def _config_path(self) -> Path: - return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json" - - def _load_history(self) -> None: - config_path = self._config_path() - if not config_path.exists(): - return - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - history_data = data.get("history", []) - self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in 
history_data] - self._prune_old_snapshots() - except (json.JSONDecodeError, OSError, KeyError) as e: - logger.warning(f"Failed to load system metrics history: {e}") - - def _save_history(self) -> None: - config_path = self._config_path() - config_path.parent.mkdir(parents=True, exist_ok=True) - try: - data = {"history": [s.to_dict() for s in self._snapshots]} - config_path.write_text(json.dumps(data, indent=2), encoding="utf-8") - except OSError as e: - logger.warning(f"Failed to save system metrics history: {e}") - - def _prune_old_snapshots(self) -> None: - if not self._snapshots: - return - cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600) - self._snapshots = [ - s for s in self._snapshots if s.timestamp.timestamp() > cutoff - ] - - def _snapshot_loop(self) -> None: - while not self._shutdown.is_set(): - self._shutdown.wait(timeout=self.interval_seconds) - if not self._shutdown.is_set(): - self._take_snapshot() - - def _take_snapshot(self) -> None: - try: - cpu_percent = psutil.cpu_percent(interval=0.1) - memory = psutil.virtual_memory() - disk = psutil.disk_usage(str(self.storage_root)) - - storage_bytes = 0 - with self._lock: - storage = self._storage_ref - if storage: - try: - buckets = storage.list_buckets() - for bucket in buckets: - stats = storage.bucket_stats(bucket.name, cache_ttl=60) - storage_bytes += stats.get("total_bytes", stats.get("bytes", 0)) - except Exception as e: - logger.warning(f"Failed to collect bucket stats: {e}") - - snapshot = SystemMetricsSnapshot( - timestamp=datetime.now(timezone.utc), - cpu_percent=cpu_percent, - memory_percent=memory.percent, - disk_percent=disk.percent, - storage_bytes=storage_bytes, - ) - - with self._lock: - self._snapshots.append(snapshot) - self._prune_old_snapshots() - self._save_history() - - logger.debug(f"System metrics snapshot taken: CPU={cpu_percent:.1f}%, Memory={memory.percent:.1f}%") - except Exception as e: - logger.warning(f"Failed to take system metrics snapshot: {e}") - - def get_current(self) -> Dict[str, Any]: - cpu_percent = psutil.cpu_percent(interval=0.1) - memory = psutil.virtual_memory() - disk = psutil.disk_usage(str(self.storage_root)) - boot_time = psutil.boot_time() - uptime_seconds = time.time() - boot_time - uptime_days = int(uptime_seconds / 86400) - - total_buckets = 0 - total_objects = 0 - total_bytes_used = 0 - total_versions = 0 - - with self._lock: - storage = self._storage_ref - if storage: - try: - buckets = storage.list_buckets() - total_buckets = len(buckets) - for bucket in buckets: - stats = storage.bucket_stats(bucket.name, cache_ttl=60) - total_objects += stats.get("total_objects", stats.get("objects", 0)) - total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0)) - total_versions += stats.get("version_count", 0) - except Exception as e: - logger.warning(f"Failed to collect current bucket stats: {e}") - - return { - "cpu_percent": round(cpu_percent, 2), - "memory": { - "total": memory.total, - "available": memory.available, - "used": memory.used, - "percent": round(memory.percent, 2), - }, - "disk": { - "total": disk.total, - "free": disk.free, - "used": disk.used, - "percent": round(disk.percent, 2), - }, - "app": { - "buckets": total_buckets, - "objects": total_objects, - "versions": total_versions, - "storage_bytes": total_bytes_used, - "uptime_days": uptime_days, - }, - } - - def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]: - with self._lock: - snapshots = list(self._snapshots) - - if hours: - cutoff = 
datetime.now(timezone.utc).timestamp() - (hours * 3600) - snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff] - - return [s.to_dict() for s in snapshots] - - def shutdown(self) -> None: - self._shutdown.set() - self._take_snapshot() - self._snapshot_thread.join(timeout=5.0) diff --git a/app/ui.py b/app/ui.py deleted file mode 100644 index 29ffd05..0000000 --- a/app/ui.py +++ /dev/null @@ -1,4309 +0,0 @@ -from __future__ import annotations - -import io -import json -import uuid -import psutil -import shutil -from datetime import datetime, timezone as dt_timezone -from pathlib import Path -from typing import Any -from urllib.parse import quote, urlparse -from zoneinfo import ZoneInfo - -import boto3 -import requests -from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError -from flask import ( - Blueprint, - Response, - current_app, - flash, - jsonify, - redirect, - render_template, - request, - send_file, - session, - url_for, -) -from flask_wtf.csrf import generate_csrf - -from .acl import AclService, create_canned_acl, CANNED_ACLS -from .bucket_policies import BucketPolicyStore -from .connections import ConnectionStore, RemoteConnection -from .extensions import limiter, csrf -from .iam import IamError -from .kms import KMSManager -from .replication import ReplicationManager, ReplicationRule -from .s3_client import ( - get_session_s3_client, - get_upload_registry, - handle_client_error, - handle_connection_error, - build_url_templates, - translate_list_objects, - get_versioning_via_s3, - stream_objects_ndjson, - format_datetime_display as _s3_format_display, - format_datetime_iso as _s3_format_iso, -) -from .secret_store import EphemeralSecretStore -from .site_registry import SiteRegistry, SiteInfo, PeerSite -from .storage import ObjectStorage, StorageError -from .website_domains import normalize_domain, is_valid_domain - -ui_bp = Blueprint("ui", __name__, template_folder="../templates", url_prefix="/ui") - - -def _convert_to_display_tz(dt: datetime, display_tz: str | None = None) -> datetime: - """Convert a datetime to the configured display timezone. - - Args: - dt: The datetime to convert - display_tz: Optional timezone string. If not provided, reads from current_app.config. - """ - if display_tz is None: - display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC") - if display_tz and display_tz != "UTC": - try: - tz = ZoneInfo(display_tz) - if dt.tzinfo is None: - dt = dt.replace(tzinfo=dt_timezone.utc) - dt = dt.astimezone(tz) - except (KeyError, ValueError): - pass - return dt - - -def _format_datetime_display(dt: datetime, display_tz: str | None = None) -> str: - """Format a datetime for display using the configured timezone. - - Args: - dt: The datetime to format - display_tz: Optional timezone string. If not provided, reads from current_app.config. - """ - dt = _convert_to_display_tz(dt, display_tz) - tz_abbr = dt.strftime("%Z") or "UTC" - return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})" - - -def _format_datetime_iso(dt: datetime, display_tz: str | None = None) -> str: - """Format a datetime as ISO format using the configured timezone. - - Args: - dt: The datetime to format - display_tz: Optional timezone string. If not provided, reads from current_app.config. 
- """ - dt = _convert_to_display_tz(dt, display_tz) - return dt.isoformat() - - - -def _storage() -> ObjectStorage: - return current_app.extensions["object_storage"] - - -def _replication_manager() -> ReplicationManager: - return current_app.extensions["replication"] - - -def _iam(): - return current_app.extensions["iam"] - - -def _kms() -> KMSManager | None: - return current_app.extensions.get("kms") - - -def _bucket_policies() -> BucketPolicyStore: - store: BucketPolicyStore = current_app.extensions["bucket_policies"] - store.maybe_reload() - return store - - -def _build_policy_context() -> dict[str, Any]: - ctx: dict[str, Any] = {} - if request.headers.get("Referer"): - ctx["aws:Referer"] = request.headers.get("Referer") - if request.access_route: - ctx["aws:SourceIp"] = request.access_route[0] - elif request.remote_addr: - ctx["aws:SourceIp"] = request.remote_addr - ctx["aws:SecureTransport"] = str(request.is_secure).lower() - if request.headers.get("User-Agent"): - ctx["aws:UserAgent"] = request.headers.get("User-Agent") - return ctx - - -def _connections() -> ConnectionStore: - return current_app.extensions["connections"] - - -def _replication() -> ReplicationManager: - return current_app.extensions["replication"] - - -def _secret_store() -> EphemeralSecretStore: - store: EphemeralSecretStore = current_app.extensions["secret_store"] - store.purge_expired() - return store - - -def _acl() -> AclService: - return current_app.extensions["acl"] - - -def _operation_metrics(): - return current_app.extensions.get("operation_metrics") - - -def _site_registry() -> SiteRegistry: - return current_app.extensions["site_registry"] - - -def _format_bytes(num: int) -> str: - step = 1024 - units = ["B", "KB", "MB", "GB", "TB", "PB"] - value = float(num) - for unit in units: - if value < step or unit == units[-1]: - if unit == "B": - return f"{int(value)} B" - return f"{value:.1f} {unit}" - value /= step - return f"{value:.1f} PB" - - -def _friendly_error_message(exc: Exception) -> str: - message = str(exc) or "An unexpected error occurred" - if isinstance(exc, IamError): - return f"Access issue: {message}" - if isinstance(exc, StorageError): - return f"Storage issue: {message}" - return message - - -def _wants_json() -> bool: - return request.accept_mimetypes.best_match( - ["application/json", "text/html"] - ) == "application/json" - - -def _policy_allows_public_read(policy: dict[str, Any]) -> bool: - statements = policy.get("Statement", []) - if isinstance(statements, dict): - statements = [statements] - list_allowed = False - get_allowed = False - for statement in statements: - if not isinstance(statement, dict): - continue - if statement.get("Effect") != "Allow": - continue - if statement.get("Condition"): - continue - principal = statement.get("Principal") - principal_all = principal == "*" or ( - isinstance(principal, dict) - and any(value == "*" or value == ["*"] for value in principal.values()) - ) - if not principal_all: - continue - actions = statement.get("Action", []) - if isinstance(actions, str): - actions = [actions] - normalized = {action.lower() for action in actions} - if not list_allowed: - list_allowed = any(action in {"*", "s3:*", "s3:listbucket"} for action in normalized) - if not get_allowed: - get_allowed = any(action in {"*", "s3:*", "s3:getobject"} for action in normalized) - if list_allowed and get_allowed: - return True - return False - - -def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]: - if not policy: - return ("IAM only", 
"text-bg-secondary") - if _policy_allows_public_read(policy): - return ("Public read", "text-bg-warning") - return ("Custom policy", "text-bg-info") - - -def _current_principal(): - token = session.get("cred_token") - creds = _secret_store().peek(token) if token else None - if not creds: - return None - try: - return _iam().authenticate(creds["access_key"], creds["secret_key"]) - except IamError: - session.pop("cred_token", None) - if token: - _secret_store().pop(token) - return None - - -def _authorize_ui(principal, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None: - iam_allowed = True - iam_error: IamError | None = None - try: - _iam().authorize(principal, bucket_name, action) - except IamError as exc: - iam_allowed = False - iam_error = exc - decision = None - enforce_bucket_policies = current_app.config.get("UI_ENFORCE_BUCKET_POLICIES", True) - if bucket_name and enforce_bucket_policies: - access_key = principal.access_key if principal else None - policy_context = _build_policy_context() - decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action, policy_context) - if decision == "deny": - raise IamError("Access denied by bucket policy") - if not iam_allowed and decision != "allow": - raise iam_error or IamError("Access denied") - - -def _api_headers() -> dict[str, str]: - token = session.get("cred_token") - creds = _secret_store().peek(token) or {} - return { - "X-Access-Key": creds.get("access_key", ""), - "X-Secret-Key": creds.get("secret_key", ""), - } - - -@ui_bp.app_context_processor -def inject_nav_state() -> dict[str, Any]: - principal = _current_principal() - can_manage = False - if principal: - try: - _iam().authorize(principal, None, "iam:list_users") - can_manage = True - except IamError: - can_manage = False - return { - "principal": principal, - "can_manage_iam": can_manage, - "can_view_metrics": can_manage, - "website_hosting_nav": can_manage and current_app.config.get("WEBSITE_HOSTING_ENABLED", False), - "csrf_token": generate_csrf, - } - - -@ui_bp.before_request -def ensure_authenticated(): - exempt = {"ui.login"} - if request.endpoint in exempt or request.endpoint is None: - return None - if _current_principal() is None: - return redirect(url_for("ui.login")) - return None - - -@ui_bp.route("/login", methods=["GET", "POST"]) -def login(): - if request.method == "POST": - access_key = request.form.get("access_key", "").strip() - secret_key = request.form.get("secret_key", "").strip() - try: - principal = _iam().authenticate(access_key, secret_key) - except IamError as exc: - flash(_friendly_error_message(exc), "danger") - return render_template("login.html") - creds = {"access_key": access_key, "secret_key": secret_key} - ttl = int(current_app.permanent_session_lifetime.total_seconds()) - token = _secret_store().remember(creds, ttl=ttl) - session["cred_token"] = token - session.permanent = True - flash(f"Welcome back, {principal.display_name}", "success") - return redirect(url_for("ui.buckets_overview")) - return render_template("login.html") - - -@ui_bp.post("/logout") -def logout(): - token = session.pop("cred_token", None) - if token: - _secret_store().pop(token) - flash("Signed out", "info") - return redirect(url_for("ui.login")) - - -@ui_bp.get("/docs") -def docs_page(): - principal = _current_principal() - api_base = current_app.config.get("API_BASE_URL") or "http://127.0.0.1:5000" - api_base = api_base.rstrip("/") - parsed = urlparse(api_base) - api_host = parsed.netloc or parsed.path or api_base - return 
render_template( - "docs.html", - principal=principal, - api_base=api_base, - api_host=api_host, - ) - - -@ui_bp.get("/") -def buckets_overview(): - principal = _current_principal() - try: - client = get_session_s3_client() - resp = client.list_buckets() - bucket_names = [b["Name"] for b in resp.get("Buckets", [])] - bucket_creation = {b["Name"]: b.get("CreationDate") for b in resp.get("Buckets", [])} - except PermissionError: - return redirect(url_for("ui.login")) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - flash(exc.response.get("Error", {}).get("Message", "S3 operation failed"), "danger") - else: - flash("S3 API server is unreachable. Ensure the API server is running.", "danger") - return render_template("buckets.html", buckets=[], principal=principal) - - allowed_names = set(_iam().buckets_for_principal(principal, bucket_names)) - visible_buckets = [] - policy_store = _bucket_policies() - for name in bucket_names: - if name not in allowed_names: - continue - policy = policy_store.get_policy(name) - cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60) - stats = _storage().bucket_stats(name, cache_ttl=cache_ttl) - access_label, access_badge = _bucket_access_descriptor(policy) - - class _BucketMeta: - def __init__(self, n, cd): - self.name = n - self.creation_date = cd - meta = _BucketMeta(name, bucket_creation.get(name)) - - visible_buckets.append({ - "meta": meta, - "summary": { - "objects": stats["total_objects"], - "total_bytes": stats["total_bytes"], - "human_size": _format_bytes(stats["total_bytes"]), - }, - "access_label": access_label, - "access_badge": access_badge, - "has_policy": bool(policy), - "detail_url": url_for("ui.bucket_detail", bucket_name=name), - }) - return render_template("buckets.html", buckets=visible_buckets, principal=principal) - -@ui_bp.get("/buckets") -def buckets_redirect(): - return redirect(url_for("ui.buckets_overview")) - -@ui_bp.post("/buckets") -def create_bucket(): - principal = _current_principal() - bucket_name = request.form.get("bucket_name", "").strip() - if not bucket_name: - if _wants_json(): - return jsonify({"error": "Bucket name is required"}), 400 - flash("Bucket name is required", "danger") - return redirect(url_for("ui.buckets_overview")) - try: - _authorize_ui(principal, bucket_name, "write") - client = get_session_s3_client() - client.create_bucket(Bucket=bucket_name) - if _wants_json(): - return jsonify({"success": True, "message": f"Bucket '{bucket_name}' created", "bucket_name": bucket_name}) - flash(f"Bucket '{bucket_name}' created", "success") - except PermissionError: - return redirect(url_for("ui.login")) - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - else: - msg = "S3 API server is unreachable" - if _wants_json(): - return jsonify({"error": msg}), 502 - flash(msg, "danger") - return redirect(url_for("ui.buckets_overview")) - - -@ui_bp.get("/buckets/") -def bucket_detail(bucket_name: str): - principal = _current_principal() - storage = _storage() - try: - _authorize_ui(principal, bucket_name, "list") - if not storage.bucket_exists(bucket_name): - raise StorageError("Bucket does not exist") - except 
(StorageError, IamError) as exc: - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.buckets_overview")) - bucket_policy = _bucket_policies().get_policy(bucket_name) - policy_text = json.dumps(bucket_policy, indent=2) if bucket_policy else "" - default_policy = json.dumps( - { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "AllowList", - "Effect": "Allow", - "Principal": "*", - "Action": ["s3:ListBucket"], - "Resource": [f"arn:aws:s3:::{bucket_name}"], - }, - { - "Sid": "AllowRead", - "Effect": "Allow", - "Principal": "*", - "Action": ["s3:GetObject"], - "Resource": [f"arn:aws:s3:::{bucket_name}/*"], - }, - ], - }, - indent=2, - ) - iam = _iam() - bucket_perms = iam.check_permissions( - principal, bucket_name, ["policy", "lifecycle", "cors", "write", "replication"], - ) if principal else {} - admin_perms = iam.check_permissions( - principal, None, ["iam:list_users"], - ) if principal else {} - - can_edit_policy = bucket_perms.get("policy", False) - can_manage_lifecycle = bucket_perms.get("lifecycle", False) - can_manage_cors = bucket_perms.get("cors", False) - can_manage_versioning = bucket_perms.get("write", False) - can_manage_replication = bucket_perms.get("replication", False) - is_replication_admin = admin_perms.get("iam:list_users", False) - - try: - versioning_enabled = storage.is_versioning_enabled(bucket_name) - except StorageError: - versioning_enabled = False - - replication_rule = _replication().get_rule(bucket_name) - connections = _connections().list() if (is_replication_admin or replication_rule) else [] - - encryption_config = storage.get_bucket_encryption(bucket_name) - kms_manager = _kms() - kms_keys = kms_manager.list_keys() if kms_manager else [] - kms_enabled = current_app.config.get("KMS_ENABLED", False) - encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False) - lifecycle_enabled = current_app.config.get("LIFECYCLE_ENABLED", False) - site_sync_enabled = current_app.config.get("SITE_SYNC_ENABLED", False) - website_hosting_enabled = current_app.config.get("WEBSITE_HOSTING_ENABLED", False) - can_manage_encryption = can_manage_versioning - - bucket_quota = storage.get_bucket_quota(bucket_name) - bucket_stats = storage.bucket_stats(bucket_name) - can_manage_quota = is_replication_admin - - website_config = None - website_domains = [] - if website_hosting_enabled: - try: - website_config = storage.get_bucket_website(bucket_name) - except StorageError: - website_config = None - domain_store = current_app.extensions.get("website_domains") - if domain_store: - website_domains = domain_store.get_domains_for_bucket(bucket_name) - - objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name) - objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name) - - lifecycle_url = url_for("ui.bucket_lifecycle", bucket_name=bucket_name) - cors_url = url_for("ui.bucket_cors", bucket_name=bucket_name) - acl_url = url_for("ui.bucket_acl", bucket_name=bucket_name) - folders_url = url_for("ui.create_folder", bucket_name=bucket_name) - buckets_for_copy_url = url_for("ui.list_buckets_for_copy", bucket_name=bucket_name) - - return render_template( - "bucket_detail.html", - bucket_name=bucket_name, - objects_api_url=objects_api_url, - objects_stream_url=objects_stream_url, - lifecycle_url=lifecycle_url, - cors_url=cors_url, - acl_url=acl_url, - folders_url=folders_url, - buckets_for_copy_url=buckets_for_copy_url, - principal=principal, - bucket_policy_text=policy_text, - bucket_policy=bucket_policy, - 
can_edit_policy=can_edit_policy, - can_manage_lifecycle=can_manage_lifecycle, - can_manage_cors=can_manage_cors, - can_manage_versioning=can_manage_versioning, - can_manage_replication=can_manage_replication, - can_manage_encryption=can_manage_encryption, - is_replication_admin=is_replication_admin, - default_policy=default_policy, - versioning_enabled=versioning_enabled, - replication_rule=replication_rule, - connections=connections, - encryption_config=encryption_config, - kms_keys=kms_keys, - kms_enabled=kms_enabled, - encryption_enabled=encryption_enabled, - lifecycle_enabled=lifecycle_enabled, - bucket_quota=bucket_quota, - bucket_stats=bucket_stats, - can_manage_quota=can_manage_quota, - site_sync_enabled=site_sync_enabled, - website_hosting_enabled=website_hosting_enabled, - website_config=website_config, - website_domains=website_domains, - can_manage_website=can_edit_policy, - ) - - -@ui_bp.get("/buckets//objects") -def list_bucket_objects(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "list") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - max_keys = max(1, min(int(request.args.get("max_keys", 1000)), 100000)) - except ValueError: - return jsonify({"error": "max_keys must be an integer"}), 400 - continuation_token = request.args.get("continuation_token") or None - prefix = request.args.get("prefix") or None - - try: - client = get_session_s3_client() - kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": max_keys} - if continuation_token: - kwargs["ContinuationToken"] = continuation_token - if prefix: - kwargs["Prefix"] = prefix - boto_resp = client.list_objects_v2(**kwargs) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - versioning_enabled = get_versioning_via_s3(client, bucket_name) - url_templates = build_url_templates(bucket_name) - display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC") - data = translate_list_objects(boto_resp, url_templates, display_tz, versioning_enabled) - response = jsonify(data) - response.headers["Cache-Control"] = "no-store" - return response - - -@ui_bp.get("/buckets//objects/stream") -def stream_bucket_objects(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "list") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - prefix = request.args.get("prefix") or None - delimiter = request.args.get("delimiter") or None - - storage = _storage() - try: - versioning_enabled = storage.is_versioning_enabled(bucket_name) - except StorageError: - versioning_enabled = False - url_templates = build_url_templates(bucket_name) - display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC") - - def generate(): - yield json.dumps({ - "type": "meta", - "versioning_enabled": versioning_enabled, - "url_templates": url_templates, - }) + "\n" - yield json.dumps({"type": "count", "total_count": 0}) + "\n" - - running_count = 0 - try: - if delimiter: - for item_type, item in storage.iter_objects_shallow( - bucket_name, prefix=prefix or "", delimiter=delimiter, - ): - if item_type == "folder": - yield json.dumps({"type": "folder", "prefix": item}) + "\n" - else: - last_mod = item.last_modified - yield json.dumps({ - "type": "object", - "key": 
item.key, - "size": item.size, - "last_modified": last_mod.isoformat(), - "last_modified_display": _format_datetime_display(last_mod, display_tz), - "last_modified_iso": _format_datetime_iso(last_mod, display_tz), - "etag": item.etag or "", - }) + "\n" - running_count += 1 - if running_count % 1000 == 0: - yield json.dumps({"type": "count", "total_count": running_count}) + "\n" - else: - continuation_token = None - while True: - result = storage.list_objects( - bucket_name, - max_keys=1000, - continuation_token=continuation_token, - prefix=prefix, - ) - for obj in result.objects: - last_mod = obj.last_modified - yield json.dumps({ - "type": "object", - "key": obj.key, - "size": obj.size, - "last_modified": last_mod.isoformat(), - "last_modified_display": _format_datetime_display(last_mod, display_tz), - "last_modified_iso": _format_datetime_iso(last_mod, display_tz), - "etag": obj.etag or "", - }) + "\n" - running_count += len(result.objects) - yield json.dumps({"type": "count", "total_count": running_count}) + "\n" - if not result.is_truncated: - break - continuation_token = result.next_continuation_token - except StorageError as exc: - yield json.dumps({"type": "error", "error": str(exc)}) + "\n" - return - yield json.dumps({"type": "count", "total_count": running_count}) + "\n" - yield json.dumps({"type": "done"}) + "\n" - - return Response( - generate(), - mimetype='application/x-ndjson', - headers={ - 'Cache-Control': 'no-cache', - 'X-Accel-Buffering': 'no', - 'X-Stream-Response': 'true', - } - ) - - -@ui_bp.get("/buckets//objects/search") -@limiter.limit("30 per minute") -def search_bucket_objects(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "list") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - query = request.args.get("q", "").strip() - if not query: - return jsonify({"results": [], "truncated": False}) - - try: - limit = max(1, min(int(request.args.get("limit", 500)), 1000)) - except (ValueError, TypeError): - limit = 500 - - prefix = request.args.get("prefix", "").strip() - - storage = _storage() - try: - return jsonify(storage.search_objects(bucket_name, query, prefix=prefix, limit=limit)) - except StorageError as exc: - return jsonify({"error": str(exc)}), 404 - - -@ui_bp.post("/buckets//upload") -@limiter.limit("30 per minute") -def upload_object(bucket_name: str): - principal = _current_principal() - file = request.files.get("object") - object_key = request.form.get("object_key") - metadata_raw = (request.form.get("metadata") or "").strip() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - - def _response(success: bool, message: str, status: int = 200): - if wants_json: - payload = {"status": "ok" if success else "error", "message": message} - return jsonify(payload), status - flash(message, "success" if success else "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="objects")) - - if file and not object_key: - object_key = file.filename - if not object_key: - return _response(False, "Object key is required", 400) - if not file: - return _response(False, "Choose a file to upload", 400) - - metadata = None - if metadata_raw: - try: - parsed = json.loads(metadata_raw) - if not isinstance(parsed, dict): - raise ValueError - metadata = {str(k): str(v) for k, v in parsed.items()} - except ValueError: - return _response(False, "Metadata must be a JSON object", 400) - - try: - _authorize_ui(principal, bucket_name, "write") - client = 
get_session_s3_client() - put_kwargs: dict[str, Any] = { - "Bucket": bucket_name, - "Key": object_key, - "Body": file.stream, - } - if file.content_type: - put_kwargs["ContentType"] = file.content_type - if metadata: - put_kwargs["Metadata"] = metadata - client.put_object(**put_kwargs) - _replication().trigger_replication(bucket_name, object_key) - - message = f"Uploaded '{object_key}'" - if metadata: - message += " with metadata" - return _response(True, message) - except PermissionError as exc: - return _response(False, str(exc), 401) - except IamError as exc: - return _response(False, _friendly_error_message(exc), 400) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return _response(False, err["error"], status) - return _response(False, "S3 API server is unreachable", 502) - - -@ui_bp.post("/buckets//multipart/initiate") -def initiate_multipart_upload(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - payload = request.get_json(silent=True) or {} - object_key = str(payload.get("object_key", "")).strip() - if not object_key: - return jsonify({"error": "object_key is required"}), 400 - if "\x00" in object_key: - return jsonify({"error": "Object key cannot contain null bytes"}), 400 - max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024) - if len(object_key.encode("utf-8")) > max_key_len: - return jsonify({"error": f"Object key exceeds maximum length of {max_key_len} bytes"}), 400 - metadata_payload = payload.get("metadata") - metadata = None - if metadata_payload is not None: - if not isinstance(metadata_payload, dict): - return jsonify({"error": "metadata must be an object"}), 400 - metadata = {str(k): str(v) for k, v in metadata_payload.items()} - try: - client = get_session_s3_client() - create_kwargs: dict[str, Any] = {"Bucket": bucket_name, "Key": object_key} - if metadata: - create_kwargs["Metadata"] = metadata - resp = client.create_multipart_upload(**create_kwargs) - upload_id = resp["UploadId"] - get_upload_registry().register(upload_id, bucket_name, object_key) - return jsonify({"upload_id": upload_id}) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.put("/buckets//multipart//parts") -@csrf.exempt -def upload_multipart_part(bucket_name: str, upload_id: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - try: - part_number = int(request.args.get("partNumber", "0")) - except ValueError: - return jsonify({"error": "partNumber must be an integer"}), 400 - if part_number < 1 or part_number > 10000: - return jsonify({"error": "partNumber must be between 1 and 10000"}), 400 - object_key = get_upload_registry().get_key(upload_id, bucket_name) - if not object_key: - return jsonify({"error": "Unknown upload ID or upload expired"}), 404 - try: - data = request.get_data() - if not data: - return jsonify({"error": "Empty request body"}), 400 - client = get_session_s3_client() - resp = client.upload_part( - Bucket=bucket_name, - Key=object_key, - 
UploadId=upload_id, - PartNumber=part_number, - Body=data, - ) - etag = resp.get("ETag", "").strip('"') - return jsonify({"etag": etag, "part_number": part_number}) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.post("/buckets//multipart//complete") -def complete_multipart_upload(bucket_name: str, upload_id: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - payload = request.get_json(silent=True) or {} - parts_payload = payload.get("parts") - if not isinstance(parts_payload, list) or not parts_payload: - return jsonify({"error": "parts array required"}), 400 - normalized = [] - for part in parts_payload: - if not isinstance(part, dict): - return jsonify({"error": "Each part must be an object"}), 400 - raw_number = part.get("part_number") or part.get("PartNumber") - try: - number = int(raw_number) - except (TypeError, ValueError): - return jsonify({"error": "Each part must include part_number"}), 400 - etag = str(part.get("etag") or part.get("ETag") or "").strip() - normalized.append({"PartNumber": number, "ETag": etag}) - object_key = get_upload_registry().get_key(upload_id, bucket_name) - if not object_key: - return jsonify({"error": "Unknown upload ID or upload expired"}), 404 - try: - client = get_session_s3_client() - resp = client.complete_multipart_upload( - Bucket=bucket_name, - Key=object_key, - UploadId=upload_id, - MultipartUpload={"Parts": normalized}, - ) - get_upload_registry().remove(upload_id) - result_key = resp.get("Key", object_key) - _replication().trigger_replication(bucket_name, result_key) - return jsonify({ - "key": result_key, - "size": 0, - "etag": resp.get("ETag", "").strip('"'), - "last_modified": None, - }) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - code = exc.response.get("Error", {}).get("Code", "") - if code in ("NoSuchUpload",): - get_upload_registry().remove(upload_id) - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.delete("/buckets//multipart/") -def abort_multipart_upload(bucket_name: str, upload_id: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - object_key = get_upload_registry().get_key(upload_id, bucket_name) - if not object_key: - return jsonify({"error": "Unknown upload ID or upload expired"}), 404 - try: - client = get_session_s3_client() - client.abort_multipart_upload(Bucket=bucket_name, Key=object_key, UploadId=upload_id) - get_upload_registry().remove(upload_id) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - code = exc.response.get("Error", {}).get("Code", "") - if code in ("NoSuchUpload",): - get_upload_registry().remove(upload_id) - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - return jsonify({"status": "aborted"}) - - -@ui_bp.post("/buckets//delete") -@limiter.limit("20 per minute") -def delete_bucket(bucket_name: str): - principal = 
_current_principal() - try: - _authorize_ui(principal, bucket_name, "delete") - client = get_session_s3_client() - client.delete_bucket(Bucket=bucket_name) - try: - _bucket_policies().delete_policy(bucket_name) - except Exception: - pass - try: - _replication_manager().delete_rule(bucket_name) - except Exception: - pass - if _wants_json(): - return jsonify({"success": True, "message": f"Bucket '{bucket_name}' removed"}) - flash(f"Bucket '{bucket_name}' removed", "success") - except PermissionError: - return redirect(url_for("ui.login")) - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - else: - msg = "S3 API server is unreachable" - if _wants_json(): - return jsonify({"error": msg}), 502 - flash(msg, "danger") - return redirect(url_for("ui.buckets_overview")) - - -@ui_bp.post("/buckets//objects//delete") -@limiter.limit("60 per minute") -def delete_object(bucket_name: str, object_key: str): - principal = _current_principal() - purge_versions = request.form.get("purge_versions") == "1" - try: - _authorize_ui(principal, bucket_name, "delete", object_key=object_key) - if purge_versions: - _storage().purge_object(bucket_name, object_key) - message = f"Permanently deleted '{object_key}' and all versions" - else: - client = get_session_s3_client() - client.delete_object(Bucket=bucket_name, Key=object_key) - _replication_manager().trigger_replication(bucket_name, object_key, action="delete") - message = f"Deleted '{object_key}'" - if _wants_json(): - return jsonify({"success": True, "message": message}) - flash(message, "success") - except PermissionError: - return redirect(url_for("ui.login")) - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - except StorageError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - - -@ui_bp.post("/buckets//objects/bulk-delete") -@limiter.limit("40 per minute") -def bulk_delete_objects(bucket_name: str): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" or request.is_json - payload = request.get_json(silent=True) or {} - keys_payload = payload.get("keys") - purge_versions = bool(payload.get("purge_versions")) - - def _respond(success: bool, message: str, *, deleted=None, errors=None, status_code: int = 200): - if wants_json: - body = { - "status": "ok" if success else "partial", - "message": message, - "deleted": deleted or [], - "errors": errors or [], - } - if not success and not errors: - body["status"] = "error" - return jsonify(body), status_code - flash(message, "success" if success and not errors else "warning") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - - if not 
isinstance(keys_payload, list): - return _respond(False, "keys must be provided as a JSON array", status_code=400) - - cleaned: list[str] = [] - for entry in keys_payload: - if isinstance(entry, str): - candidate = entry.strip() - if candidate: - cleaned.append(candidate) - if not cleaned: - return _respond(False, "Select at least one object to delete", status_code=400) - - MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500) - if len(cleaned) > MAX_KEYS: - return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400) - - unique_keys = list(dict.fromkeys(cleaned)) - - folder_prefixes = [k for k in unique_keys if k.endswith("/")] - if folder_prefixes: - try: - client = get_session_s3_client() - for prefix in folder_prefixes: - unique_keys.remove(prefix) - paginator = client.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): - for obj in page.get("Contents", []): - if obj["Key"] not in unique_keys: - unique_keys.append(obj["Key"]) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return _respond(False, err["error"], status_code=status) - return _respond(False, "S3 API server is unreachable", status_code=502) - - if not unique_keys: - return _respond(False, "No objects found under the selected folders", status_code=400) - - try: - _authorize_ui(principal, bucket_name, "delete") - except IamError as exc: - return _respond(False, _friendly_error_message(exc), status_code=403) - - authorized_keys = [] - denied_keys = [] - for key in unique_keys: - try: - _authorize_ui(principal, bucket_name, "delete", object_key=key) - authorized_keys.append(key) - except IamError: - denied_keys.append(key) - if not authorized_keys: - return _respond(False, "Access denied for all selected objects", status_code=403) - unique_keys = authorized_keys - - if purge_versions: - storage = _storage() - deleted: list[str] = [] - errors: list[dict[str, str]] = [] - for key in unique_keys: - try: - storage.purge_object(bucket_name, key) - deleted.append(key) - except StorageError as exc: - errors.append({"key": key, "error": str(exc)}) - else: - try: - client = get_session_s3_client() - deleted = [] - errors = [] - for i in range(0, len(unique_keys), 1000): - batch = unique_keys[i:i + 1000] - objects_to_delete = [{"Key": k} for k in batch] - resp = client.delete_objects( - Bucket=bucket_name, - Delete={"Objects": objects_to_delete, "Quiet": False}, - ) - deleted.extend(d["Key"] for d in resp.get("Deleted", [])) - errors.extend({"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", [])) - for key in deleted: - _replication_manager().trigger_replication(bucket_name, key, action="delete") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return _respond(False, err["error"], status_code=status) - return _respond(False, "S3 API server is unreachable", status_code=502) - - if not deleted and errors: - return _respond(False, "Unable to delete the selected objects", deleted=deleted, errors=errors, status_code=400) - - message = f"Deleted {len(deleted)} object{'s' if len(deleted) != 1 else ''}" - if purge_versions and deleted: - message += " (including archived versions)" - if errors: - message += f"; {len(errors)} failed" - return _respond(not errors, message, 
deleted=deleted, errors=errors) - - -@ui_bp.post("/buckets//objects/bulk-download") -@limiter.limit("10 per minute") -def bulk_download_objects(bucket_name: str): - import io - import zipfile - - principal = _current_principal() - payload = request.get_json(silent=True) or {} - keys_payload = payload.get("keys") - - if not isinstance(keys_payload, list): - return jsonify({"error": "keys must be provided as a JSON array"}), 400 - - cleaned: list[str] = [] - for entry in keys_payload: - if isinstance(entry, str): - candidate = entry.strip() - if candidate: - cleaned.append(candidate) - if not cleaned: - return jsonify({"error": "Select at least one object to download"}), 400 - - MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500) - if len(cleaned) > MAX_KEYS: - return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400 - - unique_keys = list(dict.fromkeys(cleaned)) - storage = _storage() - - try: - _authorize_ui(principal, bucket_name, "read") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - max_total_bytes = current_app.config.get("BULK_DOWNLOAD_MAX_BYTES", 1024 * 1024 * 1024) - total_size = 0 - for key in unique_keys: - try: - path = storage.get_object_path(bucket_name, key) - total_size += path.stat().st_size - except (StorageError, OSError): - continue - if total_size > max_total_bytes: - limit_mb = max_total_bytes // (1024 * 1024) - return jsonify({"error": f"Total download size exceeds {limit_mb} MB limit. Select fewer objects."}), 400 - - buffer = io.BytesIO() - with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf: - for key in unique_keys: - try: - _authorize_ui(principal, bucket_name, "read", object_key=key) - - metadata = storage.get_object_metadata(bucket_name, key) - is_encrypted = "x-amz-server-side-encryption" in metadata - - if is_encrypted and hasattr(storage, 'get_object_data'): - data, _ = storage.get_object_data(bucket_name, key) - zf.writestr(key, data) - else: - path = storage.get_object_path(bucket_name, key) - zf.write(path, arcname=key) - except (StorageError, IamError): - continue - - buffer.seek(0) - return send_file( - buffer, - as_attachment=True, - download_name=f"{bucket_name}-download.zip", - mimetype="application/zip" - ) - - -@ui_bp.post("/buckets//objects//purge") -@limiter.limit("30 per minute") -def purge_object_versions(bucket_name: str, object_key: str): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - try: - _authorize_ui(principal, bucket_name, "delete", object_key=object_key) - _storage().purge_object(bucket_name, object_key) - except IamError as exc: - if wants_json: - return jsonify({"error": str(exc)}), 403 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - except StorageError as exc: - if wants_json: - return jsonify({"error": str(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - message = f"Removed archived versions for '{object_key}'" - if wants_json: - return jsonify({"status": "ok", "message": message}) - flash(message, "success") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - - -@ui_bp.get("/buckets//objects//preview") -def object_preview(bucket_name: str, object_key: str) -> Response: - import mimetypes as _mimetypes - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", 
object_key=object_key) - except IamError as exc: - return Response(str(exc), status=403) - - download = request.args.get("download") == "1" - raw_filename = object_key.rsplit("/", 1)[-1] or object_key - safe_filename = raw_filename.replace('"', "'").replace("\\", "_") - safe_filename = "".join(c for c in safe_filename if c.isprintable() and c not in "\r\n") - if not safe_filename: - safe_filename = "download" - try: - safe_filename.encode("latin-1") - ascii_safe = True - except UnicodeEncodeError: - ascii_safe = False - - range_header = request.headers.get("Range") - - try: - client = get_session_s3_client() - get_kwargs: dict[str, Any] = {"Bucket": bucket_name, "Key": object_key} - if range_header: - get_kwargs["Range"] = range_header - resp = client.get_object(**get_kwargs) - except PermissionError as exc: - return Response(str(exc), status=401) - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - status = 404 if code == "NoSuchKey" else 400 - return Response(exc.response.get("Error", {}).get("Message", "S3 operation failed"), status=status) - except (EndpointConnectionError, ConnectionClosedError): - return Response("S3 API server is unreachable", status=502) - - content_type = resp.get("ContentType") or _mimetypes.guess_type(object_key)[0] or "application/octet-stream" - content_length = resp.get("ContentLength", 0) - body_stream = resp["Body"] - is_partial = resp.get("ResponseMetadata", {}).get("HTTPStatusCode") == 206 - content_range = resp.get("ContentRange") - - _DANGEROUS_TYPES = { - "text/html", "text/xml", "application/xhtml+xml", - "application/xml", "image/svg+xml", - } - base_ct = content_type.split(";")[0].strip().lower() - if not download and base_ct in _DANGEROUS_TYPES: - content_type = "text/plain; charset=utf-8" - - def generate(): - try: - for chunk in body_stream.iter_chunks(chunk_size=65536): - yield chunk - finally: - body_stream.close() - - status_code = 206 if is_partial else 200 - headers = { - "Content-Type": content_type, - "X-Content-Type-Options": "nosniff", - "Accept-Ranges": "bytes", - } - if content_length: - headers["Content-Length"] = str(content_length) - if content_range: - headers["Content-Range"] = content_range - disposition = "attachment" if download else "inline" - if ascii_safe: - headers["Content-Disposition"] = f'{disposition}; filename="{safe_filename}"' - else: - from urllib.parse import quote - encoded = quote(safe_filename, safe="") - ascii_fallback = safe_filename.encode("ascii", "replace").decode("ascii").replace("?", "_") - headers["Content-Disposition"] = f'{disposition}; filename="{ascii_fallback}"; filename*=UTF-8\'\'{encoded}' - - return Response(generate(), status=status_code, headers=headers) - - -@ui_bp.post("/buckets//objects//presign") -def object_presign(bucket_name: str, object_key: str): - principal = _current_principal() - payload = request.get_json(silent=True) or {} - method = str(payload.get("method", "GET")).upper() - allowed_methods = {"GET", "PUT", "DELETE"} - if method not in allowed_methods: - return jsonify({"error": "Method must be GET, PUT, or DELETE"}), 400 - action = "read" if method == "GET" else ("delete" if method == "DELETE" else "write") - try: - _authorize_ui(principal, bucket_name, action, object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - try: - expires = int(payload.get("expires_in", 900)) - except (TypeError, ValueError): - return jsonify({"error": "expires_in must be an integer"}), 400 - min_expiry = 
current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1) - max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800) - expires = max(min_expiry, min(expires, max_expiry)) - - method_to_client_method = {"GET": "get_object", "PUT": "put_object", "DELETE": "delete_object"} - client_method = method_to_client_method[method] - - try: - client = get_session_s3_client() - url = client.generate_presigned_url( - ClientMethod=client_method, - Params={"Bucket": bucket_name, "Key": object_key}, - ExpiresIn=expires, - ) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - current_app.logger.info( - "Presigned URL generated", - extra={"bucket": bucket_name, "key": object_key, "method": method}, - ) - return jsonify({"url": url, "method": method, "expires_in": expires}) - - -@ui_bp.get("/buckets//objects//metadata") -def object_metadata(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - try: - client = get_session_s3_client() - resp = client.head_object(Bucket=bucket_name, Key=object_key) - metadata = resp.get("Metadata", {}) - if resp.get("ContentType"): - metadata["Content-Type"] = resp["ContentType"] - if resp.get("ContentLength") is not None: - metadata["Content-Length"] = str(resp["ContentLength"]) - if resp.get("ServerSideEncryption"): - metadata["x-amz-server-side-encryption"] = resp["ServerSideEncryption"] - return jsonify({"metadata": metadata}) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - if code in ("NoSuchKey", "404", "NotFound"): - return jsonify({"error": "Object not found"}), 404 - err, status = handle_client_error(exc) - return jsonify(err), status - except (EndpointConnectionError, ConnectionClosedError) as exc: - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.get("/buckets//objects//versions") -def object_versions(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - try: - client = get_session_s3_client() - resp = client.list_object_versions(Bucket=bucket_name, Prefix=object_key, MaxKeys=1000) - versions = [] - for v in resp.get("Versions", []): - if v.get("Key") != object_key: - continue - if v.get("IsLatest", False): - continue - versions.append({ - "version_id": v.get("VersionId", ""), - "last_modified": v["LastModified"].isoformat() if v.get("LastModified") else None, - "size": v.get("Size", 0), - "etag": v.get("ETag", "").strip('"'), - "is_latest": False, - }) - return jsonify({"versions": versions}) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.get("/buckets//archived") -def archived_objects(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "list") - except IamError as exc: - return jsonify({"error": 
str(exc)}), 403 - try: - entries = _storage().list_orphaned_objects(bucket_name) - except StorageError as exc: - return jsonify({"error": str(exc)}), 400 - payload: list[dict[str, Any]] = [] - for entry in entries: - latest = entry.get("latest") or {} - restore_url = None - if latest.get("version_id"): - restore_url = url_for( - "ui.restore_object_version", - bucket_name=bucket_name, - object_key=entry["key"], - version_id=latest["version_id"], - ) - purge_url = url_for("ui.purge_object_versions", bucket_name=bucket_name, object_key=entry["key"]) - payload.append( - { - "key": entry["key"], - "versions": entry.get("versions", 0), - "total_size": entry.get("total_size", 0), - "latest": entry.get("latest"), - "restore_url": restore_url, - "purge_url": purge_url, - } - ) - return jsonify({"objects": payload}) - - -@ui_bp.post("/buckets//objects//versions//restore") -def restore_object_version(bucket_name: str, object_key: str, version_id: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - try: - meta = _storage().restore_object_version(bucket_name, object_key, version_id) - except StorageError as exc: - return jsonify({"error": str(exc)}), 400 - message = f"Restored '{meta.key}'" if meta else "Object restored" - return jsonify({"status": "ok", "message": message}) - - -@ui_bp.post("/buckets//policy") -@limiter.limit("10 per minute") -def update_bucket_policy(bucket_name: str): - principal = _current_principal() - action = request.form.get("mode", "upsert") - try: - _authorize_ui(principal, bucket_name, "policy") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - - try: - client = get_session_s3_client() - except (PermissionError, RuntimeError) as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) - - if action == "delete": - try: - client.delete_bucket_policy(Bucket=bucket_name) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) - if _wants_json(): - return jsonify({"success": True, "message": "Bucket policy removed"}) - flash("Bucket policy removed", "info") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) - - document = request.form.get("policy_document", "").strip() - if not document: - if _wants_json(): - return jsonify({"error": "Provide a JSON policy document"}), 400 - flash("Provide a JSON policy document", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) - try: - json.loads(document) - except json.JSONDecodeError as exc: - if _wants_json(): - return jsonify({"error": f"Policy error: {exc}"}), 400 - flash(f"Policy error: {exc}", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) - try: - client.put_bucket_policy(Bucket=bucket_name, Policy=document) - if _wants_json(): - return jsonify({"success": True, "message": "Bucket policy 
saved"}) - flash("Bucket policy saved", "success") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) - - -@ui_bp.post("/buckets//versioning") -def update_bucket_versioning(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 403 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - state = request.form.get("state", "enable") - if state not in ("enable", "suspend"): - if _wants_json(): - return jsonify({"error": "state must be 'enable' or 'suspend'"}), 400 - flash("Invalid versioning state", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - enable = state == "enable" - try: - client = get_session_s3_client() - client.put_bucket_versioning( - Bucket=bucket_name, - VersioningConfiguration={"Status": "Enabled" if enable else "Suspended"}, - ) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - message = "Versioning enabled" if enable else "Versioning suspended" - if _wants_json(): - return jsonify({"success": True, "message": message, "enabled": enable}) - flash(message, "success") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - -@ui_bp.post("/buckets//quota") -def update_bucket_quota(bucket_name: str): - """Update bucket quota configuration (admin only).""" - principal = _current_principal() - - is_admin = False - try: - _iam().authorize(principal, None, "iam:list_users") - is_admin = True - except IamError: - pass - - if not is_admin: - if _wants_json(): - return jsonify({"error": "Only administrators can manage bucket quotas"}), 403 - flash("Only administrators can manage bucket quotas", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - action = request.form.get("action", "set") - - if action == "remove": - try: - _storage().set_bucket_quota(bucket_name, max_bytes=None, max_objects=None) - if _wants_json(): - return jsonify({"success": True, "message": "Bucket quota removed"}) - flash("Bucket quota removed", "info") - except StorageError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - max_mb_str = request.form.get("max_mb", "").strip() - max_objects_str = request.form.get("max_objects", "").strip() - - max_bytes = None - max_objects = None - - if max_mb_str: - try: - max_mb = int(max_mb_str) - if max_mb < 1: - raise ValueError("Size must be at least 1 MB") - max_bytes = max_mb * 1024 * 1024 - except ValueError as exc: - if _wants_json(): - return jsonify({"error": 
f"Invalid size value: {exc}"}), 400 - flash(f"Invalid size value: {exc}", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - if max_objects_str: - try: - max_objects = int(max_objects_str) - if max_objects < 0: - raise ValueError("Object count must be non-negative") - except ValueError as exc: - if _wants_json(): - return jsonify({"error": f"Invalid object count: {exc}"}), 400 - flash(f"Invalid object count: {exc}", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - try: - _storage().set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects) - if max_bytes is None and max_objects is None: - message = "Bucket quota removed" - else: - message = "Bucket quota updated" - if _wants_json(): - return jsonify({ - "success": True, - "message": message, - "max_bytes": max_bytes, - "max_objects": max_objects, - "has_quota": max_bytes is not None or max_objects is not None - }) - flash(message, "success" if max_bytes or max_objects else "info") - except StorageError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - -@ui_bp.post("/buckets//encryption") -def update_bucket_encryption(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 403 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - action = request.form.get("action", "enable") - - if action == "disable": - try: - client = get_session_s3_client() - client.delete_bucket_encryption(Bucket=bucket_name) - if _wants_json(): - return jsonify({"success": True, "message": "Default encryption disabled", "enabled": False}) - flash("Default encryption disabled", "info") - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - algorithm = request.form.get("algorithm", "AES256") - kms_key_id = request.form.get("kms_key_id", "").strip() or None - - if algorithm not in ("AES256", "aws:kms"): - if _wants_json(): - return jsonify({"error": "Invalid encryption algorithm"}), 400 - flash("Invalid encryption algorithm", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - sse_rule: dict[str, Any] = {"SSEAlgorithm": algorithm} - if algorithm == "aws:kms" and kms_key_id: - sse_rule["KMSMasterKeyID"] = kms_key_id - - try: - client = get_session_s3_client() - client.put_bucket_encryption( - Bucket=bucket_name, - ServerSideEncryptionConfiguration={ - "Rules": [{"ApplyServerSideEncryptionByDefault": sse_rule}] - }, - ) - if algorithm == "aws:kms": - message = "Default KMS encryption enabled" - else: - message = "Default AES-256 encryption enabled" - if _wants_json(): - return jsonify({"success": True, "message": message, "enabled": True, "algorithm": algorithm}) - flash(message, "success") - except (ClientError, EndpointConnectionError, 
ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - else: - err, status = handle_connection_error(exc) - if _wants_json(): - return jsonify(err), status - flash(err["error"], "danger") - - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - -@ui_bp.post("/buckets//website") -def update_bucket_website(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "policy") - except IamError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 403 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - if _wants_json(): - return jsonify({"error": "Website hosting is not enabled"}), 400 - flash("Website hosting is not enabled", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - action = request.form.get("action", "enable") - - if action == "disable": - try: - _storage().set_bucket_website(bucket_name, None) - if _wants_json(): - return jsonify({"success": True, "message": "Static website hosting disabled", "enabled": False}) - flash("Static website hosting disabled", "info") - except StorageError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - index_document = request.form.get("index_document", "").strip() - error_document = request.form.get("error_document", "").strip() - - if not index_document: - if _wants_json(): - return jsonify({"error": "Index document is required"}), 400 - flash("Index document is required", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - if "/" in index_document: - if _wants_json(): - return jsonify({"error": "Index document must not contain '/'"}), 400 - flash("Index document must not contain '/'", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - website_cfg: dict[str, Any] = {"index_document": index_document} - if error_document: - website_cfg["error_document"] = error_document - - try: - _storage().set_bucket_website(bucket_name, website_cfg) - if _wants_json(): - return jsonify({ - "success": True, - "message": "Static website hosting enabled", - "enabled": True, - "index_document": index_document, - "error_document": error_document, - }) - flash("Static website hosting enabled", "success") - except StorageError as exc: - if _wants_json(): - return jsonify({"error": _friendly_error_message(exc)}), 400 - flash(_friendly_error_message(exc), "danger") - - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) - - -@ui_bp.get("/iam") -def iam_dashboard(): - principal = _current_principal() - iam_service = _iam() - secret_token = request.args.get("secret_token") - disclosed_secret: dict[str, str] | None = None - if secret_token: - payload = _secret_store().pop(secret_token) - if isinstance(payload, dict): - access_key = str(payload.get("access_key", "")) - secret_key = payload.get("secret_key") - if secret_key: - disclosed_secret = { - "access_key": access_key, - "secret_key": str(secret_key), - "operation": str(payload.get("operation", "create")), - } - locked = False - 
locked_reason = None - try: - iam_service.authorize(principal, None, "iam:list_users") - except IamError as exc: - locked = True - locked_reason = str(exc) - users = iam_service.list_users() if not locked else [] - config_summary = iam_service.config_summary() - config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2) - from datetime import datetime as _dt, timedelta as _td, timezone as _tz - _now = _dt.now(_tz.utc) - now_iso = _now.isoformat() - soon_iso = (_now + _td(days=7)).isoformat() - return render_template( - "iam.html", - users=users, - principal=principal, - iam_locked=locked, - locked_reason=locked_reason, - config_summary=config_summary, - config_document=config_document, - disclosed_secret=disclosed_secret, - now_iso=now_iso, - soon_iso=soon_iso, - ) - - -@ui_bp.post("/iam/users") -def create_iam_user(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:create_user") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - display_name = request.form.get("display_name", "").strip() or "Unnamed" - if len(display_name) > 64: - if _wants_json(): - return jsonify({"error": "Display name must be 64 characters or fewer"}), 400 - flash("Display name must be 64 characters or fewer", "danger") - return redirect(url_for("ui.iam_dashboard")) - custom_access_key = request.form.get("access_key", "").strip() or None - custom_secret_key = request.form.get("secret_key", "").strip() or None - policies_text = request.form.get("policies", "").strip() - policies = None - if policies_text: - try: - policies = json.loads(policies_text) - except json.JSONDecodeError as exc: - if _wants_json(): - return jsonify({"error": f"Invalid JSON: {exc}"}), 400 - flash(f"Invalid JSON: {exc}", "danger") - return redirect(url_for("ui.iam_dashboard")) - expires_at = request.form.get("expires_at", "").strip() or None - if expires_at: - try: - from datetime import datetime as _dt, timezone as _tz - exp_dt = _dt.fromisoformat(expires_at) - if exp_dt.tzinfo is None: - exp_dt = exp_dt.replace(tzinfo=_tz.utc) - expires_at = exp_dt.isoformat() - except (ValueError, TypeError): - if _wants_json(): - return jsonify({"error": "Invalid expiry date format"}), 400 - flash("Invalid expiry date format", "danger") - return redirect(url_for("ui.iam_dashboard")) - try: - created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at) - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - token = _secret_store().remember( - { - "access_key": created["access_key"], - "secret_key": created["secret_key"], - "operation": "create", - } - ) - if _wants_json(): - return jsonify({ - "success": True, - "message": f"Created user {created['access_key']}", - "access_key": created["access_key"], - "secret_key": created["secret_key"], - "display_name": display_name, - "policies": policies or [] - }) - flash(f"Created user {created['access_key']}. 
Copy the secret below.", "success") - return redirect(url_for("ui.iam_dashboard", secret_token=token)) - - -@ui_bp.post("/iam/users//rotate") -def rotate_iam_secret(access_key: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:rotate_key") - except IamError as exc: - if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - try: - new_secret = _iam().rotate_secret(access_key) - if principal and principal.access_key == access_key: - creds = session.get("credentials", {}) - creds["secret_key"] = new_secret - session["credentials"] = creds - session.modified = True - except IamError as exc: - if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: - return jsonify({ - "access_key": access_key, - "secret_key": new_secret, - "message": f"Secret rotated for {access_key}", - }) - - token = _secret_store().remember( - { - "access_key": access_key, - "secret_key": new_secret, - "operation": "rotate", - } - ) - flash(f"Rotated secret for {access_key}. Copy the secret below.", "info") - return redirect(url_for("ui.iam_dashboard", secret_token=token)) - - -@ui_bp.post("/iam/users//update") -def update_iam_user(access_key: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:create_user") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - display_name = request.form.get("display_name", "").strip() - if display_name: - if len(display_name) > 64: - if _wants_json(): - return jsonify({"error": "Display name must be 64 characters or fewer"}), 400 - flash("Display name must be 64 characters or fewer", "danger") - else: - try: - _iam().update_user(access_key, display_name) - if _wants_json(): - return jsonify({"success": True, "message": f"Updated user {access_key}", "display_name": display_name}) - flash(f"Updated user {access_key}", "success") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - - return redirect(url_for("ui.iam_dashboard")) - - -@ui_bp.post("/iam/users//delete") -def delete_iam_user(access_key: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:delete_user") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - if access_key == principal.access_key: - try: - _iam().delete_user(access_key) - session.pop("credentials", None) - if _wants_json(): - return jsonify({"success": True, "message": "Your account has been deleted", "redirect": url_for("ui.login")}) - flash("Your account has been deleted.", "info") - return redirect(url_for("ui.login")) - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - try: - _iam().delete_user(access_key) - if _wants_json(): - return jsonify({"success": True, "message": f"Deleted user {access_key}"}) - flash(f"Deleted user {access_key}", "success") - except IamError as 
exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - -@ui_bp.post("/iam/users//policies") -def update_iam_policies(access_key: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:update_policy") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - policies_raw = request.form.get("policies", "").strip() - if not policies_raw: - policies = [] - else: - try: - policies = json.loads(policies_raw) - if not isinstance(policies, list): - raise ValueError("Policies must be a list") - except (ValueError, json.JSONDecodeError): - if _wants_json(): - return jsonify({"error": "Invalid JSON format for policies"}), 400 - flash("Invalid JSON format for policies", "danger") - return redirect(url_for("ui.iam_dashboard")) - - try: - _iam().update_user_policies(access_key, policies) - if _wants_json(): - return jsonify({"success": True, "message": f"Updated policies for {access_key}", "policies": policies}) - flash(f"Updated policies for {access_key}", "success") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - - return redirect(url_for("ui.iam_dashboard")) - - -@ui_bp.post("/iam/users//expiry") -def update_iam_expiry(access_key: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:update_policy") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.iam_dashboard")) - - expires_at = request.form.get("expires_at", "").strip() or None - if expires_at: - try: - from datetime import datetime as _dt, timezone as _tz - exp_dt = _dt.fromisoformat(expires_at) - if exp_dt.tzinfo is None: - exp_dt = exp_dt.replace(tzinfo=_tz.utc) - expires_at = exp_dt.isoformat() - except (ValueError, TypeError): - if _wants_json(): - return jsonify({"error": "Invalid expiry date format"}), 400 - flash("Invalid expiry date format", "danger") - return redirect(url_for("ui.iam_dashboard")) - - try: - _iam().update_user_expiry(access_key, expires_at) - if _wants_json(): - return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at}) - label = expires_at if expires_at else "never" - flash(f"Expiry for {access_key} set to {label}", "success") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 400 - flash(str(exc), "danger") - - return redirect(url_for("ui.iam_dashboard")) - - -@ui_bp.post("/connections") -def create_connection(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.buckets_overview")) - - name = request.form.get("name", "").strip() - endpoint = request.form.get("endpoint_url", "").strip() - access_key = request.form.get("access_key", "").strip() - secret_key = request.form.get("secret_key", "").strip() - region = request.form.get("region", "us-east-1").strip() - - if not all([name, endpoint, access_key, secret_key]): - if _wants_json(): - return jsonify({"error": "All fields are required"}), 400 - flash("All fields are required", "danger") - return redirect(url_for("ui.connections_dashboard")) - - conn = 
RemoteConnection( - id=str(uuid.uuid4()), - name=name, - endpoint_url=endpoint, - access_key=access_key, - secret_key=secret_key, - region=region - ) - _connections().add(conn) - if _wants_json(): - return jsonify({"success": True, "message": f"Connection '{name}' created", "connection_id": conn.id}) - flash(f"Connection '{name}' created", "success") - return redirect(url_for("ui.connections_dashboard")) - - -@ui_bp.post("/connections/test") -def test_connection(): - from botocore.config import Config as BotoConfig - from botocore.exceptions import ConnectTimeoutError, EndpointConnectionError, ReadTimeoutError - - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"status": "error", "message": "Access denied"}), 403 - - data = request.get_json(silent=True) or request.form - endpoint = data.get("endpoint_url", "").strip() - access_key = data.get("access_key", "").strip() - secret_key = data.get("secret_key", "").strip() - region = data.get("region", "us-east-1").strip() - - if not all([endpoint, access_key, secret_key]): - return jsonify({"status": "error", "message": "Missing credentials"}), 400 - - try: - config = BotoConfig( - connect_timeout=5, - read_timeout=10, - retries={'max_attempts': 1} - ) - s3 = boto3.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - region_name=region, - config=config, - ) - - s3.list_buckets() - return jsonify({"status": "ok", "message": "Connection successful"}) - except (ConnectTimeoutError, ReadTimeoutError): - return jsonify({"status": "error", "message": f"Connection timed out - endpoint may be down or unreachable: {endpoint}"}), 400 - except EndpointConnectionError: - return jsonify({"status": "error", "message": f"Could not connect to endpoint: {endpoint}"}), 400 - except ClientError as e: - error_code = e.response.get('Error', {}).get('Code', 'Unknown') - error_msg = e.response.get('Error', {}).get('Message', str(e)) - return jsonify({"status": "error", "message": f"Connection failed ({error_code}): {error_msg}"}), 400 - except Exception as e: - return jsonify({"status": "error", "message": f"Connection failed: {str(e)}"}), 400 - - -@ui_bp.post("/connections//update") -def update_connection(connection_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.buckets_overview")) - - conn = _connections().get(connection_id) - if not conn: - if _wants_json(): - return jsonify({"error": "Connection not found"}), 404 - flash("Connection not found", "danger") - return redirect(url_for("ui.connections_dashboard")) - - name = request.form.get("name", "").strip() - endpoint = request.form.get("endpoint_url", "").strip() - access_key = request.form.get("access_key", "").strip() - secret_key = request.form.get("secret_key", "").strip() - region = request.form.get("region", "us-east-1").strip() - - if not all([name, endpoint, access_key]): - if _wants_json(): - return jsonify({"error": "Name, endpoint, and access key are required"}), 400 - flash("Name, endpoint, and access key are required", "danger") - return redirect(url_for("ui.connections_dashboard")) - - conn.name = name - conn.endpoint_url = endpoint - conn.access_key = access_key - if secret_key: - conn.secret_key = secret_key - conn.region = region - - 
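The test_connection handler above probes a remote S3-compatible endpoint with a throwaway boto3 client, tight timeouts, a single retry attempt, and one list_buckets call. A self-contained sketch of that probe under the same assumptions (endpoint and credentials are caller-supplied placeholders; the exception mapping mirrors the handler):

    import boto3
    from botocore.config import Config as BotoConfig
    from botocore.exceptions import (
        ClientError,
        ConnectTimeoutError,
        EndpointConnectionError,
        ReadTimeoutError,
    )


    def probe_s3_endpoint(endpoint: str, access_key: str, secret_key: str,
                          region: str = "us-east-1") -> tuple[bool, str]:
        """Return (ok, message) after one cheap round trip to the endpoint."""
        client = boto3.client(
            "s3",
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region,
            # Fail fast: short timeouts and a single attempt keep the UI responsive.
            config=BotoConfig(connect_timeout=5, read_timeout=10,
                              retries={"max_attempts": 1}),
        )
        try:
            client.list_buckets()
            return True, "Connection successful"
        except (ConnectTimeoutError, ReadTimeoutError):
            return False, f"Connection timed out: {endpoint}"
        except EndpointConnectionError:
            return False, f"Could not connect to endpoint: {endpoint}"
        except ClientError as exc:
            code = exc.response.get("Error", {}).get("Code", "Unknown")
            return False, f"Connection failed ({code})"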
_connections().save() - if _wants_json(): - return jsonify({ - "success": True, - "message": f"Connection '{name}' updated", - "connection": { - "id": connection_id, - "name": name, - "endpoint_url": endpoint, - "access_key": access_key, - "region": region - } - }) - flash(f"Connection '{name}' updated", "success") - return redirect(url_for("ui.connections_dashboard")) - - -@ui_bp.post("/connections//delete") -def delete_connection(connection_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.buckets_overview")) - - _connections().delete(connection_id) - if _wants_json(): - return jsonify({"success": True, "message": "Connection deleted"}) - flash("Connection deleted", "success") - return redirect(url_for("ui.connections_dashboard")) - - -@ui_bp.post("/buckets//replication") -def update_bucket_replication(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError as exc: - if _wants_json(): - return jsonify({"error": str(exc)}), 403 - flash(str(exc), "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) - - is_admin = False - try: - _iam().authorize(principal, None, "iam:list_users") - is_admin = True - except IamError: - is_admin = False - - action = request.form.get("action") - - if action == "delete": - if not is_admin: - if _wants_json(): - return jsonify({"error": "Only administrators can remove replication configuration"}), 403 - flash("Only administrators can remove replication configuration", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) - _replication().delete_rule(bucket_name) - if _wants_json(): - return jsonify({"success": True, "message": "Replication configuration removed", "action": "delete"}) - flash("Replication configuration removed", "info") - elif action == "pause": - rule = _replication().get_rule(bucket_name) - if rule: - rule.enabled = False - _replication().set_rule(rule) - if _wants_json(): - return jsonify({"success": True, "message": "Replication paused", "action": "pause", "enabled": False}) - flash("Replication paused", "info") - else: - if _wants_json(): - return jsonify({"error": "No replication configuration to pause"}), 404 - flash("No replication configuration to pause", "warning") - elif action == "resume": - from .replication import REPLICATION_MODE_ALL - rule = _replication().get_rule(bucket_name) - if rule: - rule.enabled = True - _replication().set_rule(rule) - if rule.mode == REPLICATION_MODE_ALL: - _replication().replicate_existing_objects(bucket_name) - message = "Replication resumed. Syncing pending objects in background." 
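The pause, resume, and create branches of this replication handler only touch a few fields on the rule object: the target connection and bucket, the enabled flag, the mode, the creation time, and an optional stats snapshot read by the status endpoint further down. A rough sketch of that shape as inferred from these call sites (the real definition lives in the .replication module; the mode string values here are assumptions):

    import time
    from dataclasses import dataclass, field


    # Assumed values; the handler only imports the names, not the strings.
    REPLICATION_MODE_NEW_ONLY = "new_only"
    REPLICATION_MODE_ALL = "all"


    @dataclass
    class ReplicationStats:
        # Read by the /replication/status endpoint when no live stats are available.
        last_sync_at: float | None = None
        last_sync_key: str | None = None


    @dataclass
    class ReplicationRule:
        bucket_name: str
        target_connection_id: str
        target_bucket: str
        enabled: bool = True
        mode: str = REPLICATION_MODE_NEW_ONLY
        created_at: float = field(default_factory=time.time)
        stats: ReplicationStats | None = None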
- else: - message = "Replication resumed" - if _wants_json(): - return jsonify({"success": True, "message": message, "action": "resume", "enabled": True}) - flash(message, "success") - else: - if _wants_json(): - return jsonify({"error": "No replication configuration to resume"}), 404 - flash("No replication configuration to resume", "warning") - elif action == "create": - if not is_admin: - if _wants_json(): - return jsonify({"error": "Only administrators can configure replication settings"}), 403 - flash("Only administrators can configure replication settings", "danger") - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) - - from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL - import time - - target_conn_id = request.form.get("target_connection_id") - target_bucket = request.form.get("target_bucket", "").strip() - replication_mode = request.form.get("replication_mode", REPLICATION_MODE_NEW_ONLY) - - if not target_conn_id or not target_bucket: - if _wants_json(): - return jsonify({"error": "Target connection and bucket are required"}), 400 - flash("Target connection and bucket are required", "danger") - else: - rule = ReplicationRule( - bucket_name=bucket_name, - target_connection_id=target_conn_id, - target_bucket=target_bucket, - enabled=True, - mode=replication_mode, - created_at=time.time(), - ) - _replication().set_rule(rule) - - if replication_mode == REPLICATION_MODE_ALL: - _replication().replicate_existing_objects(bucket_name) - message = "Replication configured. Existing objects are being replicated in the background." - else: - message = "Replication configured. Only new uploads will be replicated." - if _wants_json(): - return jsonify({"success": True, "message": message, "action": "create", "enabled": True}) - flash(message, "success") - else: - if _wants_json(): - return jsonify({"error": "Invalid action"}), 400 - flash("Invalid action", "danger") - - return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) - - -@ui_bp.get("/buckets//replication/status") -def get_replication_status(bucket_name: str): - """Async endpoint to fetch replication sync status without blocking page load.""" - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - rule = _replication().get_rule(bucket_name) - if not rule: - return jsonify({"error": "No replication rule"}), 404 - - connection = _connections().get(rule.target_connection_id) - endpoint_healthy = False - endpoint_error = None - if connection: - endpoint_healthy = _replication().check_endpoint_health(connection) - if not endpoint_healthy: - endpoint_error = f"Cannot reach endpoint: {connection.endpoint_url}" - else: - endpoint_error = "Target connection not found" - - stats = None - if endpoint_healthy: - stats = _replication().get_sync_status(bucket_name) - - if not stats: - return jsonify({ - "objects_synced": 0, - "objects_pending": 0, - "objects_orphaned": 0, - "bytes_synced": 0, - "last_sync_at": rule.stats.last_sync_at if rule.stats else None, - "last_sync_key": rule.stats.last_sync_key if rule.stats else None, - "endpoint_healthy": endpoint_healthy, - "endpoint_error": endpoint_error, - }) - - return jsonify({ - "objects_synced": stats.objects_synced, - "objects_pending": stats.objects_pending, - "objects_orphaned": stats.objects_orphaned, - "bytes_synced": stats.bytes_synced, - "last_sync_at": stats.last_sync_at, - 
"last_sync_key": stats.last_sync_key, - "endpoint_healthy": endpoint_healthy, - "endpoint_error": endpoint_error, - }) - - -@ui_bp.get("/buckets//replication/failures") -def get_replication_failures(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - limit = request.args.get("limit", 50, type=int) - offset = request.args.get("offset", 0, type=int) - - failures = _replication().get_failed_items(bucket_name, limit, offset) - total = _replication().get_failure_count(bucket_name) - - return jsonify({ - "failures": [f.to_dict() for f in failures], - "total": total, - "limit": limit, - "offset": offset, - }) - - -@ui_bp.post("/buckets//replication/failures//retry") -def retry_replication_failure(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - success = _replication().retry_failed_item(bucket_name, object_key) - if success: - return jsonify({"status": "submitted", "object_key": object_key}) - return jsonify({"error": "Failed to submit retry"}), 400 - - -@ui_bp.post("/buckets//replication/failures/retry-all") -def retry_all_replication_failures(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - result = _replication().retry_all_failed(bucket_name) - return jsonify({ - "status": "submitted", - "submitted": result["submitted"], - "skipped": result["skipped"], - }) - - -@ui_bp.delete("/buckets//replication/failures/") -def dismiss_replication_failure(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - success = _replication().dismiss_failure(bucket_name, object_key) - if success: - return jsonify({"status": "dismissed", "object_key": object_key}) - return jsonify({"error": "Failure not found"}), 404 - - -@ui_bp.delete("/buckets//replication/failures") -def clear_replication_failures(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "replication") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - _replication().clear_failures(bucket_name) - return jsonify({"status": "cleared"}) - - -@ui_bp.get("/connections//health") -def check_connection_health(connection_id: str): - """Check if a connection endpoint is reachable.""" - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - conn = _connections().get(connection_id) - if not conn: - return jsonify({"healthy": False, "error": "Connection not found"}), 404 - - healthy = _replication().check_endpoint_health(conn) - return jsonify({ - "healthy": healthy, - "error": None if healthy else f"Cannot reach endpoint: {conn.endpoint_url}" - }) - - -@ui_bp.get("/connections") -def connections_dashboard(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - flash("Access denied", "danger") - return redirect(url_for("ui.buckets_overview")) - - connections = _connections().list() - return render_template("connections.html", 
connections=connections, principal=principal) - - -@ui_bp.get("/website-domains") -def website_domains_dashboard(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - flash("Access denied", "danger") - return redirect(url_for("ui.buckets_overview")) - - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - flash("Website hosting is not enabled", "warning") - return redirect(url_for("ui.buckets_overview")) - - store = current_app.extensions.get("website_domains") - mappings = store.list_all() if store else [] - storage = _storage() - buckets = [ - b.name for b in storage.list_buckets() - if storage.get_bucket_website(b.name) - ] - return render_template( - "website_domains.html", - mappings=mappings, - buckets=buckets, - principal=principal, - can_manage_iam=True, - ) - - -@ui_bp.post("/website-domains/create") -def create_website_domain(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False): - if _wants_json(): - return jsonify({"error": "Website hosting is not enabled"}), 400 - flash("Website hosting is not enabled", "warning") - return redirect(url_for("ui.buckets_overview")) - - domain = normalize_domain(request.form.get("domain") or "") - bucket = (request.form.get("bucket") or "").strip() - - if not domain: - if _wants_json(): - return jsonify({"error": "Domain is required"}), 400 - flash("Domain is required", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - if not is_valid_domain(domain): - if _wants_json(): - return jsonify({"error": f"Invalid domain format: '{domain}'"}), 400 - flash(f"Invalid domain format: '{domain}'. 
Use a hostname like www.example.com", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - if not bucket: - if _wants_json(): - return jsonify({"error": "Bucket is required"}), 400 - flash("Bucket is required", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - storage = _storage() - if not storage.bucket_exists(bucket): - if _wants_json(): - return jsonify({"error": f"Bucket '{bucket}' does not exist"}), 404 - flash(f"Bucket '{bucket}' does not exist", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - store = current_app.extensions.get("website_domains") - if store.get_bucket(domain): - if _wants_json(): - return jsonify({"error": f"Domain '{domain}' is already mapped"}), 409 - flash(f"Domain '{domain}' is already mapped", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - store.set_mapping(domain, bucket) - if _wants_json(): - return jsonify({"success": True, "domain": domain, "bucket": bucket}), 201 - flash(f"Domain '{domain}' mapped to bucket '{bucket}'", "success") - return redirect(url_for("ui.website_domains_dashboard")) - - -@ui_bp.post("/website-domains//update") -def update_website_domain(domain: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - domain = normalize_domain(domain) - bucket = (request.form.get("bucket") or "").strip() - if not bucket: - if _wants_json(): - return jsonify({"error": "Bucket is required"}), 400 - flash("Bucket is required", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - storage = _storage() - if not storage.bucket_exists(bucket): - if _wants_json(): - return jsonify({"error": f"Bucket '{bucket}' does not exist"}), 404 - flash(f"Bucket '{bucket}' does not exist", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - store = current_app.extensions.get("website_domains") - if not store.get_bucket(domain): - if _wants_json(): - return jsonify({"error": f"No mapping for domain '{domain}'"}), 404 - flash(f"No mapping for domain '{domain}'", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - store.set_mapping(domain, bucket) - if _wants_json(): - return jsonify({"success": True, "domain": domain, "bucket": bucket}) - flash(f"Domain '{domain}' updated to bucket '{bucket}'", "success") - return redirect(url_for("ui.website_domains_dashboard")) - - -@ui_bp.post("/website-domains//delete") -def delete_website_domain(domain: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - if _wants_json(): - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - domain = normalize_domain(domain) - store = current_app.extensions.get("website_domains") - if not store.delete_mapping(domain): - if _wants_json(): - return jsonify({"error": f"No mapping for domain '{domain}'"}), 404 - flash(f"No mapping for domain '{domain}'", "danger") - return redirect(url_for("ui.website_domains_dashboard")) - - if _wants_json(): - return jsonify({"success": True}) - flash(f"Domain '{domain}' mapping deleted", "success") - return redirect(url_for("ui.website_domains_dashboard")) - - -@ui_bp.get("/metrics") -def metrics_dashboard(): - principal = 
_current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - flash("Access denied: Metrics require admin permissions", "danger") - return redirect(url_for("ui.buckets_overview")) - - from app.version import APP_VERSION - import time - - cpu_percent = psutil.cpu_percent(interval=0.1) - memory = psutil.virtual_memory() - - storage_root = current_app.config["STORAGE_ROOT"] - disk = psutil.disk_usage(storage_root) - - storage = _storage() - buckets = storage.list_buckets() - total_buckets = len(buckets) - - total_objects = 0 - total_bytes_used = 0 - total_versions = 0 - - cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60) - for bucket in buckets: - stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl) - total_objects += stats.get("total_objects", stats.get("objects", 0)) - total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0)) - total_versions += stats.get("version_count", 0) - - boot_time = psutil.boot_time() - uptime_seconds = time.time() - boot_time - uptime_days = int(uptime_seconds / 86400) - - return render_template( - "metrics.html", - principal=principal, - cpu_percent=round(cpu_percent, 2), - memory={ - "total": _format_bytes(memory.total), - "available": _format_bytes(memory.available), - "used": _format_bytes(memory.used), - "percent": round(memory.percent, 2), - }, - disk={ - "total": _format_bytes(disk.total), - "free": _format_bytes(disk.free), - "used": _format_bytes(disk.used), - "percent": round(disk.percent, 2), - }, - app={ - "buckets": total_buckets, - "objects": total_objects, - "versions": total_versions, - "storage_used": _format_bytes(total_bytes_used), - "storage_raw": total_bytes_used, - "version": APP_VERSION, - "uptime_days": uptime_days, - }, - metrics_history_enabled=current_app.config.get("METRICS_HISTORY_ENABLED", False), - operation_metrics_enabled=current_app.config.get("OPERATION_METRICS_ENABLED", False), - ) - - -@ui_bp.route("/metrics/api") -def metrics_api(): - principal = _current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - import time - - cpu_percent = psutil.cpu_percent(interval=0.1) - memory = psutil.virtual_memory() - - storage_root = current_app.config["STORAGE_ROOT"] - disk = psutil.disk_usage(storage_root) - - storage = _storage() - buckets = storage.list_buckets() - total_buckets = len(buckets) - - total_objects = 0 - total_bytes_used = 0 - total_versions = 0 - - cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60) - for bucket in buckets: - stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl) - total_objects += stats.get("total_objects", stats.get("objects", 0)) - total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0)) - total_versions += stats.get("version_count", 0) - - boot_time = psutil.boot_time() - uptime_seconds = time.time() - boot_time - uptime_days = int(uptime_seconds / 86400) - - return jsonify({ - "cpu_percent": round(cpu_percent, 2), - "memory": { - "total": _format_bytes(memory.total), - "available": _format_bytes(memory.available), - "used": _format_bytes(memory.used), - "percent": round(memory.percent, 2), - }, - "disk": { - "total": _format_bytes(disk.total), - "free": _format_bytes(disk.free), - "used": _format_bytes(disk.used), - "percent": round(disk.percent, 2), - }, - "app": { - "buckets": total_buckets, - "objects": total_objects, - "versions": total_versions, - "storage_used": 
_format_bytes(total_bytes_used), - "storage_raw": total_bytes_used, - "uptime_days": uptime_days, - } - }) - - -@ui_bp.route("/metrics/history") -def metrics_history(): - principal = _current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - system_metrics = current_app.extensions.get("system_metrics") - if not system_metrics: - return jsonify({"enabled": False, "history": []}) - - hours = request.args.get("hours", type=int) - if hours is None: - hours = current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24) - - history = system_metrics.get_history(hours=hours) - - return jsonify({ - "enabled": True, - "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24), - "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5), - "history": history, - }) - - -@ui_bp.route("/metrics/settings", methods=["GET", "PUT"]) -def metrics_settings(): - principal = _current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - if request.method == "GET": - return jsonify({ - "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False), - "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24), - "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5), - }) - - data = request.get_json() or {} - - if "enabled" in data: - current_app.config["METRICS_HISTORY_ENABLED"] = bool(data["enabled"]) - if "retention_hours" in data: - current_app.config["METRICS_HISTORY_RETENTION_HOURS"] = max(1, int(data["retention_hours"])) - if "interval_minutes" in data: - current_app.config["METRICS_HISTORY_INTERVAL_MINUTES"] = max(1, int(data["interval_minutes"])) - - return jsonify({ - "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False), - "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24), - "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5), - }) - - -@ui_bp.get("/metrics/operations") -def metrics_operations(): - principal = _current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - collector = _operation_metrics() - if not collector: - return jsonify({ - "enabled": False, - "stats": None, - }) - - return jsonify({ - "enabled": True, - "stats": collector.get_current_stats(), - }) - - -@ui_bp.get("/metrics/operations/history") -def metrics_operations_history(): - principal = _current_principal() - - try: - _iam().authorize(principal, None, "iam:list_users") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - collector = _operation_metrics() - if not collector: - return jsonify({ - "enabled": False, - "history": [], - }) - - hours = request.args.get("hours", type=int) - return jsonify({ - "enabled": True, - "history": collector.get_history(hours), - "interval_minutes": current_app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5), - }) - - -@ui_bp.route("/buckets//lifecycle", methods=["GET", "POST", "DELETE"]) -def bucket_lifecycle(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "lifecycle") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - except (PermissionError, RuntimeError) as exc: - return jsonify({"error": 
str(exc)}), 403 - - if request.method == "GET": - try: - resp = client.get_bucket_lifecycle_configuration(Bucket=bucket_name) - rules = resp.get("Rules", []) - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - if code == "NoSuchLifecycleConfiguration": - rules = [] - else: - err, status = handle_client_error(exc) - return jsonify(err), status - except (EndpointConnectionError, ConnectionClosedError) as exc: - return jsonify(*handle_connection_error(exc)) - return jsonify({"rules": rules}) - - if request.method == "DELETE": - try: - client.delete_bucket_lifecycle(Bucket=bucket_name) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - return jsonify({"status": "ok", "message": "Lifecycle configuration deleted"}) - - payload = request.get_json(silent=True) or {} - rules = payload.get("rules", []) - if not isinstance(rules, list): - return jsonify({"error": "rules must be a list"}), 400 - - validated_rules = [] - for i, rule in enumerate(rules): - if not isinstance(rule, dict): - return jsonify({"error": f"Rule {i} must be an object"}), 400 - validated = { - "ID": str(rule.get("ID", f"rule-{i+1}")), - "Status": "Enabled" if rule.get("Status", "Enabled") == "Enabled" else "Disabled", - } - filt = {} - if rule.get("Prefix"): - filt["Prefix"] = str(rule["Prefix"]) - if filt: - validated["Filter"] = filt - if rule.get("Expiration"): - exp = rule["Expiration"] - if isinstance(exp, dict) and exp.get("Days"): - validated["Expiration"] = {"Days": int(exp["Days"])} - if rule.get("NoncurrentVersionExpiration"): - nve = rule["NoncurrentVersionExpiration"] - if isinstance(nve, dict) and nve.get("NoncurrentDays"): - validated["NoncurrentVersionExpiration"] = {"NoncurrentDays": int(nve["NoncurrentDays"])} - if rule.get("AbortIncompleteMultipartUpload"): - aimu = rule["AbortIncompleteMultipartUpload"] - if isinstance(aimu, dict) and aimu.get("DaysAfterInitiation"): - validated["AbortIncompleteMultipartUpload"] = {"DaysAfterInitiation": int(aimu["DaysAfterInitiation"])} - validated_rules.append(validated) - - try: - if validated_rules: - client.put_bucket_lifecycle_configuration( - Bucket=bucket_name, - LifecycleConfiguration={"Rules": validated_rules}, - ) - else: - client.delete_bucket_lifecycle(Bucket=bucket_name) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - return jsonify({"status": "ok", "message": "Lifecycle configuration saved", "rules": validated_rules}) - - -@ui_bp.get("/buckets//lifecycle/history") -def get_lifecycle_history(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "lifecycle") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - limit = request.args.get("limit", 50, type=int) - offset = request.args.get("offset", 0, type=int) - - lifecycle_manager = current_app.extensions.get("lifecycle") - if not lifecycle_manager: - return jsonify({ - "executions": [], - "total": 0, - "limit": limit, - "offset": offset, - "enabled": False, - }) - - records = lifecycle_manager.get_execution_history(bucket_name, limit, offset) - return jsonify({ - "executions": [r.to_dict() for r in records], - "total": 
len(lifecycle_manager.get_execution_history(bucket_name, 1000, 0)), - "limit": limit, - "offset": offset, - "enabled": True, - }) - - -@ui_bp.route("/buckets//cors", methods=["GET", "POST", "DELETE"]) -def bucket_cors(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "cors") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - except (PermissionError, RuntimeError) as exc: - return jsonify({"error": str(exc)}), 403 - - if request.method == "GET": - try: - resp = client.get_bucket_cors(Bucket=bucket_name) - rules = resp.get("CORSRules", []) - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - if code == "NoSuchCORSConfiguration": - rules = [] - else: - err, status = handle_client_error(exc) - return jsonify(err), status - except (EndpointConnectionError, ConnectionClosedError) as exc: - return jsonify(*handle_connection_error(exc)) - return jsonify({"rules": rules}) - - if request.method == "DELETE": - try: - client.delete_bucket_cors(Bucket=bucket_name) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - return jsonify({"status": "ok", "message": "CORS configuration deleted"}) - - payload = request.get_json(silent=True) or {} - rules = payload.get("rules", []) - if not isinstance(rules, list): - return jsonify({"error": "rules must be a list"}), 400 - - validated_rules = [] - for i, rule in enumerate(rules): - if not isinstance(rule, dict): - return jsonify({"error": f"Rule {i} must be an object"}), 400 - origins = rule.get("AllowedOrigins", []) - methods = rule.get("AllowedMethods", []) - if not origins or not methods: - return jsonify({"error": f"Rule {i} must have AllowedOrigins and AllowedMethods"}), 400 - validated = { - "AllowedOrigins": [str(o) for o in origins if o], - "AllowedMethods": [str(m).upper() for m in methods if m], - } - if rule.get("AllowedHeaders"): - validated["AllowedHeaders"] = [str(h) for h in rule["AllowedHeaders"] if h] - if rule.get("ExposeHeaders"): - validated["ExposeHeaders"] = [str(h) for h in rule["ExposeHeaders"] if h] - if rule.get("MaxAgeSeconds") is not None: - try: - validated["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"]) - except (ValueError, TypeError): - pass - validated_rules.append(validated) - - try: - if validated_rules: - client.put_bucket_cors( - Bucket=bucket_name, - CORSConfiguration={"CORSRules": validated_rules}, - ) - else: - client.delete_bucket_cors(Bucket=bucket_name) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - return jsonify({"status": "ok", "message": "CORS configuration saved", "rules": validated_rules}) - - -@ui_bp.route("/buckets//acl", methods=["GET", "POST"]) -def bucket_acl(bucket_name: str): - principal = _current_principal() - action = "read" if request.method == "GET" else "write" - try: - _authorize_ui(principal, bucket_name, action) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - except (PermissionError, RuntimeError) as exc: - return jsonify({"error": str(exc)}), 403 - - owner_id = principal.access_key if principal else "anonymous" - - if 
request.method == "GET": - try: - resp = client.get_bucket_acl(Bucket=bucket_name) - owner = resp.get("Owner", {}).get("ID", owner_id) - grants = [] - for grant in resp.get("Grants", []): - grantee = grant.get("Grantee", {}) - grantee_display = grantee.get("DisplayName") or grantee.get("ID", "") - if not grantee_display: - uri = grantee.get("URI", "") - if "AllUsers" in uri: - grantee_display = "Everyone (public)" - elif "AuthenticatedUsers" in uri: - grantee_display = "Authenticated users" - else: - grantee_display = uri or "unknown" - grants.append({ - "grantee": grantee_display, - "permission": grant.get("Permission", ""), - }) - return jsonify({ - "owner": owner, - "grants": grants, - "canned_acls": list(CANNED_ACLS.keys()), - }) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - payload = request.get_json(silent=True) or {} - canned_acl = payload.get("canned_acl") - if canned_acl: - if canned_acl not in CANNED_ACLS: - return jsonify({"error": f"Invalid canned ACL: {canned_acl}"}), 400 - try: - client.put_bucket_acl(Bucket=bucket_name, ACL=canned_acl) - return jsonify({"status": "ok", "message": f"ACL set to {canned_acl}"}) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - return jsonify({"error": "canned_acl is required"}), 400 - - -@ui_bp.route("/buckets//objects//tags", methods=["GET", "POST"]) -def object_tags(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - except (PermissionError, RuntimeError) as exc: - return jsonify({"error": str(exc)}), 403 - - if request.method == "GET": - try: - resp = client.get_object_tagging(Bucket=bucket_name, Key=object_key) - tags = resp.get("TagSet", []) - return jsonify({"tags": tags}) - except ClientError as exc: - code = exc.response.get("Error", {}).get("Code", "") - if code == "NoSuchKey": - return jsonify({"error": "Object not found"}), 404 - err, status = handle_client_error(exc) - return jsonify(err), status - except (EndpointConnectionError, ConnectionClosedError) as exc: - return jsonify(*handle_connection_error(exc)) - - try: - _authorize_ui(principal, bucket_name, "write", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - payload = request.get_json(silent=True) or {} - tags = payload.get("tags", []) - if not isinstance(tags, list): - return jsonify({"error": "tags must be a list"}), 400 - tag_limit = current_app.config.get("OBJECT_TAG_LIMIT", 50) - if len(tags) > tag_limit: - return jsonify({"error": f"Maximum {tag_limit} tags allowed"}), 400 - - validated_tags = [] - for i, tag in enumerate(tags): - if not isinstance(tag, dict) or not tag.get("Key"): - return jsonify({"error": f"Tag at index {i} must have a Key field"}), 400 - validated_tags.append({ - "Key": str(tag["Key"]), - "Value": str(tag.get("Value", "")) - }) - - try: - if validated_tags: - client.put_object_tagging( - Bucket=bucket_name, - Key=object_key, - Tagging={"TagSet": validated_tags}, - ) - else: - client.delete_object_tagging(Bucket=bucket_name, 
Key=object_key) - return jsonify({"status": "ok", "message": "Tags saved", "tags": validated_tags}) - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.post("/buckets//folders") -def create_folder(bucket_name: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "write") - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - payload = request.get_json(silent=True) or {} - folder_name = str(payload.get("folder_name", "")).strip() - prefix = str(payload.get("prefix", "")).strip() - - if not folder_name: - return jsonify({"error": "folder_name is required"}), 400 - - folder_name = folder_name.rstrip("/") - if "/" in folder_name: - return jsonify({"error": "Folder name cannot contain /"}), 400 - if "\x00" in folder_name or "\x00" in prefix: - return jsonify({"error": "Null bytes not allowed"}), 400 - if ".." in prefix.split("/"): - return jsonify({"error": "Invalid prefix"}), 400 - - folder_key = f"{prefix}{folder_name}/" if prefix else f"{folder_name}/" - - max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024) - if len(folder_key.encode("utf-8")) > max_key_len: - return jsonify({"error": f"Key exceeds maximum length of {max_key_len} bytes"}), 400 - - try: - client = get_session_s3_client() - client.put_object(Bucket=bucket_name, Key=folder_key, Body=b"") - return jsonify({"status": "ok", "message": f"Folder '{folder_name}' created", "key": folder_key}) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - -@ui_bp.post("/buckets//objects//copy") -def copy_object(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - payload = request.get_json(silent=True) or {} - dest_bucket = str(payload.get("dest_bucket", bucket_name)).strip() - dest_key = str(payload.get("dest_key", "")).strip() - - if not dest_key: - return jsonify({"error": "dest_key is required"}), 400 - if "\x00" in dest_key: - return jsonify({"error": "Destination key cannot contain null bytes"}), 400 - max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024) - if len(dest_key.encode("utf-8")) > max_key_len: - return jsonify({"error": f"Destination key exceeds maximum length of {max_key_len} bytes"}), 400 - - try: - _authorize_ui(principal, dest_bucket, "write", object_key=dest_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - client.copy_object( - Bucket=dest_bucket, - Key=dest_key, - CopySource={"Bucket": bucket_name, "Key": object_key}, - ) - return jsonify({ - "status": "ok", - "message": f"Copied to {dest_bucket}/{dest_key}", - "dest_bucket": dest_bucket, - "dest_key": dest_key, - }) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return 
jsonify(*handle_connection_error(exc)) - - -@ui_bp.post("/buckets//objects//move") -def move_object(bucket_name: str, object_key: str): - principal = _current_principal() - try: - _authorize_ui(principal, bucket_name, "read", object_key=object_key) - _authorize_ui(principal, bucket_name, "delete", object_key=object_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - payload = request.get_json(silent=True) or {} - dest_bucket = str(payload.get("dest_bucket", bucket_name)).strip() - dest_key = str(payload.get("dest_key", "")).strip() - - if not dest_key: - return jsonify({"error": "dest_key is required"}), 400 - if "\x00" in dest_key: - return jsonify({"error": "Destination key cannot contain null bytes"}), 400 - max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024) - if len(dest_key.encode("utf-8")) > max_key_len: - return jsonify({"error": f"Destination key exceeds maximum length of {max_key_len} bytes"}), 400 - - if dest_bucket == bucket_name and dest_key == object_key: - return jsonify({"error": "Cannot move object to the same location"}), 400 - - try: - _authorize_ui(principal, dest_bucket, "write", object_key=dest_key) - except IamError as exc: - return jsonify({"error": str(exc)}), 403 - - try: - client = get_session_s3_client() - client.copy_object( - Bucket=dest_bucket, - Key=dest_key, - CopySource={"Bucket": bucket_name, "Key": object_key}, - ) - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc: - if isinstance(exc, ClientError): - err, status = handle_client_error(exc) - return jsonify(err), status - return jsonify(*handle_connection_error(exc)) - - try: - client.delete_object(Bucket=bucket_name, Key=object_key) - except (ClientError, EndpointConnectionError, ConnectionClosedError): - return jsonify({ - "status": "partial", - "message": f"Copied to {dest_bucket}/{dest_key} but failed to delete source", - "dest_bucket": dest_bucket, - "dest_key": dest_key, - }), 200 - - return jsonify({ - "status": "ok", - "message": f"Moved to {dest_bucket}/{dest_key}", - "dest_bucket": dest_bucket, - "dest_key": dest_key, - }) - - -@ui_bp.get("/buckets//list-for-copy") -def list_buckets_for_copy(bucket_name: str): - principal = _current_principal() - try: - client = get_session_s3_client() - resp = client.list_buckets() - except PermissionError as exc: - return jsonify({"error": str(exc)}), 401 - except ClientError as exc: - return jsonify(*handle_client_error(exc)) - except (EndpointConnectionError, ConnectionClosedError) as exc: - return jsonify(*handle_connection_error(exc)) - allowed = [] - for b in resp.get("Buckets", []): - try: - _authorize_ui(principal, b["Name"], "write") - allowed.append(b["Name"]) - except IamError: - pass - return jsonify({"buckets": allowed}) - - -@ui_bp.get("/sites") -def sites_dashboard(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - flash("Access denied: Site management requires admin permissions", "danger") - return redirect(url_for("ui.buckets_overview")) - - registry = _site_registry() - local_site = registry.get_local_site() - peers = registry.list_peers() - connections = _connections().list() - - replication = _replication() - all_rules = replication.list_rules() - - peers_with_stats = [] - for peer in peers: - buckets_syncing = 0 - has_bidirectional = False - if peer.connection_id: - for rule in all_rules: - if rule.target_connection_id == 
peer.connection_id: - buckets_syncing += 1 - if rule.mode == "bidirectional": - has_bidirectional = True - peers_with_stats.append({ - "peer": peer, - "buckets_syncing": buckets_syncing, - "has_connection": bool(peer.connection_id), - "has_bidirectional": has_bidirectional, - }) - - return render_template( - "sites.html", - principal=principal, - local_site=local_site, - peers=peers, - peers_with_stats=peers_with_stats, - connections=connections, - config_site_id=current_app.config.get("SITE_ID"), - config_site_endpoint=current_app.config.get("SITE_ENDPOINT"), - config_site_region=current_app.config.get("SITE_REGION", "us-east-1"), - ) - - -@ui_bp.post("/sites/local") -def update_local_site(): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - if wants_json: - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - site_id = request.form.get("site_id", "").strip() - endpoint = request.form.get("endpoint", "").strip() - region = request.form.get("region", "us-east-1").strip() - priority = request.form.get("priority", "100") - display_name = request.form.get("display_name", "").strip() - - if not site_id: - if wants_json: - return jsonify({"error": "Site ID is required"}), 400 - flash("Site ID is required", "danger") - return redirect(url_for("ui.sites_dashboard")) - - try: - priority_int = int(priority) - except ValueError: - priority_int = 100 - - registry = _site_registry() - existing = registry.get_local_site() - - site = SiteInfo( - site_id=site_id, - endpoint=endpoint, - region=region, - priority=priority_int, - display_name=display_name or site_id, - created_at=existing.created_at if existing else None, - ) - registry.set_local_site(site) - - if wants_json: - return jsonify({"message": "Local site configuration updated"}) - flash("Local site configuration updated", "success") - return redirect(url_for("ui.sites_dashboard")) - - -@ui_bp.post("/sites/peers") -def add_peer_site(): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - if wants_json: - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - site_id = request.form.get("site_id", "").strip() - endpoint = request.form.get("endpoint", "").strip() - region = request.form.get("region", "us-east-1").strip() - priority = request.form.get("priority", "100") - display_name = request.form.get("display_name", "").strip() - connection_id = request.form.get("connection_id", "").strip() or None - - if not site_id: - if wants_json: - return jsonify({"error": "Site ID is required"}), 400 - flash("Site ID is required", "danger") - return redirect(url_for("ui.sites_dashboard")) - if not endpoint: - if wants_json: - return jsonify({"error": "Endpoint is required"}), 400 - flash("Endpoint is required", "danger") - return redirect(url_for("ui.sites_dashboard")) - - try: - priority_int = int(priority) - except ValueError: - priority_int = 100 - - registry = _site_registry() - - if registry.get_peer(site_id): - if wants_json: - return jsonify({"error": f"Peer site '{site_id}' already exists"}), 409 - flash(f"Peer site '{site_id}' already exists", "danger") - return redirect(url_for("ui.sites_dashboard")) - - if connection_id and not 
_connections().get(connection_id): - if wants_json: - return jsonify({"error": f"Connection '{connection_id}' not found"}), 404 - flash(f"Connection '{connection_id}' not found", "danger") - return redirect(url_for("ui.sites_dashboard")) - - peer = PeerSite( - site_id=site_id, - endpoint=endpoint, - region=region, - priority=priority_int, - display_name=display_name or site_id, - connection_id=connection_id, - ) - registry.add_peer(peer) - - if wants_json: - redirect_url = None - if connection_id: - redirect_url = url_for("ui.replication_wizard", site_id=site_id) - return jsonify({"message": f"Peer site '{site_id}' added", "redirect": redirect_url}) - flash(f"Peer site '{site_id}' added", "success") - - if connection_id: - return redirect(url_for("ui.replication_wizard", site_id=site_id)) - return redirect(url_for("ui.sites_dashboard")) - - -@ui_bp.post("/sites/peers//update") -def update_peer_site(site_id: str): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - if wants_json: - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - registry = _site_registry() - existing = registry.get_peer(site_id) - - if not existing: - if wants_json: - return jsonify({"error": f"Peer site '{site_id}' not found"}), 404 - flash(f"Peer site '{site_id}' not found", "danger") - return redirect(url_for("ui.sites_dashboard")) - - endpoint = request.form.get("endpoint", existing.endpoint).strip() - region = request.form.get("region", existing.region).strip() - priority = request.form.get("priority", str(existing.priority)) - display_name = request.form.get("display_name", existing.display_name).strip() - if "connection_id" in request.form: - connection_id = request.form["connection_id"].strip() or None - else: - connection_id = existing.connection_id - - try: - priority_int = int(priority) - except ValueError: - priority_int = existing.priority - - if connection_id and not _connections().get(connection_id): - if wants_json: - return jsonify({"error": f"Connection '{connection_id}' not found"}), 404 - flash(f"Connection '{connection_id}' not found", "danger") - return redirect(url_for("ui.sites_dashboard")) - - peer = PeerSite( - site_id=site_id, - endpoint=endpoint, - region=region, - priority=priority_int, - display_name=display_name or site_id, - connection_id=connection_id, - created_at=existing.created_at, - is_healthy=existing.is_healthy, - last_health_check=existing.last_health_check, - ) - registry.update_peer(peer) - - if wants_json: - return jsonify({"message": f"Peer site '{site_id}' updated"}) - flash(f"Peer site '{site_id}' updated", "success") - return redirect(url_for("ui.sites_dashboard")) - - -@ui_bp.post("/sites/peers//delete") -def delete_peer_site(site_id: str): - principal = _current_principal() - wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - if wants_json: - return jsonify({"error": "Access denied"}), 403 - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - registry = _site_registry() - if registry.delete_peer(site_id): - if wants_json: - return jsonify({"message": f"Peer site '{site_id}' deleted"}) - flash(f"Peer site '{site_id}' deleted", "success") - else: - if wants_json: - return jsonify({"error": f"Peer site '{site_id}' not found"}), 404 - 
flash(f"Peer site '{site_id}' not found", "danger") - - return redirect(url_for("ui.sites_dashboard")) - - -@ui_bp.get("/sites/peers//health") -def check_peer_site_health(site_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - registry = _site_registry() - peer = registry.get_peer(site_id) - - if not peer: - return jsonify({"error": f"Peer site '{site_id}' not found"}), 404 - - is_healthy = False - error_message = None - - if peer.connection_id: - connection = _connections().get(peer.connection_id) - if connection: - is_healthy = _replication().check_endpoint_health(connection) - else: - error_message = f"Connection '{peer.connection_id}' not found" - else: - error_message = "No connection configured for this peer" - - registry.update_health(site_id, is_healthy) - - result = { - "site_id": site_id, - "is_healthy": is_healthy, - } - if error_message: - result["error"] = error_message - - return jsonify(result) - - -@ui_bp.get("/sites/peers//bidirectional-status") -def check_peer_bidirectional_status(site_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - registry = _site_registry() - peer = registry.get_peer(site_id) - - if not peer: - return jsonify({"error": f"Peer site '{site_id}' not found"}), 404 - - local_site = registry.get_local_site() - replication = _replication() - local_rules = replication.list_rules() - - local_bidir_rules = [] - for rule in local_rules: - if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional": - local_bidir_rules.append({ - "bucket_name": rule.bucket_name, - "target_bucket": rule.target_bucket, - "enabled": rule.enabled, - }) - - result = { - "site_id": site_id, - "local_site_id": local_site.site_id if local_site else None, - "local_endpoint": local_site.endpoint if local_site else None, - "local_bidirectional_rules": local_bidir_rules, - "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False), - "remote_status": None, - "issues": [], - "is_fully_configured": False, - } - - if not local_site or not local_site.site_id: - result["issues"].append({ - "code": "NO_LOCAL_SITE_ID", - "message": "Local site identity not configured", - "severity": "error", - }) - - if not local_site or not local_site.endpoint: - result["issues"].append({ - "code": "NO_LOCAL_ENDPOINT", - "message": "Local site endpoint not configured (remote site cannot reach back)", - "severity": "error", - }) - - if not peer.connection_id: - result["issues"].append({ - "code": "NO_CONNECTION", - "message": "No connection configured for this peer", - "severity": "error", - }) - return jsonify(result) - - connection = _connections().get(peer.connection_id) - if not connection: - result["issues"].append({ - "code": "CONNECTION_NOT_FOUND", - "message": f"Connection '{peer.connection_id}' not found", - "severity": "error", - }) - return jsonify(result) - - if not local_bidir_rules: - result["issues"].append({ - "code": "NO_LOCAL_BIDIRECTIONAL_RULES", - "message": "No bidirectional replication rules configured on this site", - "severity": "warning", - }) - - if not result["local_site_sync_enabled"]: - result["issues"].append({ - "code": "SITE_SYNC_DISABLED", - "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). 
Pull operations will not work.", - "severity": "warning", - }) - - if not replication.check_endpoint_health(connection): - result["issues"].append({ - "code": "REMOTE_UNREACHABLE", - "message": "Remote endpoint is not reachable", - "severity": "error", - }) - return jsonify(result) - - try: - parsed = urlparse(peer.endpoint) - hostname = parsed.hostname or "" - import ipaddress - cloud_metadata_hosts = {"metadata.google.internal", "169.254.169.254"} - if hostname.lower() in cloud_metadata_hosts: - result["issues"].append({ - "code": "ENDPOINT_NOT_ALLOWED", - "message": "Peer endpoint points to cloud metadata service (SSRF protection)", - "severity": "error", - }) - return jsonify(result) - allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False) - if not allow_internal: - try: - ip = ipaddress.ip_address(hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved or ip.is_link_local: - result["issues"].append({ - "code": "ENDPOINT_NOT_ALLOWED", - "message": "Peer endpoint points to internal or private address (set ALLOW_INTERNAL_ENDPOINTS=true for self-hosted deployments)", - "severity": "error", - }) - return jsonify(result) - except ValueError: - blocked_patterns = ["localhost", "127.", "10.", "192.168.", "172.16."] - if any(hostname.startswith(p) or hostname == p.rstrip(".") for p in blocked_patterns): - result["issues"].append({ - "code": "ENDPOINT_NOT_ALLOWED", - "message": "Peer endpoint points to internal or private address (set ALLOW_INTERNAL_ENDPOINTS=true for self-hosted deployments)", - "severity": "error", - }) - return jsonify(result) - except Exception: - pass - - try: - admin_url = peer.endpoint.rstrip("/") + "/admin/sites" - resp = requests.get( - admin_url, - timeout=10, - headers={ - "Accept": "application/json", - "X-Access-Key": connection.access_key, - "X-Secret-Key": connection.secret_key, - }, - ) - - if resp.status_code == 200: - try: - remote_data = resp.json() - if not isinstance(remote_data, dict): - raise ValueError("Expected JSON object") - remote_local = remote_data.get("local") - if remote_local is not None and not isinstance(remote_local, dict): - raise ValueError("Expected 'local' to be an object") - remote_peers = remote_data.get("peers", []) - if not isinstance(remote_peers, list): - raise ValueError("Expected 'peers' to be a list") - except (ValueError, json.JSONDecodeError) as e: - result["remote_status"] = {"reachable": True, "invalid_response": True} - result["issues"].append({ - "code": "REMOTE_INVALID_RESPONSE", - "message": "Remote admin API returned invalid JSON", - "severity": "warning", - }) - return jsonify(result) - - result["remote_status"] = { - "reachable": True, - "local_site": remote_local, - "site_sync_enabled": None, - "has_peer_for_us": False, - "peer_connection_configured": False, - "has_bidirectional_rules_for_us": False, - } - - for rp in remote_peers: - if not isinstance(rp, dict): - continue - if local_site and ( - rp.get("site_id") == local_site.site_id or - rp.get("endpoint") == local_site.endpoint - ): - result["remote_status"]["has_peer_for_us"] = True - result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id")) - break - - if not result["remote_status"]["has_peer_for_us"]: - result["issues"].append({ - "code": "REMOTE_NO_PEER_FOR_US", - "message": "Remote site does not have this site registered as a peer", - "severity": "error", - }) - elif not result["remote_status"]["peer_connection_configured"]: - result["issues"].append({ - "code": "REMOTE_NO_CONNECTION_FOR_US", - 
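Note on the endpoint guard above: cloud-metadata hosts are always rejected, and private, loopback, reserved and link-local addresses are rejected unless ALLOW_INTERNAL_ENDPOINTS is set, with a hostname-prefix blocklist as the non-IP fallback. A minimal Rust sketch of the same policy using only the standard library (the helper name and structure are illustrative, not part of this diff):

```rust
use std::net::IpAddr;

// Hypothetical helper mirroring the peer-endpoint SSRF guard: metadata hosts are
// always rejected; private/loopback/link-local addresses are rejected unless
// allow_internal is set; non-IP hostnames fall back to a prefix blocklist.
fn endpoint_allowed(hostname: &str, allow_internal: bool) -> bool {
    let host = hostname.to_ascii_lowercase();
    if host == "metadata.google.internal" || host == "169.254.169.254" {
        return false;
    }
    if allow_internal {
        return true;
    }
    match host.parse::<IpAddr>() {
        Ok(IpAddr::V4(ip)) => {
            !(ip.is_private() || ip.is_loopback() || ip.is_link_local() || ip.is_unspecified())
        }
        Ok(IpAddr::V6(ip)) => !ip.is_loopback(),
        Err(_) => {
            // Not an IP literal: block the obvious internal name patterns.
            let blocked = ["localhost", "127.", "10.", "192.168.", "172.16."];
            !blocked
                .iter()
                .any(|p| host.starts_with(*p) || host == p.trim_end_matches('.'))
        }
    }
}

fn main() {
    assert!(!endpoint_allowed("169.254.169.254", false));
    assert!(!endpoint_allowed("192.168.1.10", false));
    assert!(endpoint_allowed("192.168.1.10", true));
    assert!(endpoint_allowed("peer.example.com", false));
}
```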
"message": "Remote site has us as peer but no connection configured (cannot push back)", - "severity": "error", - }) - elif resp.status_code == 401 or resp.status_code == 403: - result["remote_status"] = { - "reachable": True, - "admin_access_denied": True, - } - result["issues"].append({ - "code": "REMOTE_ADMIN_ACCESS_DENIED", - "message": "Cannot verify remote configuration (admin access denied)", - "severity": "warning", - }) - else: - result["remote_status"] = { - "reachable": True, - "admin_api_error": resp.status_code, - } - result["issues"].append({ - "code": "REMOTE_ADMIN_API_ERROR", - "message": f"Remote admin API returned status {resp.status_code}", - "severity": "warning", - }) - except requests.RequestException: - result["remote_status"] = { - "reachable": False, - "error": "Connection failed", - } - result["issues"].append({ - "code": "REMOTE_ADMIN_UNREACHABLE", - "message": "Could not reach remote admin API", - "severity": "warning", - }) - except Exception: - result["issues"].append({ - "code": "VERIFICATION_ERROR", - "message": "Internal error during verification", - "severity": "warning", - }) - - error_issues = [i for i in result["issues"] if i["severity"] == "error"] - result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0 - - return jsonify(result) - - -@ui_bp.get("/sites/peers//replication-wizard") -def replication_wizard(site_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - registry = _site_registry() - peer = registry.get_peer(site_id) - if not peer: - flash(f"Peer site '{site_id}' not found", "danger") - return redirect(url_for("ui.sites_dashboard")) - - if not peer.connection_id: - flash("This peer has no connection configured. 
Add a connection first to set up replication.", "warning") - return redirect(url_for("ui.sites_dashboard")) - - connection = _connections().get(peer.connection_id) - if not connection: - flash(f"Connection '{peer.connection_id}' not found", "danger") - return redirect(url_for("ui.sites_dashboard")) - - buckets = _storage().list_buckets() - replication = _replication() - - bucket_info = [] - for bucket in buckets: - existing_rule = replication.get_rule(bucket.name) - has_rule_for_peer = ( - existing_rule and - existing_rule.target_connection_id == peer.connection_id - ) - bucket_info.append({ - "name": bucket.name, - "has_rule": has_rule_for_peer, - "existing_mode": existing_rule.mode if has_rule_for_peer else None, - "existing_target": existing_rule.target_bucket if has_rule_for_peer else None, - }) - - local_site = registry.get_local_site() - - return render_template( - "replication_wizard.html", - principal=principal, - peer=peer, - connection=connection, - buckets=bucket_info, - local_site=local_site, - csrf_token=generate_csrf, - ) - - -@ui_bp.post("/sites/peers//replication-rules") -def create_peer_replication_rules(site_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - flash("Access denied", "danger") - return redirect(url_for("ui.sites_dashboard")) - - registry = _site_registry() - peer = registry.get_peer(site_id) - if not peer or not peer.connection_id: - flash("Invalid peer site or no connection configured", "danger") - return redirect(url_for("ui.sites_dashboard")) - - from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL - import time as time_module - - selected_buckets = request.form.getlist("buckets") - mode = request.form.get("mode", REPLICATION_MODE_NEW_ONLY) - - if not selected_buckets: - flash("No buckets selected", "warning") - return redirect(url_for("ui.sites_dashboard")) - - created = 0 - failed = 0 - replication = _replication() - - for bucket_name in selected_buckets: - target_bucket = request.form.get(f"target_{bucket_name}", bucket_name).strip() - if not target_bucket: - target_bucket = bucket_name - - try: - rule = ReplicationRule( - bucket_name=bucket_name, - target_connection_id=peer.connection_id, - target_bucket=target_bucket, - enabled=True, - mode=mode, - created_at=time_module.time(), - ) - replication.set_rule(rule) - - if mode == REPLICATION_MODE_ALL: - replication.replicate_existing_objects(bucket_name) - - created += 1 - except Exception: - failed += 1 - - if created > 0: - flash(f"Created {created} replication rule(s) for {peer.display_name or peer.site_id}", "success") - if failed > 0: - flash(f"Failed to create {failed} rule(s)", "danger") - - return redirect(url_for("ui.sites_dashboard")) - - -@ui_bp.get("/sites/peers//sync-stats") -def get_peer_sync_stats(site_id: str): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - registry = _site_registry() - peer = registry.get_peer(site_id) - if not peer: - return jsonify({"error": "Peer not found"}), 404 - - if not peer.connection_id: - return jsonify({"error": "No connection configured"}), 400 - - replication = _replication() - all_rules = replication.list_rules() - - stats = { - "buckets_syncing": 0, - "objects_synced": 0, - "objects_pending": 0, - "objects_failed": 0, - "bytes_synced": 0, - "last_sync_at": None, - "buckets": [], - } - - for rule in all_rules: - if rule.target_connection_id != 
peer.connection_id: - continue - - stats["buckets_syncing"] += 1 - - bucket_stats = { - "bucket_name": rule.bucket_name, - "target_bucket": rule.target_bucket, - "mode": rule.mode, - "enabled": rule.enabled, - } - - if rule.stats: - stats["objects_synced"] += rule.stats.objects_synced - stats["objects_pending"] += rule.stats.objects_pending - stats["bytes_synced"] += rule.stats.bytes_synced - - if rule.stats.last_sync_at: - if not stats["last_sync_at"] or rule.stats.last_sync_at > stats["last_sync_at"]: - stats["last_sync_at"] = rule.stats.last_sync_at - - bucket_stats["last_sync_at"] = rule.stats.last_sync_at - bucket_stats["objects_synced"] = rule.stats.objects_synced - bucket_stats["objects_pending"] = rule.stats.objects_pending - - failure_count = replication.get_failure_count(rule.bucket_name) - stats["objects_failed"] += failure_count - bucket_stats["failures"] = failure_count - - stats["buckets"].append(bucket_stats) - - return jsonify(stats) - - -@ui_bp.get("/system") -def system_dashboard(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - flash("Access denied: System page requires admin permissions", "danger") - return redirect(url_for("ui.buckets_overview")) - - import platform as _platform - import sys - from app.version import APP_VERSION - - try: - import myfsio_core as _rc - has_rust = True - except ImportError: - has_rust = False - - gc = current_app.extensions.get("gc") - gc_status = gc.get_status() if gc else {"enabled": False} - gc_history_records = [] - if gc: - raw = gc.get_history(limit=10, offset=0) - for rec in raw: - r = rec.get("result", {}) - total_freed = r.get("temp_bytes_freed", 0) + r.get("multipart_bytes_freed", 0) + r.get("orphaned_version_bytes_freed", 0) - rec["bytes_freed_display"] = _format_bytes(total_freed) - rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc)) - gc_history_records.append(rec) - - checker = current_app.extensions.get("integrity") - integrity_status = checker.get_status() if checker else {"enabled": False} - integrity_history_records = [] - if checker: - raw = checker.get_history(limit=10, offset=0) - for rec in raw: - rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc)) - integrity_history_records.append(rec) - - features = [ - {"label": "Encryption (SSE-S3)", "enabled": current_app.config.get("ENCRYPTION_ENABLED", False)}, - {"label": "KMS", "enabled": current_app.config.get("KMS_ENABLED", False)}, - {"label": "Versioning Lifecycle", "enabled": current_app.config.get("LIFECYCLE_ENABLED", False)}, - {"label": "Metrics History", "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False)}, - {"label": "Operation Metrics", "enabled": current_app.config.get("OPERATION_METRICS_ENABLED", False)}, - {"label": "Site Sync", "enabled": current_app.config.get("SITE_SYNC_ENABLED", False)}, - {"label": "Website Hosting", "enabled": current_app.config.get("WEBSITE_HOSTING_ENABLED", False)}, - {"label": "Garbage Collection", "enabled": current_app.config.get("GC_ENABLED", False)}, - {"label": "Integrity Scanner", "enabled": current_app.config.get("INTEGRITY_ENABLED", False)}, - ] - - return render_template( - "system.html", - principal=principal, - app_version=APP_VERSION, - storage_root=current_app.config.get("STORAGE_ROOT", "./data"), - platform=_platform.platform(), - python_version=sys.version.split()[0], - has_rust=has_rust, - features=features, - 
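For reference, the aggregation the sync-stats endpoint performs over the replication rules targeting a peer's connection, sketched in Rust with stand-in types (none of these structs exist in this diff; the real rules and stats live in the Python replication module):

```rust
// Stand-in types for a per-rule stats record and the aggregated per-peer view.
#[derive(Default, Clone, Copy)]
struct RuleStats {
    objects_synced: u64,
    objects_pending: u64,
    bytes_synced: u64,
    last_sync_at: Option<f64>, // epoch seconds
}

struct Rule {
    target_connection_id: String,
    stats: Option<RuleStats>,
}

#[derive(Default, Debug)]
struct PeerSyncStats {
    buckets_syncing: u32,
    objects_synced: u64,
    objects_pending: u64,
    bytes_synced: u64,
    last_sync_at: Option<f64>,
}

fn aggregate(rules: &[Rule], peer_connection_id: &str) -> PeerSyncStats {
    let mut out = PeerSyncStats::default();
    for rule in rules.iter().filter(|r| r.target_connection_id == peer_connection_id) {
        out.buckets_syncing += 1;
        if let Some(s) = rule.stats {
            out.objects_synced += s.objects_synced;
            out.objects_pending += s.objects_pending;
            out.bytes_synced += s.bytes_synced;
            // Track the most recent sync time across all buckets for this peer.
            if s.last_sync_at > out.last_sync_at {
                out.last_sync_at = s.last_sync_at;
            }
        }
    }
    out
}

fn main() {
    let rules = vec![
        Rule {
            target_connection_id: "conn-1".into(),
            stats: Some(RuleStats {
                objects_synced: 10,
                bytes_synced: 4096,
                last_sync_at: Some(1_700_000_000.0),
                ..Default::default()
            }),
        },
        Rule { target_connection_id: "conn-2".into(), stats: None },
    ];
    println!("{:?}", aggregate(&rules, "conn-1"));
}
```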
gc_status=gc_status, - gc_history=gc_history_records, - integrity_status=integrity_status, - integrity_history=integrity_history_records, - display_timezone=current_app.config.get("DISPLAY_TIMEZONE", "UTC"), - ) - - -@ui_bp.post("/system/gc/run") -def system_gc_run(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - gc = current_app.extensions.get("gc") - if not gc: - return jsonify({"error": "GC is not enabled"}), 400 - - payload = request.get_json(silent=True) or {} - started = gc.run_async(dry_run=payload.get("dry_run")) - if not started: - return jsonify({"error": "GC is already in progress"}), 409 - return jsonify({"status": "started"}) - - -@ui_bp.get("/system/gc/status") -def system_gc_status(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - gc = current_app.extensions.get("gc") - if not gc: - return jsonify({"error": "GC is not enabled"}), 400 - - return jsonify(gc.get_status()) - - -@ui_bp.get("/system/gc/history") -def system_gc_history(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - gc = current_app.extensions.get("gc") - if not gc: - return jsonify({"executions": []}) - - limit = min(int(request.args.get("limit", 10)), 200) - offset = int(request.args.get("offset", 0)) - records = gc.get_history(limit=limit, offset=offset) - return jsonify({"executions": records}) - - -@ui_bp.post("/system/integrity/run") -def system_integrity_run(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - checker = current_app.extensions.get("integrity") - if not checker: - return jsonify({"error": "Integrity checker is not enabled"}), 400 - - payload = request.get_json(silent=True) or {} - started = checker.run_async( - auto_heal=payload.get("auto_heal"), - dry_run=payload.get("dry_run"), - ) - if not started: - return jsonify({"error": "A scan is already in progress"}), 409 - return jsonify({"status": "started"}) - - -@ui_bp.get("/system/integrity/status") -def system_integrity_status(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - checker = current_app.extensions.get("integrity") - if not checker: - return jsonify({"error": "Integrity checker is not enabled"}), 400 - - return jsonify(checker.get_status()) - - -@ui_bp.get("/system/integrity/history") -def system_integrity_history(): - principal = _current_principal() - try: - _iam().authorize(principal, None, "iam:*") - except IamError: - return jsonify({"error": "Access denied"}), 403 - - checker = current_app.extensions.get("integrity") - if not checker: - return jsonify({"executions": []}) - - limit = min(int(request.args.get("limit", 10)), 200) - offset = int(request.args.get("offset", 0)) - records = checker.get_history(limit=limit, offset=offset) - return jsonify({"executions": records}) - - -@ui_bp.app_errorhandler(404) -def ui_not_found(error): # type: ignore[override] - prefix = ui_bp.url_prefix or "" - path = request.path or "" - wants_html = request.accept_mimetypes.accept_html - if wants_html and (not prefix or path.startswith(prefix)): - return render_template("404.html"), 404 - 
return error diff --git a/app/version.py b/app/version.py deleted file mode 100644 index 0abc36a..0000000 --- a/app/version.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import annotations - -APP_VERSION = "0.4.2" - - -def get_version() -> str: - """Return the current application version.""" - return APP_VERSION diff --git a/app/website_domains.py b/app/website_domains.py deleted file mode 100644 index 7ec33f7..0000000 --- a/app/website_domains.py +++ /dev/null @@ -1,108 +0,0 @@ -from __future__ import annotations - -import json -import re -import threading -from pathlib import Path -from typing import Dict, List, Optional - -_DOMAIN_RE = re.compile( - r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$" -) - - -def normalize_domain(raw: str) -> str: - raw = raw.strip().lower() - for prefix in ("https://", "http://"): - if raw.startswith(prefix): - raw = raw[len(prefix):] - raw = raw.split("/", 1)[0] - raw = raw.split("?", 1)[0] - raw = raw.split("#", 1)[0] - if ":" in raw: - raw = raw.rsplit(":", 1)[0] - return raw - - -def is_valid_domain(domain: str) -> bool: - if not domain or len(domain) > 253: - return False - return bool(_DOMAIN_RE.match(domain)) - - -class WebsiteDomainStore: - def __init__(self, config_path: Path) -> None: - self.config_path = config_path - self._lock = threading.Lock() - self._domains: Dict[str, str] = {} - self._last_mtime: float = 0.0 - self.reload() - - def reload(self) -> None: - if not self.config_path.exists(): - self._domains = {} - self._last_mtime = 0.0 - return - try: - self._last_mtime = self.config_path.stat().st_mtime - with open(self.config_path, "r", encoding="utf-8") as f: - data = json.load(f) - if isinstance(data, dict): - self._domains = {k.lower(): v for k, v in data.items()} - else: - self._domains = {} - except (OSError, json.JSONDecodeError): - self._domains = {} - - def _maybe_reload(self) -> None: - try: - if self.config_path.exists(): - mtime = self.config_path.stat().st_mtime - if mtime != self._last_mtime: - self._last_mtime = mtime - with open(self.config_path, "r", encoding="utf-8") as f: - data = json.load(f) - if isinstance(data, dict): - self._domains = {k.lower(): v for k, v in data.items()} - else: - self._domains = {} - elif self._domains: - self._domains = {} - self._last_mtime = 0.0 - except (OSError, json.JSONDecodeError): - pass - - def _save(self) -> None: - self.config_path.parent.mkdir(parents=True, exist_ok=True) - with open(self.config_path, "w", encoding="utf-8") as f: - json.dump(self._domains, f, indent=2) - self._last_mtime = self.config_path.stat().st_mtime - - def list_all(self) -> List[Dict[str, str]]: - with self._lock: - self._maybe_reload() - return [{"domain": d, "bucket": b} for d, b in self._domains.items()] - - def get_bucket(self, domain: str) -> Optional[str]: - with self._lock: - self._maybe_reload() - return self._domains.get(domain.lower()) - - def get_domains_for_bucket(self, bucket: str) -> List[str]: - with self._lock: - self._maybe_reload() - return [d for d, b in self._domains.items() if b == bucket] - - def set_mapping(self, domain: str, bucket: str) -> None: - with self._lock: - self._domains[domain.lower()] = bucket - self._save() - - def delete_mapping(self, domain: str) -> bool: - with self._lock: - key = domain.lower() - if key not in self._domains: - return False - del self._domains[key] - self._save() - return True diff --git a/crates/myfsio-auth/Cargo.toml b/crates/myfsio-auth/Cargo.toml new file mode 100644 index 0000000..7622bf9 --- /dev/null +++ 
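The removed app/website_domains.py also carried the domain normalization used for website-hosting lookups: scheme, path, query, fragment and port are stripped and the host is lowercased before validation. A rough Rust equivalent, for illustration only (the Rust engine's replacement for this module is not part of this hunk):

```rust
// Sketch of the normalization the deleted normalize_domain() performed.
fn normalize_domain(raw: &str) -> String {
    let mut s = raw.trim().to_ascii_lowercase();
    for prefix in ["https://", "http://"] {
        if let Some(rest) = s.strip_prefix(prefix) {
            s = rest.to_string();
        }
    }
    // Drop any path, query string, or fragment.
    for sep in ['/', '?', '#'] {
        if let Some(idx) = s.find(sep) {
            s.truncate(idx);
        }
    }
    // Drop an optional :port suffix.
    if let Some(idx) = s.rfind(':') {
        s.truncate(idx);
    }
    s
}

fn main() {
    assert_eq!(
        normalize_domain("HTTPS://Docs.Example.COM:8443/path?x=1"),
        "docs.example.com"
    );
    assert_eq!(normalize_domain("example.org"), "example.org");
}
```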
b/crates/myfsio-auth/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "myfsio-auth" +version.workspace = true +edition.workspace = true + +[dependencies] +myfsio-common = { path = "../myfsio-common" } +hmac = { workspace = true } +sha2 = { workspace = true } +hex = { workspace = true } +aes = { workspace = true } +cbc = { workspace = true } +base64 = { workspace = true } +pbkdf2 = "0.12" +rand = "0.8" +lru = { workspace = true } +parking_lot = { workspace = true } +percent-encoding = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +chrono = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } + +[dev-dependencies] +tempfile = "3" diff --git a/crates/myfsio-auth/src/fernet.rs b/crates/myfsio-auth/src/fernet.rs new file mode 100644 index 0000000..1b2e129 --- /dev/null +++ b/crates/myfsio-auth/src/fernet.rs @@ -0,0 +1,118 @@ +use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, BlockEncryptMut, KeyIvInit}; +use base64::{engine::general_purpose::URL_SAFE, Engine}; +use hmac::{Hmac, Mac}; +use rand::RngCore; +use sha2::Sha256; + +type Aes128CbcDec = cbc::Decryptor; +type Aes128CbcEnc = cbc::Encryptor; +type HmacSha256 = Hmac; + +pub fn derive_fernet_key(secret: &str) -> String { + let mut derived = [0u8; 32]; + pbkdf2::pbkdf2_hmac::( + secret.as_bytes(), + b"myfsio-iam-encryption", + 100_000, + &mut derived, + ); + URL_SAFE.encode(derived) +} + +pub fn decrypt(key_b64: &str, token: &str) -> Result, &'static str> { + let key_bytes = URL_SAFE + .decode(key_b64) + .map_err(|_| "invalid fernet key base64")?; + if key_bytes.len() != 32 { + return Err("fernet key must be 32 bytes"); + } + + let signing_key = &key_bytes[..16]; + let encryption_key = &key_bytes[16..]; + + let token_bytes = URL_SAFE + .decode(token) + .map_err(|_| "invalid fernet token base64")?; + + if token_bytes.len() < 57 { + return Err("fernet token too short"); + } + + if token_bytes[0] != 0x80 { + return Err("invalid fernet version"); + } + + let hmac_offset = token_bytes.len() - 32; + let payload = &token_bytes[..hmac_offset]; + let expected_hmac = &token_bytes[hmac_offset..]; + + let mut mac = HmacSha256::new_from_slice(signing_key).map_err(|_| "hmac key error")?; + mac.update(payload); + mac.verify_slice(expected_hmac) + .map_err(|_| "HMAC verification failed")?; + + let iv = &token_bytes[9..25]; + let ciphertext = &token_bytes[25..hmac_offset]; + + let plaintext = Aes128CbcDec::new(encryption_key.into(), iv.into()) + .decrypt_padded_vec_mut::(ciphertext) + .map_err(|_| "AES-CBC decryption failed")?; + + Ok(plaintext) +} + +pub fn encrypt(key_b64: &str, plaintext: &[u8]) -> Result { + let key_bytes = URL_SAFE + .decode(key_b64) + .map_err(|_| "invalid fernet key base64")?; + if key_bytes.len() != 32 { + return Err("fernet key must be 32 bytes"); + } + + let signing_key = &key_bytes[..16]; + let encryption_key = &key_bytes[16..]; + + let mut iv = [0u8; 16]; + rand::thread_rng().fill_bytes(&mut iv); + + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map_err(|_| "system time error")? 
+ .as_secs(); + + let ciphertext = Aes128CbcEnc::new(encryption_key.into(), (&iv).into()) + .encrypt_padded_vec_mut::(plaintext); + + let mut payload = Vec::with_capacity(1 + 8 + 16 + ciphertext.len()); + payload.push(0x80); + payload.extend_from_slice(×tamp.to_be_bytes()); + payload.extend_from_slice(&iv); + payload.extend_from_slice(&ciphertext); + + let mut mac = HmacSha256::new_from_slice(signing_key).map_err(|_| "hmac key error")?; + mac.update(&payload); + let tag = mac.finalize().into_bytes(); + + let mut token_bytes = payload; + token_bytes.extend_from_slice(&tag); + Ok(URL_SAFE.encode(&token_bytes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_derive_fernet_key_format() { + let key = derive_fernet_key("test-secret"); + let decoded = URL_SAFE.decode(&key).unwrap(); + assert_eq!(decoded.len(), 32); + } + + #[test] + fn test_roundtrip_with_python_compat() { + let key = derive_fernet_key("dev-secret-key"); + let decoded = URL_SAFE.decode(&key).unwrap(); + assert_eq!(decoded.len(), 32); + } +} diff --git a/crates/myfsio-auth/src/iam.rs b/crates/myfsio-auth/src/iam.rs new file mode 100644 index 0000000..a7f45b9 --- /dev/null +++ b/crates/myfsio-auth/src/iam.rs @@ -0,0 +1,1015 @@ +use chrono::{DateTime, Utc}; +use myfsio_common::types::Principal; +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Instant, SystemTime}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IamConfig { + #[serde(default = "default_version")] + pub version: u32, + #[serde(default)] + pub users: Vec, +} + +fn default_version() -> u32 { + 2 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IamUser { + pub user_id: String, + pub display_name: String, + #[serde(default = "default_enabled")] + pub enabled: bool, + #[serde(default)] + pub expires_at: Option, + #[serde(default)] + pub access_keys: Vec, + #[serde(default)] + pub policies: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +struct RawIamConfig { + #[serde(default)] + pub users: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +struct RawIamUser { + pub user_id: Option, + pub display_name: Option, + #[serde(default = "default_enabled")] + pub enabled: bool, + #[serde(default)] + pub expires_at: Option, + pub access_key: Option, + pub secret_key: Option, + #[serde(default)] + pub access_keys: Vec, + #[serde(default)] + pub policies: Vec, +} + +impl RawIamUser { + fn normalize(self) -> IamUser { + let mut access_keys = self.access_keys; + if access_keys.is_empty() { + if let (Some(ak), Some(sk)) = (self.access_key, self.secret_key) { + access_keys.push(AccessKey { + access_key: ak, + secret_key: sk, + status: "active".to_string(), + created_at: None, + }); + } + } + let display_name = self.display_name.unwrap_or_else(|| { + access_keys + .first() + .map(|k| k.access_key.clone()) + .unwrap_or_else(|| "unknown".to_string()) + }); + let user_id = self.user_id.unwrap_or_else(|| { + format!("u-{}", display_name.to_ascii_lowercase().replace(' ', "-")) + }); + IamUser { + user_id, + display_name, + enabled: self.enabled, + expires_at: self.expires_at, + access_keys, + policies: self.policies, + } + } +} + +fn default_enabled() -> bool { + true +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AccessKey { + pub access_key: String, + pub secret_key: String, + #[serde(default = "default_status")] + pub status: String, + #[serde(default)] + pub created_at: Option, +} + +fn default_status() -> 
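fernet.rs above follows the standard Fernet token layout (the test names suggest the goal is staying readable by the Python side's cryptography.fernet): one version byte 0x80, an 8-byte big-endian timestamp, a 16-byte IV, the AES-128-CBC/PKCS7 ciphertext, and a trailing 32-byte HMAC-SHA256 over everything before it, with the base64url key split into a 16-byte signing half and a 16-byte encryption half. A small sketch that only inspects that layout (the helper is hypothetical and not part of the crate):

```rust
use base64::{engine::general_purpose::URL_SAFE, Engine};

/// Hypothetical inspection helper: pull the issued-at timestamp out of a Fernet
/// token without decrypting it, relying on the layout described above:
/// [ 0x80 | 8-byte BE timestamp | 16-byte IV | ciphertext | 32-byte HMAC ].
fn fernet_timestamp(token: &str) -> Result<u64, &'static str> {
    let bytes = URL_SAFE.decode(token).map_err(|_| "invalid base64")?;
    // 1 (version) + 8 (timestamp) + 16 (IV) + 32 (HMAC) = 57 bytes minimum.
    if bytes.len() < 57 || bytes[0] != 0x80 {
        return Err("not a Fernet token");
    }
    let mut ts = [0u8; 8];
    ts.copy_from_slice(&bytes[1..9]);
    Ok(u64::from_be_bytes(ts))
}
```

Paired with the crate's encrypt/derive_fernet_key this would report a token's creation time; here it is only meant to document the byte layout the decrypt/encrypt functions agree on.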
String { + "active".to_string() +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IamPolicy { + pub bucket: String, + pub actions: Vec, + #[serde(default = "default_prefix")] + pub prefix: String, +} + +fn default_prefix() -> String { + "*".to_string() +} + +struct IamState { + key_secrets: HashMap, + key_index: HashMap, + key_status: HashMap, + user_records: HashMap, + file_mtime: Option, + last_check: Instant, +} + +pub struct IamService { + config_path: PathBuf, + state: Arc>, + check_interval: std::time::Duration, + fernet_key: Option, +} + +impl IamService { + pub fn new(config_path: PathBuf) -> Self { + Self::new_with_secret(config_path, None) + } + + pub fn new_with_secret(config_path: PathBuf, secret_key: Option) -> Self { + let fernet_key = secret_key.map(|s| crate::fernet::derive_fernet_key(&s)); + let service = Self { + config_path, + state: Arc::new(RwLock::new(IamState { + key_secrets: HashMap::new(), + key_index: HashMap::new(), + key_status: HashMap::new(), + user_records: HashMap::new(), + file_mtime: None, + last_check: Instant::now(), + })), + check_interval: std::time::Duration::from_secs(2), + fernet_key, + }; + service.reload(); + service + } + + fn reload_if_needed(&self) { + { + let state = self.state.read(); + if state.last_check.elapsed() < self.check_interval { + return; + } + } + + let current_mtime = std::fs::metadata(&self.config_path) + .and_then(|m| m.modified()) + .ok(); + + let needs_reload = { + let state = self.state.read(); + match (&state.file_mtime, ¤t_mtime) { + (None, Some(_)) => true, + (Some(old), Some(new)) => old != new, + (Some(_), None) => true, + (None, None) => false, + } + }; + + if needs_reload { + self.reload(); + } + + self.state.write().last_check = Instant::now(); + } + + fn reload(&self) { + let content = match std::fs::read_to_string(&self.config_path) { + Ok(c) => c, + Err(e) => { + tracing::warn!( + "Failed to read IAM config {}: {}", + self.config_path.display(), + e + ); + return; + } + }; + + let raw = if content.starts_with("MYFSIO_IAM_ENC:") { + let encrypted_token = &content["MYFSIO_IAM_ENC:".len()..]; + match &self.fernet_key { + Some(key) => match crate::fernet::decrypt(key, encrypted_token.trim()) { + Ok(plaintext) => match String::from_utf8(plaintext) { + Ok(s) => s, + Err(e) => { + tracing::error!("Decrypted IAM config is not valid UTF-8: {}", e); + return; + } + }, + Err(e) => { + tracing::error!( + "Failed to decrypt IAM config: {}. 
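The reload path below accepts two on-disk shapes for the IAM file: plain JSON, or a Fernet token behind a MYFSIO_IAM_ENC: prefix (written by save_config further down when a SECRET_KEY is configured). A tiny sketch of that envelope detection, with illustrative names:

```rust
// Illustrative only: classify an IAM config file the way reload()/load_config() do.
const ENC_PREFIX: &str = "MYFSIO_IAM_ENC:";

enum IamFileContents<'a> {
    PlainJson(&'a str),
    EncryptedFernetToken(&'a str),
}

fn classify(content: &str) -> IamFileContents<'_> {
    match content.strip_prefix(ENC_PREFIX) {
        Some(token) => IamFileContents::EncryptedFernetToken(token.trim()),
        None => IamFileContents::PlainJson(content),
    }
}

fn main() {
    match classify("MYFSIO_IAM_ENC:gAAAAB...") {
        IamFileContents::EncryptedFernetToken(tok) => {
            println!("encrypted, token starts with {}", &tok[..7])
        }
        IamFileContents::PlainJson(_) => println!("plain JSON"),
    }
}
```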
SECRET_KEY may have changed.", + e + ); + return; + } + }, + None => { + tracing::error!("IAM config is encrypted but no SECRET_KEY configured"); + return; + } + } + } else { + content + }; + + let raw_config: RawIamConfig = match serde_json::from_str(&raw) { + Ok(c) => c, + Err(e) => { + tracing::error!("Failed to parse IAM config: {}", e); + return; + } + }; + + let users: Vec = raw_config + .users + .into_iter() + .map(|u| u.normalize()) + .collect(); + + let mut key_secrets = HashMap::new(); + let mut key_index = HashMap::new(); + let mut key_status = HashMap::new(); + let mut user_records = HashMap::new(); + + for user in &users { + user_records.insert(user.user_id.clone(), user.clone()); + for ak in &user.access_keys { + key_secrets.insert(ak.access_key.clone(), ak.secret_key.clone()); + key_index.insert(ak.access_key.clone(), user.user_id.clone()); + key_status.insert(ak.access_key.clone(), ak.status.clone()); + } + } + + let file_mtime = std::fs::metadata(&self.config_path) + .and_then(|m| m.modified()) + .ok(); + + let mut state = self.state.write(); + state.key_secrets = key_secrets; + state.key_index = key_index; + state.key_status = key_status; + state.user_records = user_records; + state.file_mtime = file_mtime; + state.last_check = Instant::now(); + + tracing::info!( + "IAM config reloaded: {} users, {} keys", + users.len(), + state.key_secrets.len() + ); + } + + pub fn get_secret_key(&self, access_key: &str) -> Option { + self.reload_if_needed(); + let state = self.state.read(); + + let status = state.key_status.get(access_key)?; + if status != "active" { + return None; + } + + let user_id = state.key_index.get(access_key)?; + let user = state.user_records.get(user_id)?; + if !user.enabled { + return None; + } + + if let Some(ref expires_at) = user.expires_at { + if let Ok(exp) = expires_at.parse::>() { + if Utc::now() > exp { + return None; + } + } + } + + state.key_secrets.get(access_key).cloned() + } + + pub fn get_principal(&self, access_key: &str) -> Option { + self.reload_if_needed(); + let state = self.state.read(); + + let status = state.key_status.get(access_key)?; + if status != "active" { + return None; + } + + let user_id = state.key_index.get(access_key)?; + let user = state.user_records.get(user_id)?; + if !user.enabled { + return None; + } + + if let Some(ref expires_at) = user.expires_at { + if let Ok(exp) = expires_at.parse::>() { + if Utc::now() > exp { + return None; + } + } + } + + let is_admin = user + .policies + .iter() + .any(|p| p.bucket == "*" && p.actions.iter().any(|a| a == "*")); + + Some(Principal::new( + access_key.to_string(), + user.user_id.clone(), + user.display_name.clone(), + is_admin, + )) + } + + pub fn authenticate(&self, access_key: &str, secret_key: &str) -> Option { + let stored_secret = self.get_secret_key(access_key)?; + if !crate::sigv4::constant_time_compare(&stored_secret, secret_key) { + return None; + } + self.get_principal(access_key) + } + + pub fn authorize( + &self, + principal: &Principal, + bucket_name: Option<&str>, + action: &str, + object_key: Option<&str>, + ) -> bool { + self.reload_if_needed(); + + if principal.is_admin { + return true; + } + + let normalized_bucket = bucket_name.unwrap_or("*").trim().to_ascii_lowercase(); + let normalized_action = action.trim().to_ascii_lowercase(); + + let state = self.state.read(); + let user = match state.user_records.get(&principal.user_id) { + Some(u) => u, + None => return false, + }; + + if !user.enabled { + return false; + } + + if let Some(ref expires_at) = 
user.expires_at { + if let Ok(exp) = expires_at.parse::>() { + if Utc::now() > exp { + return false; + } + } + } + + for policy in &user.policies { + if !bucket_matches(&policy.bucket, &normalized_bucket) { + continue; + } + if !action_matches(&policy.actions, &normalized_action) { + continue; + } + if let Some(key) = object_key { + if !prefix_matches(&policy.prefix, key) { + continue; + } + } + return true; + } + + false + } + + pub fn export_config(&self, mask_secrets: bool) -> serde_json::Value { + self.reload_if_needed(); + let state = self.state.read(); + let users: Vec = state + .user_records + .values() + .map(|u| { + let access_keys: Vec = u + .access_keys + .iter() + .map(|k| { + let secret = if mask_secrets { + "***".to_string() + } else { + k.secret_key.clone() + }; + serde_json::json!({ + "access_key": k.access_key, + "secret_key": secret, + "status": k.status, + "created_at": k.created_at, + }) + }) + .collect(); + serde_json::json!({ + "user_id": u.user_id, + "display_name": u.display_name, + "enabled": u.enabled, + "expires_at": u.expires_at, + "access_keys": access_keys, + "policies": u.policies, + }) + }) + .collect(); + serde_json::json!({ + "version": 2, + "users": users, + }) + } + + pub async fn list_users(&self) -> Vec { + self.reload_if_needed(); + let state = self.state.read(); + state + .user_records + .values() + .map(|u| { + serde_json::json!({ + "user_id": u.user_id, + "display_name": u.display_name, + "enabled": u.enabled, + "access_keys": u.access_keys.iter().map(|k| { + serde_json::json!({ + "access_key": k.access_key, + "status": k.status, + "created_at": k.created_at, + }) + }).collect::>(), + "policy_count": u.policies.len(), + }) + }) + .collect() + } + + pub async fn get_user(&self, identifier: &str) -> Option { + self.reload_if_needed(); + let state = self.state.read(); + + let user = state.user_records.get(identifier).or_else(|| { + state + .key_index + .get(identifier) + .and_then(|uid| state.user_records.get(uid)) + })?; + + Some(serde_json::json!({ + "user_id": user.user_id, + "display_name": user.display_name, + "enabled": user.enabled, + "expires_at": user.expires_at, + "access_keys": user.access_keys.iter().map(|k| { + serde_json::json!({ + "access_key": k.access_key, + "status": k.status, + "created_at": k.created_at, + }) + }).collect::>(), + "policies": user.policies, + })) + } + + pub async fn set_user_enabled(&self, identifier: &str, enabled: bool) -> Result<(), String> { + let content = std::fs::read_to_string(&self.config_path) + .map_err(|e| format!("Failed to read IAM config: {}", e))?; + + let raw: RawIamConfig = serde_json::from_str(&content) + .map_err(|e| format!("Failed to parse IAM config: {}", e))?; + let mut config = IamConfig { + version: 2, + users: raw.users.into_iter().map(|u| u.normalize()).collect(), + }; + + let user = config + .users + .iter_mut() + .find(|u| { + u.user_id == identifier || u.access_keys.iter().any(|k| k.access_key == identifier) + }) + .ok_or_else(|| "User not found".to_string())?; + + user.enabled = enabled; + + let json = serde_json::to_string_pretty(&config) + .map_err(|e| format!("Failed to serialize IAM config: {}", e))?; + std::fs::write(&self.config_path, json) + .map_err(|e| format!("Failed to write IAM config: {}", e))?; + + self.reload(); + Ok(()) + } + + pub fn get_user_policies(&self, identifier: &str) -> Option> { + self.reload_if_needed(); + let state = self.state.read(); + let user = state.user_records.get(identifier).or_else(|| { + state + .key_index + .get(identifier) + .and_then(|uid| 
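Taken together, the lookup and authorization paths above give callers a small surface. A usage sketch (the config path and credentials are placeholders, and the file must already exist on disk):

```rust
use myfsio_auth::iam::IamService;
use std::path::PathBuf;

fn main() {
    // Placeholder path and keys; IamService watches the file and hot-reloads it
    // on mtime changes (see reload_if_needed above).
    let iam = IamService::new_with_secret(
        PathBuf::from("./data/iam.json"),
        Some("dev-secret-key".to_string()),
    );

    if let Some(principal) =
        iam.authenticate("AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
    {
        // bucket / action / object key line up with the policy fields bucket/actions/prefix.
        let can_read = iam.authorize(&principal, Some("docs"), "read", Some("reports/2026.csv"));
        println!("{} may read docs/reports: {}", principal.display_name, can_read);
    }
}
```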
state.user_records.get(uid)) + })?; + Some( + user.policies + .iter() + .map(|p| serde_json::to_value(p).unwrap_or_default()) + .collect(), + ) + } + + pub fn create_access_key(&self, identifier: &str) -> Result { + let content = std::fs::read_to_string(&self.config_path) + .map_err(|e| format!("Failed to read IAM config: {}", e))?; + let raw: RawIamConfig = serde_json::from_str(&content) + .map_err(|e| format!("Failed to parse IAM config: {}", e))?; + let mut config = IamConfig { + version: 2, + users: raw.users.into_iter().map(|u| u.normalize()).collect(), + }; + + let user = config + .users + .iter_mut() + .find(|u| { + u.user_id == identifier || u.access_keys.iter().any(|k| k.access_key == identifier) + }) + .ok_or_else(|| format!("User '{}' not found", identifier))?; + + let new_ak = format!("AK{}", uuid::Uuid::new_v4().simple()); + let new_sk = format!("SK{}", uuid::Uuid::new_v4().simple()); + + let key = AccessKey { + access_key: new_ak.clone(), + secret_key: new_sk.clone(), + status: "active".to_string(), + created_at: Some(chrono::Utc::now().to_rfc3339()), + }; + user.access_keys.push(key); + + let json = serde_json::to_string_pretty(&config) + .map_err(|e| format!("Failed to serialize IAM config: {}", e))?; + std::fs::write(&self.config_path, json) + .map_err(|e| format!("Failed to write IAM config: {}", e))?; + + self.reload(); + Ok(serde_json::json!({ + "access_key": new_ak, + "secret_key": new_sk, + })) + } + + pub fn delete_access_key(&self, access_key: &str) -> Result<(), String> { + let content = std::fs::read_to_string(&self.config_path) + .map_err(|e| format!("Failed to read IAM config: {}", e))?; + let raw: RawIamConfig = serde_json::from_str(&content) + .map_err(|e| format!("Failed to parse IAM config: {}", e))?; + let mut config = IamConfig { + version: 2, + users: raw.users.into_iter().map(|u| u.normalize()).collect(), + }; + + let mut found = false; + for user in &mut config.users { + if user.access_keys.iter().any(|k| k.access_key == access_key) { + if user.access_keys.len() <= 1 { + return Err("Cannot delete the last access key".to_string()); + } + user.access_keys.retain(|k| k.access_key != access_key); + found = true; + break; + } + } + if !found { + return Err(format!("Access key '{}' not found", access_key)); + } + + let json = serde_json::to_string_pretty(&config) + .map_err(|e| format!("Failed to serialize IAM config: {}", e))?; + std::fs::write(&self.config_path, json) + .map_err(|e| format!("Failed to write IAM config: {}", e))?; + + self.reload(); + Ok(()) + } + + fn load_config(&self) -> Result { + let content = std::fs::read_to_string(&self.config_path) + .map_err(|e| format!("Failed to read IAM config: {}", e))?; + let raw_text = if content.starts_with("MYFSIO_IAM_ENC:") { + let encrypted_token = &content["MYFSIO_IAM_ENC:".len()..]; + let key = self.fernet_key.as_ref().ok_or_else(|| { + "IAM config is encrypted but no SECRET_KEY configured".to_string() + })?; + let plaintext = crate::fernet::decrypt(key, encrypted_token.trim()) + .map_err(|e| format!("Failed to decrypt IAM config: {}", e))?; + String::from_utf8(plaintext) + .map_err(|e| format!("Decrypted IAM config not UTF-8: {}", e))? 
+ } else { + content + }; + let raw: RawIamConfig = serde_json::from_str(&raw_text) + .map_err(|e| format!("Failed to parse IAM config: {}", e))?; + Ok(IamConfig { + version: 2, + users: raw.users.into_iter().map(|u| u.normalize()).collect(), + }) + } + + fn save_config(&self, config: &IamConfig) -> Result<(), String> { + let json = serde_json::to_string_pretty(config) + .map_err(|e| format!("Failed to serialize IAM config: {}", e))?; + let payload = if let Some(key) = &self.fernet_key { + let token = crate::fernet::encrypt(key, json.as_bytes()) + .map_err(|e| format!("Failed to encrypt IAM config: {}", e))?; + format!("MYFSIO_IAM_ENC:{}", token) + } else { + json + }; + std::fs::write(&self.config_path, payload) + .map_err(|e| format!("Failed to write IAM config: {}", e))?; + self.reload(); + Ok(()) + } + + pub fn create_user( + &self, + display_name: &str, + policies: Option>, + access_key: Option, + secret_key: Option, + expires_at: Option, + ) -> Result { + let mut config = self.load_config()?; + + let new_ak = access_key + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| format!("AK{}", uuid::Uuid::new_v4().simple())); + let new_sk = secret_key + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| format!("SK{}", uuid::Uuid::new_v4().simple())); + + if config + .users + .iter() + .any(|u| u.access_keys.iter().any(|k| k.access_key == new_ak)) + { + return Err(format!("Access key '{}' already exists", new_ak)); + } + + let user_id = format!("u-{}", uuid::Uuid::new_v4().simple()); + let resolved_policies = policies.unwrap_or_else(|| { + vec![IamPolicy { + bucket: "*".to_string(), + actions: vec!["*".to_string()], + prefix: "*".to_string(), + }] + }); + + let user = IamUser { + user_id: user_id.clone(), + display_name: display_name.to_string(), + enabled: true, + expires_at, + access_keys: vec![AccessKey { + access_key: new_ak.clone(), + secret_key: new_sk.clone(), + status: "active".to_string(), + created_at: Some(chrono::Utc::now().to_rfc3339()), + }], + policies: resolved_policies, + }; + config.users.push(user); + + self.save_config(&config)?; + Ok(serde_json::json!({ + "user_id": user_id, + "access_key": new_ak, + "secret_key": new_sk, + "display_name": display_name, + })) + } + + pub fn delete_user(&self, identifier: &str) -> Result<(), String> { + let mut config = self.load_config()?; + let before = config.users.len(); + config.users.retain(|u| { + u.user_id != identifier && !u.access_keys.iter().any(|k| k.access_key == identifier) + }); + if config.users.len() == before { + return Err(format!("User '{}' not found", identifier)); + } + self.save_config(&config) + } + + pub fn update_user( + &self, + identifier: &str, + display_name: Option, + expires_at: Option>, + ) -> Result<(), String> { + let mut config = self.load_config()?; + let user = config + .users + .iter_mut() + .find(|u| { + u.user_id == identifier || u.access_keys.iter().any(|k| k.access_key == identifier) + }) + .ok_or_else(|| format!("User '{}' not found", identifier))?; + if let Some(name) = display_name { + user.display_name = name; + } + if let Some(exp) = expires_at { + user.expires_at = exp; + } + self.save_config(&config) + } + + pub fn update_user_policies( + &self, + identifier: &str, + policies: Vec, + ) -> Result<(), String> { + let mut config = self.load_config()?; + let user = config + .users + .iter_mut() + .find(|u| { + u.user_id == identifier || u.access_keys.iter().any(|k| k.access_key == identifier) + }) + .ok_or_else(|| format!("User '{}' not found", identifier))?; + user.policies = 
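The mutation helpers here (create_user, delete_user, update_user, update_user_policies, and rotate_secret below) all go through load_config/save_config, so a file that was encrypted on disk is re-encrypted on write. A sketch of provisioning a scoped user (the path and names are placeholders):

```rust
use myfsio_auth::iam::{IamPolicy, IamService};
use std::path::PathBuf;

fn main() -> Result<(), String> {
    // Placeholder path; the IAM file must already exist for load_config to succeed.
    let iam = IamService::new(PathBuf::from("./data/iam.json"));

    // Passing policies: None would grant the default full-access policy, so a
    // scoped policy list is supplied explicitly here.
    let created = iam.create_user(
        "reports-reader",
        Some(vec![IamPolicy {
            bucket: "docs".to_string(),
            actions: vec!["read".to_string()],
            prefix: "reports/".to_string(),
        }]),
        None, // access_key: let the service generate AK<uuid>
        None, // secret_key: let the service generate SK<uuid>
        None, // expires_at
    )?;
    println!("{}", created); // JSON with user_id, access_key, secret_key
    Ok(())
}
```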
policies; + self.save_config(&config) + } + + pub fn rotate_secret(&self, identifier: &str) -> Result { + let mut config = self.load_config()?; + let user = config + .users + .iter_mut() + .find(|u| { + u.user_id == identifier || u.access_keys.iter().any(|k| k.access_key == identifier) + }) + .ok_or_else(|| format!("User '{}' not found", identifier))?; + let key = user + .access_keys + .first_mut() + .ok_or_else(|| "User has no access keys".to_string())?; + let new_sk = format!("SK{}", uuid::Uuid::new_v4().simple()); + key.secret_key = new_sk.clone(); + let ak = key.access_key.clone(); + self.save_config(&config)?; + Ok(serde_json::json!({ + "access_key": ak, + "secret_key": new_sk, + })) + } +} + +fn bucket_matches(policy_bucket: &str, bucket: &str) -> bool { + let pb = policy_bucket.trim().to_ascii_lowercase(); + pb == "*" || pb == bucket +} + +fn action_matches(policy_actions: &[String], action: &str) -> bool { + for policy_action in policy_actions { + let pa = policy_action.trim().to_ascii_lowercase(); + if pa == "*" || pa == action { + return true; + } + if pa == "iam:*" && action.starts_with("iam:") { + return true; + } + } + false +} + +fn prefix_matches(policy_prefix: &str, object_key: &str) -> bool { + let p = policy_prefix.trim(); + if p.is_empty() || p == "*" { + return true; + } + let base = p.trim_end_matches('*'); + object_key.starts_with(base) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + fn test_iam_json() -> String { + serde_json::json!({ + "version": 2, + "users": [{ + "user_id": "u-test1234", + "display_name": "admin", + "enabled": true, + "access_keys": [{ + "access_key": "AKIAIOSFODNN7EXAMPLE", + "secret_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "status": "active", + "created_at": "2024-01-01T00:00:00Z" + }], + "policies": [{ + "bucket": "*", + "actions": ["*"], + "prefix": "*" + }] + }] + }) + .to_string() + } + + #[test] + fn test_load_and_lookup() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(test_iam_json().as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + let secret = svc.get_secret_key("AKIAIOSFODNN7EXAMPLE"); + assert_eq!(secret.unwrap(), "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"); + } + + #[test] + fn test_get_principal() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(test_iam_json().as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + let principal = svc.get_principal("AKIAIOSFODNN7EXAMPLE").unwrap(); + assert_eq!(principal.display_name, "admin"); + assert_eq!(principal.user_id, "u-test1234"); + assert!(principal.is_admin); + } + + #[test] + fn test_authenticate_success() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(test_iam_json().as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + let principal = svc + .authenticate( + "AKIAIOSFODNN7EXAMPLE", + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + ) + .unwrap(); + assert_eq!(principal.display_name, "admin"); + } + + #[test] + fn test_authenticate_wrong_secret() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(test_iam_json().as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + assert!(svc + .authenticate("AKIAIOSFODNN7EXAMPLE", "wrongsecret") + .is_none()); + } + + #[test] + fn test_unknown_key_returns_none() { + let mut tmp = 
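The three matchers at the bottom of iam.rs define the whole policy language: bucket is an exact (lowercased) name or "*", actions match exactly, via "*", or via the "iam:*" namespace wildcard, and prefix is a starts_with check with an optional trailing "*". Re-stated here as runnable assertions, purely for illustration (the canonical versions are bucket_matches / action_matches / prefix_matches above):

```rust
fn bucket_matches(policy_bucket: &str, bucket: &str) -> bool {
    let pb = policy_bucket.trim().to_ascii_lowercase();
    pb == "*" || pb == bucket
}

fn action_matches(policy_actions: &[&str], action: &str) -> bool {
    policy_actions.iter().any(|pa| {
        let pa = pa.trim().to_ascii_lowercase();
        pa == "*" || pa == action || (pa == "iam:*" && action.starts_with("iam:"))
    })
}

fn prefix_matches(policy_prefix: &str, object_key: &str) -> bool {
    let p = policy_prefix.trim();
    p.is_empty() || p == "*" || object_key.starts_with(p.trim_end_matches('*'))
}

fn main() {
    assert!(bucket_matches("*", "docs"));
    assert!(bucket_matches("Docs", "docs")); // policy bucket is lowercased before comparison
    assert!(action_matches(&["read"], "read"));
    assert!(action_matches(&["iam:*"], "iam:listusers"));
    assert!(!action_matches(&["read"], "write"));
    assert!(prefix_matches("reports/*", "reports/2026.csv"));
    assert!(!prefix_matches("reports/", "private/2026.csv"));
}
```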
tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(test_iam_json().as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + assert!(svc.get_secret_key("NONEXISTENTKEY").is_none()); + assert!(svc.get_principal("NONEXISTENTKEY").is_none()); + } + + #[test] + fn test_disabled_user() { + let json = serde_json::json!({ + "version": 2, + "users": [{ + "user_id": "u-disabled", + "display_name": "disabled-user", + "enabled": false, + "access_keys": [{ + "access_key": "DISABLED_KEY", + "secret_key": "secret123", + "status": "active" + }], + "policies": [] + }] + }) + .to_string(); + + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(json.as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + assert!(svc.get_secret_key("DISABLED_KEY").is_none()); + } + + #[test] + fn test_inactive_key() { + let json = serde_json::json!({ + "version": 2, + "users": [{ + "user_id": "u-test", + "display_name": "test", + "enabled": true, + "access_keys": [{ + "access_key": "INACTIVE_KEY", + "secret_key": "secret123", + "status": "inactive" + }], + "policies": [] + }] + }) + .to_string(); + + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(json.as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + assert!(svc.get_secret_key("INACTIVE_KEY").is_none()); + } + + #[test] + fn test_v1_flat_format() { + let json = serde_json::json!({ + "users": [{ + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [{"bucket": "*", "actions": ["*"], "prefix": "*"}] + }] + }) + .to_string(); + + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(json.as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + let secret = svc.get_secret_key("test"); + assert_eq!(secret.unwrap(), "secret"); + + let principal = svc.get_principal("test").unwrap(); + assert_eq!(principal.display_name, "Test User"); + assert!(principal.is_admin); + } + + #[test] + fn test_authorize_allows_matching_policy() { + let json = serde_json::json!({ + "version": 2, + "users": [{ + "user_id": "u-reader", + "display_name": "reader", + "enabled": true, + "access_keys": [{ + "access_key": "READER_KEY", + "secret_key": "reader-secret", + "status": "active" + }], + "policies": [{ + "bucket": "docs", + "actions": ["read"], + "prefix": "reports/" + }] + }] + }) + .to_string(); + + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(json.as_bytes()).unwrap(); + tmp.flush().unwrap(); + + let svc = IamService::new(tmp.path().to_path_buf()); + let principal = svc.get_principal("READER_KEY").unwrap(); + + assert!(svc.authorize(&principal, Some("docs"), "read", Some("reports/2026.csv"),)); + assert!(!svc.authorize(&principal, Some("docs"), "write", Some("reports/2026.csv"),)); + assert!(!svc.authorize(&principal, Some("docs"), "read", Some("private/2026.csv"),)); + assert!(!svc.authorize(&principal, Some("other"), "read", Some("reports/2026.csv"),)); + } +} diff --git a/crates/myfsio-auth/src/lib.rs b/crates/myfsio-auth/src/lib.rs new file mode 100644 index 0000000..0d15c37 --- /dev/null +++ b/crates/myfsio-auth/src/lib.rs @@ -0,0 +1,4 @@ +mod fernet; +pub mod iam; +pub mod principal; +pub mod sigv4; diff --git a/crates/myfsio-auth/src/principal.rs b/crates/myfsio-auth/src/principal.rs new file mode 100644 index 0000000..52fe839 --- /dev/null +++ 
b/crates/myfsio-auth/src/principal.rs @@ -0,0 +1 @@ +pub use myfsio_common::types::Principal; diff --git a/myfsio_core/src/sigv4.rs b/crates/myfsio-auth/src/sigv4.rs similarity index 60% rename from myfsio_core/src/sigv4.rs rename to crates/myfsio-auth/src/sigv4.rs index 904a853..5ef6997 100644 --- a/myfsio_core/src/sigv4.rs +++ b/crates/myfsio-auth/src/sigv4.rs @@ -2,7 +2,6 @@ use hmac::{Hmac, Mac}; use lru::LruCache; use parking_lot::Mutex; use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC}; -use pyo3::prelude::*; use sha2::{Digest, Sha256}; use std::num::NonZeroUsize; use std::sync::LazyLock; @@ -42,7 +41,7 @@ fn aws_uri_encode(input: &str) -> String { percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string() } -fn derive_signing_key_cached( +pub fn derive_signing_key_cached( secret_key: &str, date_stamp: &str, region: &str, @@ -65,7 +64,10 @@ fn derive_signing_key_cached( } } - let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes()); + let k_date = hmac_sha256( + format!("AWS4{}", secret_key).as_bytes(), + date_stamp.as_bytes(), + ); let k_region = hmac_sha256(&k_date, region.as_bytes()); let k_service = hmac_sha256(&k_region, service.as_bytes()); let k_signing = hmac_sha256(&k_service, b"aws4_request"); @@ -95,13 +97,12 @@ fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool { result == 0 } -#[pyfunction] pub fn verify_sigv4_signature( method: &str, canonical_uri: &str, - query_params: Vec<(String, String)>, + query_params: &[(String, String)], signed_headers_str: &str, - header_values: Vec<(String, String)>, + header_values: &[(String, String)], payload_hash: &str, amz_date: &str, date_stamp: &str, @@ -110,7 +111,7 @@ pub fn verify_sigv4_signature( secret_key: &str, provided_signature: &str, ) -> bool { - let mut sorted_params = query_params; + let mut sorted_params = query_params.to_vec(); sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1))); let canonical_query_string = sorted_params @@ -120,7 +121,7 @@ pub fn verify_sigv4_signature( .join("&"); let mut canonical_headers = String::new(); - for (name, value) in &header_values { + for (name, value) in header_values { let lower_name = name.to_lowercase(); let normalized = value.split_whitespace().collect::>().join(" "); let final_value = if lower_name == "expect" && normalized.is_empty() { @@ -136,7 +137,12 @@ pub fn verify_sigv4_signature( let canonical_request = format!( "{}\n{}\n{}\n{}\n{}\n{}", - method, canonical_uri, canonical_query_string, canonical_headers, signed_headers_str, payload_hash + method, + canonical_uri, + canonical_query_string, + canonical_headers, + signed_headers_str, + payload_hash ); let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service); @@ -153,7 +159,6 @@ pub fn verify_sigv4_signature( constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes()) } -#[pyfunction] pub fn derive_signing_key( secret_key: &str, date_stamp: &str, @@ -163,13 +168,16 @@ pub fn derive_signing_key( derive_signing_key_cached(secret_key, date_stamp, region, service) } -#[pyfunction] pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String { let sig = hmac_sha256(signing_key, string_to_sign.as_bytes()); hex::encode(sig) } -#[pyfunction] +pub fn compute_post_policy_signature(signing_key: &[u8], policy_b64: &str) -> String { + let sig = hmac_sha256(signing_key, policy_b64.as_bytes()); + hex::encode(sig) +} + pub fn build_string_to_sign( amz_date: &str, credential_scope: &str, @@ -182,12 
+190,98 @@ pub fn build_string_to_sign( ) } -#[pyfunction] pub fn constant_time_compare(a: &str, b: &str) -> bool { constant_time_compare_inner(a.as_bytes(), b.as_bytes()) } -#[pyfunction] pub fn clear_signing_key_cache() { SIGNING_KEY_CACHE.lock().clear(); } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_derive_signing_key() { + let key = derive_signing_key( + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "20130524", + "us-east-1", + "s3", + ); + assert_eq!(key.len(), 32); + } + + #[test] + fn test_derive_signing_key_cached() { + let key1 = derive_signing_key("secret", "20240101", "us-east-1", "s3"); + let key2 = derive_signing_key("secret", "20240101", "us-east-1", "s3"); + assert_eq!(key1, key2); + } + + #[test] + fn test_constant_time_compare() { + assert!(constant_time_compare("abc", "abc")); + assert!(!constant_time_compare("abc", "abd")); + assert!(!constant_time_compare("abc", "abcd")); + } + + #[test] + fn test_build_string_to_sign() { + let result = build_string_to_sign( + "20130524T000000Z", + "20130524/us-east-1/s3/aws4_request", + "GET\n/\n\nhost:example.com\n\nhost\nUNSIGNED-PAYLOAD", + ); + assert!(result.starts_with("AWS4-HMAC-SHA256\n")); + assert!(result.contains("20130524T000000Z")); + } + + #[test] + fn test_aws_uri_encode() { + assert_eq!(aws_uri_encode("hello world"), "hello%20world"); + assert_eq!(aws_uri_encode("test-file_name.txt"), "test-file_name.txt"); + assert_eq!(aws_uri_encode("a/b"), "a%2Fb"); + } + + #[test] + fn test_verify_sigv4_roundtrip() { + let secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"; + let date_stamp = "20130524"; + let region = "us-east-1"; + let service = "s3"; + let amz_date = "20130524T000000Z"; + + let signing_key = derive_signing_key(secret, date_stamp, region, service); + + let canonical_request = + "GET\n/\n\nhost:examplebucket.s3.amazonaws.com\n\nhost\nUNSIGNED-PAYLOAD"; + let string_to_sign = build_string_to_sign( + amz_date, + &format!("{}/{}/{}/aws4_request", date_stamp, region, service), + canonical_request, + ); + + let signature = compute_signature(&signing_key, &string_to_sign); + + let result = verify_sigv4_signature( + "GET", + "/", + &[], + "host", + &[( + "host".to_string(), + "examplebucket.s3.amazonaws.com".to_string(), + )], + "UNSIGNED-PAYLOAD", + amz_date, + date_stamp, + region, + service, + secret, + &signature, + ); + assert!(result); + } +} diff --git a/crates/myfsio-common/Cargo.toml b/crates/myfsio-common/Cargo.toml new file mode 100644 index 0000000..29774bc --- /dev/null +++ b/crates/myfsio-common/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "myfsio-common" +version.workspace = true +edition.workspace = true + +[dependencies] +thiserror = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +chrono = { workspace = true } +uuid = { workspace = true } diff --git a/crates/myfsio-common/src/constants.rs b/crates/myfsio-common/src/constants.rs new file mode 100644 index 0000000..9951f8d --- /dev/null +++ b/crates/myfsio-common/src/constants.rs @@ -0,0 +1,21 @@ +pub const SYSTEM_ROOT: &str = ".myfsio.sys"; +pub const SYSTEM_BUCKETS_DIR: &str = "buckets"; +pub const SYSTEM_MULTIPART_DIR: &str = "multipart"; +pub const BUCKET_META_DIR: &str = "meta"; +pub const BUCKET_VERSIONS_DIR: &str = "versions"; +pub const BUCKET_CONFIG_FILE: &str = ".bucket.json"; +pub const STATS_FILE: &str = "stats.json"; +pub const ETAG_INDEX_FILE: &str = "etag_index.json"; +pub const INDEX_FILE: &str = "_index.json"; +pub const MANIFEST_FILE: &str = "manifest.json"; +pub const 
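
The sigv4 module now exposes plain Rust functions instead of #[pyfunction] wrappers, so other crates call it directly. Below is a minimal sketch of a call site for header-based verification, assuming the crate re-exports the module as myfsio_auth::sigv4; the host, dates, and keys are illustrative values, not taken from this patch.

use myfsio_auth::sigv4::verify_sigv4_signature; // assumed module path

fn is_request_signed_correctly(provided_signature: &str) -> bool {
    // Header names/values must be exactly the ones listed in SignedHeaders.
    let headers = vec![
        ("host".to_string(), "examplebucket.s3.amazonaws.com".to_string()),
        ("x-amz-date".to_string(), "20130524T000000Z".to_string()),
    ];
    verify_sigv4_signature(
        "GET",
        "/test.txt",              // canonical URI
        &[],                      // canonical query parameters (none here)
        "host;x-amz-date",        // SignedHeaders, sorted and ';'-joined
        &headers,
        "UNSIGNED-PAYLOAD",       // payload hash
        "20130524T000000Z",       // amz_date
        "20130524",               // date_stamp
        "us-east-1",
        "s3",
        "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", // secret key (example value)
        provided_signature,
    )
}
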
DIR_MARKER_FILE: &str = ".__myfsio_dirobj__"; + +pub const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"]; + +pub const DEFAULT_REGION: &str = "us-east-1"; +pub const AWS_SERVICE: &str = "s3"; + +pub const DEFAULT_MAX_KEYS: usize = 1000; +pub const DEFAULT_OBJECT_KEY_MAX_BYTES: usize = 1024; +pub const DEFAULT_CHUNK_SIZE: usize = 65536; +pub const STREAM_CHUNK_SIZE: usize = 1_048_576; diff --git a/crates/myfsio-common/src/error.rs b/crates/myfsio-common/src/error.rs new file mode 100644 index 0000000..ceabe0d --- /dev/null +++ b/crates/myfsio-common/src/error.rs @@ -0,0 +1,271 @@ +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum S3ErrorCode { + AccessDenied, + BadDigest, + BucketAlreadyExists, + BucketAlreadyOwnedByYou, + BucketNotEmpty, + EntityTooLarge, + EntityTooSmall, + InternalError, + InvalidAccessKeyId, + InvalidArgument, + InvalidBucketName, + InvalidKey, + InvalidPart, + InvalidPartOrder, + InvalidPolicyDocument, + InvalidRange, + InvalidRequest, + InvalidTag, + MalformedXML, + MethodNotAllowed, + NoSuchBucket, + NoSuchBucketPolicy, + NoSuchKey, + NoSuchLifecycleConfiguration, + NoSuchUpload, + NoSuchVersion, + NoSuchTagSet, + ObjectCorrupted, + PreconditionFailed, + NotModified, + QuotaExceeded, + RequestTimeTooSkewed, + ServerSideEncryptionConfigurationNotFoundError, + SignatureDoesNotMatch, + SlowDown, +} + +impl S3ErrorCode { + pub fn http_status(&self) -> u16 { + match self { + Self::AccessDenied => 403, + Self::BadDigest => 400, + Self::BucketAlreadyExists => 409, + Self::BucketAlreadyOwnedByYou => 409, + Self::BucketNotEmpty => 409, + Self::EntityTooLarge => 413, + Self::EntityTooSmall => 400, + Self::InternalError => 500, + Self::InvalidAccessKeyId => 403, + Self::InvalidArgument => 400, + Self::InvalidBucketName => 400, + Self::InvalidKey => 400, + Self::InvalidPart => 400, + Self::InvalidPartOrder => 400, + Self::InvalidPolicyDocument => 400, + Self::InvalidRange => 416, + Self::InvalidRequest => 400, + Self::InvalidTag => 400, + Self::MalformedXML => 400, + Self::MethodNotAllowed => 405, + Self::NoSuchBucket => 404, + Self::NoSuchBucketPolicy => 404, + Self::NoSuchKey => 404, + Self::NoSuchLifecycleConfiguration => 404, + Self::NoSuchUpload => 404, + Self::NoSuchVersion => 404, + Self::NoSuchTagSet => 404, + Self::ObjectCorrupted => 500, + Self::PreconditionFailed => 412, + Self::NotModified => 304, + Self::QuotaExceeded => 403, + Self::RequestTimeTooSkewed => 403, + Self::ServerSideEncryptionConfigurationNotFoundError => 404, + Self::SignatureDoesNotMatch => 403, + Self::SlowDown => 503, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + Self::AccessDenied => "AccessDenied", + Self::BadDigest => "BadDigest", + Self::BucketAlreadyExists => "BucketAlreadyExists", + Self::BucketAlreadyOwnedByYou => "BucketAlreadyOwnedByYou", + Self::BucketNotEmpty => "BucketNotEmpty", + Self::EntityTooLarge => "EntityTooLarge", + Self::EntityTooSmall => "EntityTooSmall", + Self::InternalError => "InternalError", + Self::InvalidAccessKeyId => "InvalidAccessKeyId", + Self::InvalidArgument => "InvalidArgument", + Self::InvalidBucketName => "InvalidBucketName", + Self::InvalidKey => "InvalidKey", + Self::InvalidPart => "InvalidPart", + Self::InvalidPartOrder => "InvalidPartOrder", + Self::InvalidPolicyDocument => "InvalidPolicyDocument", + Self::InvalidRange => "InvalidRange", + Self::InvalidRequest => "InvalidRequest", + Self::InvalidTag => "InvalidTag", + Self::MalformedXML => "MalformedXML", + Self::MethodNotAllowed => 
"MethodNotAllowed", + Self::NoSuchBucket => "NoSuchBucket", + Self::NoSuchBucketPolicy => "NoSuchBucketPolicy", + Self::NoSuchKey => "NoSuchKey", + Self::NoSuchLifecycleConfiguration => "NoSuchLifecycleConfiguration", + Self::NoSuchUpload => "NoSuchUpload", + Self::NoSuchVersion => "NoSuchVersion", + Self::NoSuchTagSet => "NoSuchTagSet", + Self::ObjectCorrupted => "ObjectCorrupted", + Self::PreconditionFailed => "PreconditionFailed", + Self::NotModified => "NotModified", + Self::QuotaExceeded => "QuotaExceeded", + Self::RequestTimeTooSkewed => "RequestTimeTooSkewed", + Self::ServerSideEncryptionConfigurationNotFoundError => { + "ServerSideEncryptionConfigurationNotFoundError" + } + Self::SignatureDoesNotMatch => "SignatureDoesNotMatch", + Self::SlowDown => "SlowDown", + } + } + + pub fn default_message(&self) -> &'static str { + match self { + Self::AccessDenied => "Access Denied", + Self::BadDigest => "The Content-MD5 or checksum value you specified did not match what we received", + Self::BucketAlreadyExists => "The requested bucket name is not available", + Self::BucketAlreadyOwnedByYou => "Your previous request to create the named bucket succeeded and you already own it", + Self::BucketNotEmpty => "The bucket you tried to delete is not empty", + Self::EntityTooLarge => "Your proposed upload exceeds the maximum allowed size", + Self::EntityTooSmall => "Your proposed upload is smaller than the minimum allowed object size", + Self::InternalError => "We encountered an internal error. Please try again.", + Self::InvalidAccessKeyId => "The access key ID you provided does not exist", + Self::InvalidArgument => "Invalid argument", + Self::InvalidBucketName => "The specified bucket is not valid", + Self::InvalidKey => "The specified key is not valid", + Self::InvalidPart => "One or more of the specified parts could not be found", + Self::InvalidPartOrder => "The list of parts was not in ascending order", + Self::InvalidPolicyDocument => "The content of the form does not meet the conditions specified in the policy document", + Self::InvalidRange => "The requested range is not satisfiable", + Self::InvalidRequest => "Invalid request", + Self::InvalidTag => "The Tagging header is invalid", + Self::MalformedXML => "The XML you provided was not well-formed", + Self::MethodNotAllowed => "The specified method is not allowed against this resource", + Self::NoSuchBucket => "The specified bucket does not exist", + Self::NoSuchBucketPolicy => "The bucket policy does not exist", + Self::NoSuchKey => "The specified key does not exist", + Self::NoSuchLifecycleConfiguration => "The lifecycle configuration does not exist", + Self::NoSuchUpload => "The specified multipart upload does not exist", + Self::NoSuchVersion => "The specified version does not exist", + Self::NoSuchTagSet => "The TagSet does not exist", + Self::ObjectCorrupted => "The stored object is corrupted and cannot be served", + Self::PreconditionFailed => "At least one of the preconditions you specified did not hold", + Self::NotModified => "Not Modified", + Self::QuotaExceeded => "The bucket quota has been exceeded", + Self::RequestTimeTooSkewed => "The difference between the request time and the server's time is too large", + Self::ServerSideEncryptionConfigurationNotFoundError => "The server side encryption configuration was not found", + Self::SignatureDoesNotMatch => "The request signature we calculated does not match the signature you provided", + Self::SlowDown => "Please reduce your request rate", + } + } +} + +impl fmt::Display for 
S3ErrorCode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug, Clone)] +pub struct S3Error { + pub code: S3ErrorCode, + pub message: String, + pub resource: String, + pub request_id: String, +} + +impl S3Error { + pub fn new(code: S3ErrorCode, message: impl Into<String>) -> Self { + Self { + code, + message: message.into(), + resource: String::new(), + request_id: String::new(), + } + } + + pub fn from_code(code: S3ErrorCode) -> Self { + Self::new(code, code.default_message()) + } + + pub fn with_resource(mut self, resource: impl Into<String>) -> Self { + self.resource = resource.into(); + self + } + + pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self { + self.request_id = request_id.into(); + self + } + + pub fn http_status(&self) -> u16 { + self.code.http_status() + } + + pub fn to_xml(&self) -> String { + format!( + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\ + <Error>\ + <Code>{}</Code>\ + <Message>{}</Message>\ + <Resource>{}</Resource>\ + <RequestId>{}</RequestId>\ + </Error>", + self.code.as_str(), + xml_escape(&self.message), + xml_escape(&self.resource), + xml_escape(&self.request_id), + ) + } +} + +impl fmt::Display for S3Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.code, self.message) + } +} + +impl std::error::Error for S3Error {} + +fn xml_escape(s: &str) -> String { + s.replace('&', "&amp;") + .replace('<', "&lt;") + .replace('>', "&gt;") + .replace('"', "&quot;") + .replace('\'', "&apos;") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_codes() { + assert_eq!(S3ErrorCode::NoSuchKey.http_status(), 404); + assert_eq!(S3ErrorCode::AccessDenied.http_status(), 403); + assert_eq!(S3ErrorCode::NoSuchBucket.as_str(), "NoSuchBucket"); + } + + #[test] + fn test_error_to_xml() { + let err = S3Error::from_code(S3ErrorCode::NoSuchKey) + .with_resource("/test-bucket/test-key") + .with_request_id("abc123"); + let xml = err.to_xml(); + assert!(xml.contains("NoSuchKey")); + assert!(xml.contains("/test-bucket/test-key")); + assert!(xml.contains("abc123")); + } + + #[test] + fn test_xml_escape() { + let err = S3Error::new(S3ErrorCode::InvalidArgument, "key <test> & \"value\"") + .with_resource("/bucket/key&"); + let xml = err.to_xml(); + assert!(xml.contains("&lt;test&gt;")); + assert!(xml.contains("&amp;")); + } +} diff --git a/crates/myfsio-common/src/lib.rs b/crates/myfsio-common/src/lib.rs new file mode 100644 index 0000000..ad67240 --- /dev/null +++ b/crates/myfsio-common/src/lib.rs @@ -0,0 +1,3 @@ +pub mod constants; +pub mod error; +pub mod types; diff --git a/crates/myfsio-common/src/types.rs b/crates/myfsio-common/src/types.rs new file mode 100644 index 0000000..6977d75 --- /dev/null +++ b/crates/myfsio-common/src/types.rs @@ -0,0 +1,243 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ObjectMeta { + pub key: String, + pub size: u64, + pub last_modified: DateTime<Utc>, + pub etag: Option<String>, + pub content_type: Option<String>, + pub storage_class: Option<String>, + pub metadata: HashMap<String, String>, + #[serde(default)] + pub version_id: Option<String>, + #[serde(default)] + pub is_delete_marker: bool, + #[serde(default, skip_serializing)] + pub internal_metadata: HashMap<String, String>, +} + +impl ObjectMeta { + pub fn new(key: String, size: u64, last_modified: DateTime<Utc>) -> Self { + Self { + key, + size, + last_modified, + etag: None, + content_type: None, + storage_class: Some("STANDARD".to_string()), + metadata: HashMap::new(), + version_id: None, + is_delete_marker: false, + internal_metadata: HashMap::new(), + } + } +} + +#[derive(Debug, Clone,
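
S3Error carries everything a handler needs to emit the standard S3 error document: the code table above supplies the HTTP status and to_xml() produces the body. A small framework-agnostic sketch of that mapping; the function name and tuple return shape are illustrative, not part of the patch.

use myfsio_common::error::{S3Error, S3ErrorCode};

fn not_found_response(bucket: &str, key: &str, request_id: &str) -> (u16, &'static str, String) {
    let err = S3Error::from_code(S3ErrorCode::NoSuchKey)
        .with_resource(format!("/{}/{}", bucket, key))
        .with_request_id(request_id);
    // S3 error bodies are XML; the status comes from S3ErrorCode::http_status().
    (err.http_status(), "application/xml", err.to_xml())
}
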
Default)] +pub struct DeleteOutcome { + pub version_id: Option, + pub is_delete_marker: bool, + pub existed: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BucketMeta { + pub name: String, + pub creation_date: DateTime, +} + +#[derive(Debug, Clone, Default)] +pub struct BucketStats { + pub objects: u64, + pub bytes: u64, + pub version_count: u64, + pub version_bytes: u64, +} + +impl BucketStats { + pub fn total_objects(&self) -> u64 { + self.objects + self.version_count + } + + pub fn total_bytes(&self) -> u64 { + self.bytes + self.version_bytes + } +} + +#[derive(Debug, Clone)] +pub struct ListObjectsResult { + pub objects: Vec, + pub is_truncated: bool, + pub next_continuation_token: Option, +} + +#[derive(Debug, Clone)] +pub struct ShallowListResult { + pub objects: Vec, + pub common_prefixes: Vec, + pub is_truncated: bool, + pub next_continuation_token: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct ListParams { + pub max_keys: usize, + pub continuation_token: Option, + pub prefix: Option, + pub start_after: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct ShallowListParams { + pub prefix: String, + pub delimiter: String, + pub max_keys: usize, + pub continuation_token: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PartMeta { + pub part_number: u32, + pub etag: String, + pub size: u64, + pub last_modified: Option>, +} + +#[derive(Debug, Clone)] +pub struct PartInfo { + pub part_number: u32, + pub etag: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MultipartUploadInfo { + pub upload_id: String, + pub key: String, + pub initiated: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VersionInfo { + pub version_id: String, + pub key: String, + pub size: u64, + pub last_modified: DateTime, + pub etag: Option, + pub is_latest: bool, + #[serde(default)] + pub is_delete_marker: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Tag { + pub key: String, + pub value: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +pub enum VersioningStatus { + #[default] + Disabled, + Enabled, + Suspended, +} + +impl VersioningStatus { + pub fn is_enabled(self) -> bool { + matches!(self, VersioningStatus::Enabled) + } + + pub fn is_active(self) -> bool { + matches!(self, VersioningStatus::Enabled | VersioningStatus::Suspended) + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct BucketConfig { + #[serde(default)] + pub versioning_enabled: bool, + #[serde(default)] + pub versioning_suspended: bool, + #[serde(default)] + pub tags: Vec, + #[serde(default)] + pub cors: Option, + #[serde(default)] + pub encryption: Option, + #[serde(default)] + pub lifecycle: Option, + #[serde(default)] + pub website: Option, + #[serde(default)] + pub quota: Option, + #[serde(default)] + pub acl: Option, + #[serde(default)] + pub notification: Option, + #[serde(default)] + pub logging: Option, + #[serde(default)] + pub object_lock: Option, + #[serde(default)] + pub policy: Option, + #[serde(default)] + pub replication: Option, +} + +impl BucketConfig { + pub fn versioning_status(&self) -> VersioningStatus { + if self.versioning_enabled { + VersioningStatus::Enabled + } else if self.versioning_suspended { + VersioningStatus::Suspended + } else { + VersioningStatus::Disabled + } + } + + pub fn set_versioning_status(&mut self, status: VersioningStatus) { + match status { + VersioningStatus::Enabled => { + 
self.versioning_enabled = true; + self.versioning_suspended = false; + } + VersioningStatus::Suspended => { + self.versioning_enabled = false; + self.versioning_suspended = true; + } + VersioningStatus::Disabled => { + self.versioning_enabled = false; + self.versioning_suspended = false; + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QuotaConfig { + pub max_bytes: Option, + pub max_objects: Option, +} + +#[derive(Debug, Clone)] +pub struct Principal { + pub access_key: String, + pub user_id: String, + pub display_name: String, + pub is_admin: bool, +} + +impl Principal { + pub fn new(access_key: String, user_id: String, display_name: String, is_admin: bool) -> Self { + Self { + access_key, + user_id, + display_name, + is_admin, + } + } +} diff --git a/crates/myfsio-crypto/Cargo.toml b/crates/myfsio-crypto/Cargo.toml new file mode 100644 index 0000000..09be255 --- /dev/null +++ b/crates/myfsio-crypto/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "myfsio-crypto" +version.workspace = true +edition.workspace = true + +[dependencies] +myfsio-common = { path = "../myfsio-common" } +md-5 = { workspace = true } +sha2 = { workspace = true } +hex = { workspace = true } +aes-gcm = { workspace = true } +hkdf = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +base64 = { workspace = true } +rand = "0.8" + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } +tempfile = "3" diff --git a/crates/myfsio-crypto/src/aes_gcm.rs b/crates/myfsio-crypto/src/aes_gcm.rs new file mode 100644 index 0000000..e7ebd98 --- /dev/null +++ b/crates/myfsio-crypto/src/aes_gcm.rs @@ -0,0 +1,545 @@ +use aes_gcm::aead::Aead; +use aes_gcm::{Aes256Gcm, KeyInit, Nonce}; +use hkdf::Hkdf; +use sha2::Sha256; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::Path; +use thiserror::Error; + +const DEFAULT_CHUNK_SIZE: usize = 65536; +const HEADER_SIZE: usize = 4; + +#[derive(Debug, Error)] +pub enum CryptoError { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("Invalid key size: expected 32 bytes, got {0}")] + InvalidKeySize(usize), + #[error("Invalid nonce size: expected 12 bytes, got {0}")] + InvalidNonceSize(usize), + #[error("Encryption failed: {0}")] + EncryptionFailed(String), + #[error("Decryption failed at chunk {0}")] + DecryptionFailed(u32), + #[error("HKDF expand failed: {0}")] + HkdfFailed(String), +} + +fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result { + let mut filled = 0; + while filled < buf.len() { + match reader.read(&mut buf[filled..]) { + Ok(0) => break, + Ok(n) => filled += n, + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + } + } + Ok(filled) +} + +fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], CryptoError> { + let hkdf = Hkdf::::new(Some(base_nonce), b"chunk_nonce"); + let mut okm = [0u8; 12]; + hkdf.expand(&chunk_index.to_be_bytes(), &mut okm) + .map_err(|e| CryptoError::HkdfFailed(e.to_string()))?; + Ok(okm) +} + +pub fn encrypt_stream_chunked( + input_path: &Path, + output_path: &Path, + key: &[u8], + base_nonce: &[u8], + chunk_size: Option, +) -> Result { + if key.len() != 32 { + return Err(CryptoError::InvalidKeySize(key.len())); + } + if base_nonce.len() != 12 { + return 
Err(CryptoError::InvalidNonceSize(base_nonce.len())); + } + + let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE); + let key_arr: [u8; 32] = key.try_into().unwrap(); + let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap(); + let cipher = Aes256Gcm::new(&key_arr.into()); + + let mut infile = File::open(input_path)?; + let mut outfile = File::create(output_path)?; + + outfile.write_all(&[0u8; 4])?; + + let mut buf = vec![0u8; chunk_size]; + let mut chunk_index: u32 = 0; + + loop { + let n = read_exact_chunk(&mut infile, &mut buf)?; + if n == 0 { + break; + } + + let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?; + let nonce = Nonce::from_slice(&nonce_bytes); + + let encrypted = cipher + .encrypt(nonce, &buf[..n]) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + + let size = encrypted.len() as u32; + outfile.write_all(&size.to_be_bytes())?; + outfile.write_all(&encrypted)?; + + chunk_index += 1; + } + + outfile.seek(SeekFrom::Start(0))?; + outfile.write_all(&chunk_index.to_be_bytes())?; + + Ok(chunk_index) +} + +pub fn decrypt_stream_chunked( + input_path: &Path, + output_path: &Path, + key: &[u8], + base_nonce: &[u8], +) -> Result { + if key.len() != 32 { + return Err(CryptoError::InvalidKeySize(key.len())); + } + if base_nonce.len() != 12 { + return Err(CryptoError::InvalidNonceSize(base_nonce.len())); + } + + let key_arr: [u8; 32] = key.try_into().unwrap(); + let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap(); + let cipher = Aes256Gcm::new(&key_arr.into()); + + let mut infile = File::open(input_path)?; + let mut outfile = File::create(output_path)?; + + let mut header = [0u8; HEADER_SIZE]; + infile.read_exact(&mut header)?; + let chunk_count = u32::from_be_bytes(header); + + let mut size_buf = [0u8; HEADER_SIZE]; + for chunk_index in 0..chunk_count { + infile.read_exact(&mut size_buf)?; + let chunk_size = u32::from_be_bytes(size_buf) as usize; + + let mut encrypted = vec![0u8; chunk_size]; + infile.read_exact(&mut encrypted)?; + + let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?; + let nonce = Nonce::from_slice(&nonce_bytes); + + let decrypted = cipher + .decrypt(nonce, encrypted.as_ref()) + .map_err(|_| CryptoError::DecryptionFailed(chunk_index))?; + + outfile.write_all(&decrypted)?; + } + + Ok(chunk_count) +} + +const GCM_TAG_LEN: usize = 16; + +pub fn decrypt_stream_chunked_range( + input_path: &Path, + output_path: &Path, + key: &[u8], + base_nonce: &[u8], + chunk_plain_size: usize, + plaintext_size: u64, + plain_start: u64, + plain_end_inclusive: u64, +) -> Result { + if key.len() != 32 { + return Err(CryptoError::InvalidKeySize(key.len())); + } + if base_nonce.len() != 12 { + return Err(CryptoError::InvalidNonceSize(base_nonce.len())); + } + if chunk_plain_size == 0 { + return Err(CryptoError::EncryptionFailed( + "chunk_plain_size must be > 0".into(), + )); + } + if plaintext_size == 0 { + let _ = File::create(output_path)?; + return Ok(0); + } + if plain_start > plain_end_inclusive || plain_end_inclusive >= plaintext_size { + return Err(CryptoError::EncryptionFailed(format!( + "range [{}, {}] invalid for plaintext size {}", + plain_start, plain_end_inclusive, plaintext_size + ))); + } + + let key_arr: [u8; 32] = key.try_into().unwrap(); + let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap(); + let cipher = Aes256Gcm::new(&key_arr.into()); + + let n = chunk_plain_size as u64; + let first_chunk = (plain_start / n) as u32; + let last_chunk = (plain_end_inclusive / n) as u32; + let total_chunks = plaintext_size.div_ceil(n) as 
u32; + let final_chunk_plain = plaintext_size - (total_chunks as u64 - 1) * n; + + let mut infile = File::open(input_path)?; + + let mut header = [0u8; HEADER_SIZE]; + infile.read_exact(&mut header)?; + let stored_chunk_count = u32::from_be_bytes(header); + if stored_chunk_count != total_chunks { + return Err(CryptoError::EncryptionFailed(format!( + "chunk count mismatch: header says {}, plaintext_size implies {}", + stored_chunk_count, total_chunks + ))); + } + + let mut outfile = File::create(output_path)?; + + let stride = n + GCM_TAG_LEN as u64 + HEADER_SIZE as u64; + let first_offset = HEADER_SIZE as u64 + first_chunk as u64 * stride; + infile.seek(SeekFrom::Start(first_offset))?; + + let mut size_buf = [0u8; HEADER_SIZE]; + let mut bytes_written: u64 = 0; + + for chunk_index in first_chunk..=last_chunk { + infile.read_exact(&mut size_buf)?; + let ct_len = u32::from_be_bytes(size_buf) as usize; + + let expected_plain = if chunk_index + 1 == total_chunks { + final_chunk_plain as usize + } else { + chunk_plain_size + }; + let expected_ct = expected_plain + GCM_TAG_LEN; + if ct_len != expected_ct { + return Err(CryptoError::EncryptionFailed(format!( + "chunk {} stored length {} != expected {} (corrupt file or chunk_size mismatch)", + chunk_index, ct_len, expected_ct + ))); + } + + let mut encrypted = vec![0u8; ct_len]; + infile.read_exact(&mut encrypted)?; + + let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?; + let nonce = Nonce::from_slice(&nonce_bytes); + let decrypted = cipher + .decrypt(nonce, encrypted.as_ref()) + .map_err(|_| CryptoError::DecryptionFailed(chunk_index))?; + + let chunk_plain_start = chunk_index as u64 * n; + let chunk_plain_end_exclusive = chunk_plain_start + decrypted.len() as u64; + + let slice_start = plain_start.saturating_sub(chunk_plain_start) as usize; + let slice_end = (plain_end_inclusive + 1).min(chunk_plain_end_exclusive); + let slice_end_local = (slice_end - chunk_plain_start) as usize; + + if slice_end_local > slice_start { + outfile.write_all(&decrypted[slice_start..slice_end_local])?; + bytes_written += (slice_end_local - slice_start) as u64; + } + } + + Ok(bytes_written) +} + +pub async fn encrypt_stream_chunked_async( + input_path: &Path, + output_path: &Path, + key: &[u8], + base_nonce: &[u8], + chunk_size: Option, +) -> Result { + let input_path = input_path.to_owned(); + let output_path = output_path.to_owned(); + let key = key.to_vec(); + let base_nonce = base_nonce.to_vec(); + tokio::task::spawn_blocking(move || { + encrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce, chunk_size) + }) + .await + .map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))? +} + +pub async fn decrypt_stream_chunked_async( + input_path: &Path, + output_path: &Path, + key: &[u8], + base_nonce: &[u8], +) -> Result { + let input_path = input_path.to_owned(); + let output_path = output_path.to_owned(); + let key = key.to_vec(); + let base_nonce = base_nonce.to_vec(); + tokio::task::spawn_blocking(move || { + decrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce) + }) + .await + .map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))? 
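
decrypt_stream_chunked_range works because the ciphertext layout is fully deterministic: a 4-byte big-endian chunk count, then for each chunk a 4-byte big-endian length prefix followed by that chunk's ciphertext (the plaintext bytes plus the 16-byte GCM tag), with every chunk's nonce derived from the base nonce and the chunk index via HKDF-SHA256. The sketch below just restates the offset arithmetic the function uses to seek straight to the first chunk it needs.

// On-disk layout assumed by the range decryptor above:
// [u32 chunk_count][u32 len][chunk 0 ciphertext][u32 len][chunk 1 ciphertext]...
const HEADER_SIZE: u64 = 4;
const GCM_TAG_LEN: u64 = 16;

fn chunk_offset(chunk_index: u64, chunk_plain_size: u64) -> u64 {
    // Each full chunk occupies: 4-byte length prefix + plaintext bytes + 16-byte tag.
    let stride = chunk_plain_size + GCM_TAG_LEN + HEADER_SIZE;
    HEADER_SIZE + chunk_index * stride
}

// e.g. with 64 KiB chunks, chunk 3 starts at 4 + 3 * (65536 + 16 + 4) = 196_672 bytes.
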
+} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as IoWrite; + + #[test] + fn test_encrypt_decrypt_roundtrip() { + let dir = tempfile::tempdir().unwrap(); + let input = dir.path().join("input.bin"); + let encrypted = dir.path().join("encrypted.bin"); + let decrypted = dir.path().join("decrypted.bin"); + + let data = b"Hello, this is a test of AES-256-GCM chunked encryption!"; + std::fs::File::create(&input) + .unwrap() + .write_all(data) + .unwrap(); + + let key = [0x42u8; 32]; + let nonce = [0x01u8; 12]; + + let chunks = encrypt_stream_chunked(&input, &encrypted, &key, &nonce, Some(16)).unwrap(); + assert!(chunks > 0); + + let chunks2 = decrypt_stream_chunked(&encrypted, &decrypted, &key, &nonce).unwrap(); + assert_eq!(chunks, chunks2); + + let result = std::fs::read(&decrypted).unwrap(); + assert_eq!(result, data); + } + + #[test] + fn test_invalid_key_size() { + let dir = tempfile::tempdir().unwrap(); + let input = dir.path().join("input.bin"); + std::fs::File::create(&input) + .unwrap() + .write_all(b"test") + .unwrap(); + + let result = encrypt_stream_chunked( + &input, + &dir.path().join("out"), + &[0u8; 16], + &[0u8; 12], + None, + ); + assert!(matches!(result, Err(CryptoError::InvalidKeySize(16)))); + } + + fn write_file(path: &Path, data: &[u8]) { + std::fs::File::create(path).unwrap().write_all(data).unwrap(); + } + + fn make_encrypted_file( + dir: &Path, + data: &[u8], + key: &[u8; 32], + nonce: &[u8; 12], + chunk: usize, + ) -> std::path::PathBuf { + let input = dir.join("input.bin"); + let encrypted = dir.join("encrypted.bin"); + write_file(&input, data); + encrypt_stream_chunked(&input, &encrypted, key, nonce, Some(chunk)).unwrap(); + encrypted + } + + #[test] + fn test_range_within_single_chunk() { + let dir = tempfile::tempdir().unwrap(); + let data: Vec = (0u8..=255).cycle().take(4096).collect(); + let key = [0x33u8; 32]; + let nonce = [0x07u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 1024); + let out = dir.path().join("range.bin"); + + let n = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 1024, + data.len() as u64, + 200, + 399, + ) + .unwrap(); + assert_eq!(n, 200); + let got = std::fs::read(&out).unwrap(); + assert_eq!(got, &data[200..400]); + } + + #[test] + fn test_range_spanning_multiple_chunks() { + let dir = tempfile::tempdir().unwrap(); + let data: Vec = (0..5000u32).map(|i| (i % 251) as u8).collect(); + let key = [0x44u8; 32]; + let nonce = [0x02u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 512); + let out = dir.path().join("range.bin"); + + let n = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 512, + data.len() as u64, + 100, + 2999, + ) + .unwrap(); + assert_eq!(n, 2900); + let got = std::fs::read(&out).unwrap(); + assert_eq!(got, &data[100..3000]); + } + + #[test] + fn test_range_covers_final_partial_chunk() { + let dir = tempfile::tempdir().unwrap(); + let data: Vec = (0..1300u32).map(|i| (i % 71) as u8).collect(); + let key = [0x55u8; 32]; + let nonce = [0x0au8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 512); + let out = dir.path().join("range.bin"); + + let n = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 512, + data.len() as u64, + 900, + 1299, + ) + .unwrap(); + assert_eq!(n, 400); + let got = std::fs::read(&out).unwrap(); + assert_eq!(got, &data[900..1300]); + } + + #[test] + fn test_range_full_object() { + let dir = tempfile::tempdir().unwrap(); + let 
data: Vec = (0..2048u32).map(|i| (i % 13) as u8).collect(); + let key = [0x11u8; 32]; + let nonce = [0x33u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 512); + let out = dir.path().join("range.bin"); + + let n = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 512, + data.len() as u64, + 0, + data.len() as u64 - 1, + ) + .unwrap(); + assert_eq!(n, data.len() as u64); + let got = std::fs::read(&out).unwrap(); + assert_eq!(got, data); + } + + #[test] + fn test_range_wrong_key_fails() { + let dir = tempfile::tempdir().unwrap(); + let data = b"range-auth-check".repeat(100); + let key = [0x66u8; 32]; + let nonce = [0x09u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 256); + let out = dir.path().join("range.bin"); + + let wrong = [0x67u8; 32]; + let r = decrypt_stream_chunked_range( + &encrypted, + &out, + &wrong, + &nonce, + 256, + data.len() as u64, + 0, + data.len() as u64 - 1, + ); + assert!(matches!(r, Err(CryptoError::DecryptionFailed(_)))); + } + + #[test] + fn test_range_out_of_bounds_rejected() { + let dir = tempfile::tempdir().unwrap(); + let data = vec![0u8; 100]; + let key = [0x22u8; 32]; + let nonce = [0x44u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 64); + let out = dir.path().join("range.bin"); + + let r = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 64, + data.len() as u64, + 50, + 200, + ); + assert!(r.is_err()); + } + + #[test] + fn test_range_mismatched_chunk_size_detected() { + let dir = tempfile::tempdir().unwrap(); + let data: Vec = (0..2048u32).map(|i| i as u8).collect(); + let key = [0x77u8; 32]; + let nonce = [0x88u8; 12]; + let encrypted = make_encrypted_file(dir.path(), &data, &key, &nonce, 512); + let out = dir.path().join("range.bin"); + + let r = decrypt_stream_chunked_range( + &encrypted, + &out, + &key, + &nonce, + 1024, + data.len() as u64, + 0, + 1023, + ); + assert!(r.is_err()); + } + + #[test] + fn test_wrong_key_fails_decrypt() { + let dir = tempfile::tempdir().unwrap(); + let input = dir.path().join("input.bin"); + let encrypted = dir.path().join("encrypted.bin"); + let decrypted = dir.path().join("decrypted.bin"); + + std::fs::File::create(&input) + .unwrap() + .write_all(b"secret data") + .unwrap(); + + let key = [0x42u8; 32]; + let nonce = [0x01u8; 12]; + encrypt_stream_chunked(&input, &encrypted, &key, &nonce, None).unwrap(); + + let wrong_key = [0x43u8; 32]; + let result = decrypt_stream_chunked(&encrypted, &decrypted, &wrong_key, &nonce); + assert!(matches!(result, Err(CryptoError::DecryptionFailed(_)))); + } +} diff --git a/crates/myfsio-crypto/src/encryption.rs b/crates/myfsio-crypto/src/encryption.rs new file mode 100644 index 0000000..c8cf135 --- /dev/null +++ b/crates/myfsio-crypto/src/encryption.rs @@ -0,0 +1,492 @@ +use base64::engine::general_purpose::STANDARD as B64; +use base64::Engine; +use rand::RngCore; +use std::collections::HashMap; +use std::path::Path; + +use crate::aes_gcm::{ + decrypt_stream_chunked, decrypt_stream_chunked_range, encrypt_stream_chunked, CryptoError, +}; +use crate::kms::KmsService; + +#[derive(Debug, Clone, PartialEq)] +pub enum SseAlgorithm { + Aes256, + AwsKms, + CustomerProvided, +} + +impl SseAlgorithm { + pub fn as_str(&self) -> &'static str { + match self { + SseAlgorithm::Aes256 => "AES256", + SseAlgorithm::AwsKms => "aws:kms", + SseAlgorithm::CustomerProvided => "AES256", + } + } +} + +#[derive(Debug, Clone)] +pub struct EncryptionContext { + pub algorithm: 
SseAlgorithm, + pub kms_key_id: Option, + pub customer_key: Option>, +} + +#[derive(Debug, Clone)] +pub struct EncryptionMetadata { + pub algorithm: String, + pub nonce: String, + pub encrypted_data_key: Option, + pub kms_key_id: Option, + pub chunk_size: Option, + pub plaintext_size: Option, +} + +impl EncryptionMetadata { + pub fn to_metadata_map(&self) -> HashMap { + let mut map = HashMap::new(); + map.insert( + "x-amz-server-side-encryption".to_string(), + self.algorithm.clone(), + ); + map.insert("x-amz-encryption-nonce".to_string(), self.nonce.clone()); + if let Some(ref dk) = self.encrypted_data_key { + map.insert("x-amz-encrypted-data-key".to_string(), dk.clone()); + } + if let Some(ref kid) = self.kms_key_id { + map.insert("x-amz-encryption-key-id".to_string(), kid.clone()); + } + if let Some(cs) = self.chunk_size { + map.insert("x-amz-encryption-chunk-size".to_string(), cs.to_string()); + } + if let Some(ps) = self.plaintext_size { + map.insert( + "x-amz-encryption-plaintext-size".to_string(), + ps.to_string(), + ); + } + map + } + + pub fn from_metadata(meta: &HashMap) -> Option { + let algorithm = meta.get("x-amz-server-side-encryption")?; + let nonce = meta.get("x-amz-encryption-nonce")?; + Some(Self { + algorithm: algorithm.clone(), + nonce: nonce.clone(), + encrypted_data_key: meta.get("x-amz-encrypted-data-key").cloned(), + kms_key_id: meta.get("x-amz-encryption-key-id").cloned(), + chunk_size: meta + .get("x-amz-encryption-chunk-size") + .and_then(|s| s.parse().ok()), + plaintext_size: meta + .get("x-amz-encryption-plaintext-size") + .and_then(|s| s.parse().ok()), + }) + } + + pub fn is_encrypted(meta: &HashMap) -> bool { + meta.contains_key("x-amz-server-side-encryption") + } + + pub fn clean_metadata(meta: &mut HashMap) { + meta.remove("x-amz-server-side-encryption"); + meta.remove("x-amz-encryption-nonce"); + meta.remove("x-amz-encrypted-data-key"); + meta.remove("x-amz-encryption-key-id"); + meta.remove("x-amz-encryption-chunk-size"); + meta.remove("x-amz-encryption-plaintext-size"); + } +} + +pub struct EncryptionService { + master_key: [u8; 32], + kms: Option>, + config: EncryptionConfig, +} + +#[derive(Debug, Clone, Copy)] +pub struct EncryptionConfig { + pub chunk_size: usize, +} + +impl Default for EncryptionConfig { + fn default() -> Self { + Self { chunk_size: 65_536 } + } +} + +impl EncryptionService { + pub fn new(master_key: [u8; 32], kms: Option>) -> Self { + Self::with_config(master_key, kms, EncryptionConfig::default()) + } + + pub fn with_config( + master_key: [u8; 32], + kms: Option>, + config: EncryptionConfig, + ) -> Self { + Self { + master_key, + kms, + config, + } + } + + pub fn generate_data_key(&self) -> ([u8; 32], [u8; 12]) { + let mut data_key = [0u8; 32]; + let mut nonce = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut data_key); + rand::thread_rng().fill_bytes(&mut nonce); + (data_key, nonce) + } + + pub fn wrap_data_key(&self, data_key: &[u8; 32]) -> Result { + use aes_gcm::aead::Aead; + use aes_gcm::{Aes256Gcm, KeyInit, Nonce}; + + let cipher = Aes256Gcm::new((&self.master_key).into()); + let mut nonce_bytes = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut nonce_bytes); + let nonce = Nonce::from_slice(&nonce_bytes); + + let encrypted = cipher + .encrypt(nonce, data_key.as_slice()) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + + let mut combined = Vec::with_capacity(12 + encrypted.len()); + combined.extend_from_slice(&nonce_bytes); + combined.extend_from_slice(&encrypted); + Ok(B64.encode(&combined)) + } + + pub fn 
unwrap_data_key(&self, wrapped_b64: &str) -> Result<[u8; 32], CryptoError> { + use aes_gcm::aead::Aead; + use aes_gcm::{Aes256Gcm, KeyInit, Nonce}; + + let combined = B64.decode(wrapped_b64).map_err(|e| { + CryptoError::EncryptionFailed(format!("Bad wrapped key encoding: {}", e)) + })?; + if combined.len() < 12 { + return Err(CryptoError::EncryptionFailed( + "Wrapped key too short".to_string(), + )); + } + + let (nonce_bytes, ciphertext) = combined.split_at(12); + let cipher = Aes256Gcm::new((&self.master_key).into()); + let nonce = Nonce::from_slice(nonce_bytes); + + let plaintext = cipher + .decrypt(nonce, ciphertext) + .map_err(|_| CryptoError::DecryptionFailed(0))?; + + if plaintext.len() != 32 { + return Err(CryptoError::InvalidKeySize(plaintext.len())); + } + let mut key = [0u8; 32]; + key.copy_from_slice(&plaintext); + Ok(key) + } + + pub async fn encrypt_object( + &self, + input_path: &Path, + output_path: &Path, + ctx: &EncryptionContext, + ) -> Result { + let (data_key, nonce) = self.generate_data_key(); + + let (encrypted_data_key, kms_key_id) = match ctx.algorithm { + SseAlgorithm::Aes256 => { + let wrapped = self.wrap_data_key(&data_key)?; + (Some(wrapped), None) + } + SseAlgorithm::AwsKms => { + let kms = self + .kms + .as_ref() + .ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?; + let kid = ctx + .kms_key_id + .as_ref() + .ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID".into()))?; + let ciphertext = kms.encrypt_data(kid, &data_key).await?; + (Some(B64.encode(&ciphertext)), Some(kid.clone())) + } + SseAlgorithm::CustomerProvided => (None, None), + }; + + let actual_key = if ctx.algorithm == SseAlgorithm::CustomerProvided { + let ck = ctx + .customer_key + .as_ref() + .ok_or_else(|| CryptoError::EncryptionFailed("No customer key provided".into()))?; + if ck.len() != 32 { + return Err(CryptoError::InvalidKeySize(ck.len())); + } + let mut k = [0u8; 32]; + k.copy_from_slice(ck); + k + } else { + data_key + }; + + let plaintext_size = tokio::fs::metadata(input_path) + .await + .map_err(CryptoError::Io)? 
+ .len(); + + let ip = input_path.to_owned(); + let op = output_path.to_owned(); + let ak = actual_key; + let n = nonce; + let chunk_size = self.config.chunk_size; + tokio::task::spawn_blocking(move || { + encrypt_stream_chunked(&ip, &op, &ak, &n, Some(chunk_size)) + }) + .await + .map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??; + + Ok(EncryptionMetadata { + algorithm: ctx.algorithm.as_str().to_string(), + nonce: B64.encode(nonce), + encrypted_data_key, + kms_key_id, + chunk_size: Some(chunk_size), + plaintext_size: Some(plaintext_size), + }) + } + + async fn resolve_data_key( + &self, + enc_meta: &EncryptionMetadata, + customer_key: Option<&[u8]>, + ) -> Result<([u8; 32], [u8; 12]), CryptoError> { + let nonce_bytes = B64 + .decode(&enc_meta.nonce) + .map_err(|e| CryptoError::EncryptionFailed(format!("Bad nonce encoding: {}", e)))?; + if nonce_bytes.len() != 12 { + return Err(CryptoError::InvalidNonceSize(nonce_bytes.len())); + } + let nonce: [u8; 12] = nonce_bytes.try_into().unwrap(); + + let data_key: [u8; 32] = if let Some(ck) = customer_key { + if ck.len() != 32 { + return Err(CryptoError::InvalidKeySize(ck.len())); + } + let mut k = [0u8; 32]; + k.copy_from_slice(ck); + k + } else if enc_meta.algorithm == "aws:kms" { + let kms = self + .kms + .as_ref() + .ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?; + let kid = enc_meta + .kms_key_id + .as_ref() + .ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID in metadata".into()))?; + let encrypted_dk = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| { + CryptoError::EncryptionFailed("No encrypted data key in metadata".into()) + })?; + let ct = B64.decode(encrypted_dk).map_err(|e| { + CryptoError::EncryptionFailed(format!("Bad data key encoding: {}", e)) + })?; + let dk = kms.decrypt_data(kid, &ct).await?; + if dk.len() != 32 { + return Err(CryptoError::InvalidKeySize(dk.len())); + } + let mut k = [0u8; 32]; + k.copy_from_slice(&dk); + k + } else { + let wrapped = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| { + CryptoError::EncryptionFailed("No encrypted data key in metadata".into()) + })?; + self.unwrap_data_key(wrapped)? 
+ }; + + Ok((data_key, nonce)) + } + + pub async fn decrypt_object( + &self, + input_path: &Path, + output_path: &Path, + enc_meta: &EncryptionMetadata, + customer_key: Option<&[u8]>, + ) -> Result<(), CryptoError> { + let (data_key, nonce) = self.resolve_data_key(enc_meta, customer_key).await?; + + let ip = input_path.to_owned(); + let op = output_path.to_owned(); + tokio::task::spawn_blocking(move || decrypt_stream_chunked(&ip, &op, &data_key, &nonce)) + .await + .map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??; + + Ok(()) + } + + pub async fn decrypt_object_range( + &self, + input_path: &Path, + output_path: &Path, + enc_meta: &EncryptionMetadata, + customer_key: Option<&[u8]>, + plain_start: u64, + plain_end_inclusive: u64, + ) -> Result { + let chunk_size = enc_meta.chunk_size.ok_or_else(|| { + CryptoError::EncryptionFailed("chunk_size missing from encryption metadata".into()) + })?; + let plaintext_size = enc_meta.plaintext_size.ok_or_else(|| { + CryptoError::EncryptionFailed("plaintext_size missing from encryption metadata".into()) + })?; + + let (data_key, nonce) = self.resolve_data_key(enc_meta, customer_key).await?; + + let ip = input_path.to_owned(); + let op = output_path.to_owned(); + tokio::task::spawn_blocking(move || { + decrypt_stream_chunked_range( + &ip, + &op, + &data_key, + &nonce, + chunk_size, + plaintext_size, + plain_start, + plain_end_inclusive, + ) + }) + .await + .map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))? + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + fn test_master_key() -> [u8; 32] { + [0x42u8; 32] + } + + #[test] + fn test_wrap_unwrap_data_key() { + let svc = EncryptionService::new(test_master_key(), None); + let dk = [0xAAu8; 32]; + let wrapped = svc.wrap_data_key(&dk).unwrap(); + let unwrapped = svc.unwrap_data_key(&wrapped).unwrap(); + assert_eq!(dk, unwrapped); + } + + #[tokio::test] + async fn test_encrypt_decrypt_object_sse_s3() { + let dir = tempfile::tempdir().unwrap(); + let input = dir.path().join("plain.bin"); + let encrypted = dir.path().join("enc.bin"); + let decrypted = dir.path().join("dec.bin"); + + let data = b"SSE-S3 encrypted content for testing!"; + std::fs::File::create(&input) + .unwrap() + .write_all(data) + .unwrap(); + + let svc = EncryptionService::new(test_master_key(), None); + + let ctx = EncryptionContext { + algorithm: SseAlgorithm::Aes256, + kms_key_id: None, + customer_key: None, + }; + + let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap(); + assert_eq!(meta.algorithm, "AES256"); + assert!(meta.encrypted_data_key.is_some()); + + svc.decrypt_object(&encrypted, &decrypted, &meta, None) + .await + .unwrap(); + + let result = std::fs::read(&decrypted).unwrap(); + assert_eq!(result, data); + } + + #[tokio::test] + async fn test_encrypt_decrypt_object_sse_c() { + let dir = tempfile::tempdir().unwrap(); + let input = dir.path().join("plain.bin"); + let encrypted = dir.path().join("enc.bin"); + let decrypted = dir.path().join("dec.bin"); + + let data = b"SSE-C encrypted content!"; + std::fs::File::create(&input) + .unwrap() + .write_all(data) + .unwrap(); + + let customer_key = [0xBBu8; 32]; + let svc = EncryptionService::new(test_master_key(), None); + + let ctx = EncryptionContext { + algorithm: SseAlgorithm::CustomerProvided, + kms_key_id: None, + customer_key: Some(customer_key.to_vec()), + }; + + let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap(); + 
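
A sketch of how the pieces above are meant to compose at read time: the EncryptionMetadata written alongside the object (via to_metadata_map at PUT time) is recovered with from_metadata and handed to decrypt_object_range. The paths and module paths here are assumptions, not code from this patch.

use myfsio_crypto::aes_gcm::CryptoError;                 // assumed module paths
use myfsio_crypto::encryption::{EncryptionMetadata, EncryptionService};

async fn read_encrypted_range(
    svc: &EncryptionService,
    ciphertext_path: &std::path::Path,   // object's stored (encrypted) payload
    scratch_path: &std::path::Path,      // temporary file for the decrypted slice
    object_metadata: &std::collections::HashMap<String, String>,
    start: u64,
    end_inclusive: u64,
) -> Result<u64, CryptoError> {
    // Objects written before encryption support simply have no SSE keys in their metadata.
    let enc_meta = EncryptionMetadata::from_metadata(object_metadata)
        .ok_or_else(|| CryptoError::EncryptionFailed("object is not encrypted".into()))?;
    // None = no SSE-C key supplied; SSE-S3/KMS data keys are resolved from the metadata itself.
    svc.decrypt_object_range(ciphertext_path, scratch_path, &enc_meta, None, start, end_inclusive)
        .await
}
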
assert!(meta.encrypted_data_key.is_none()); + + svc.decrypt_object(&encrypted, &decrypted, &meta, Some(&customer_key)) + .await + .unwrap(); + + let result = std::fs::read(&decrypted).unwrap(); + assert_eq!(result, data); + } + + #[test] + fn test_encryption_metadata_roundtrip() { + let meta = EncryptionMetadata { + algorithm: "AES256".to_string(), + nonce: "dGVzdG5vbmNlMTI=".to_string(), + encrypted_data_key: Some("c29tZWtleQ==".to_string()), + kms_key_id: None, + chunk_size: Some(65_536), + plaintext_size: Some(1_234_567), + }; + let map = meta.to_metadata_map(); + let restored = EncryptionMetadata::from_metadata(&map).unwrap(); + assert_eq!(restored.algorithm, "AES256"); + assert_eq!(restored.nonce, meta.nonce); + assert_eq!(restored.encrypted_data_key, meta.encrypted_data_key); + assert_eq!(restored.chunk_size, Some(65_536)); + assert_eq!(restored.plaintext_size, Some(1_234_567)); + } + + #[test] + fn test_encryption_metadata_legacy_missing_sizes() { + let mut map = HashMap::new(); + map.insert("x-amz-server-side-encryption".to_string(), "AES256".into()); + map.insert("x-amz-encryption-nonce".to_string(), "aGVsbG8=".into()); + let restored = EncryptionMetadata::from_metadata(&map).unwrap(); + assert_eq!(restored.chunk_size, None); + assert_eq!(restored.plaintext_size, None); + } + + #[test] + fn test_is_encrypted() { + let mut meta = HashMap::new(); + assert!(!EncryptionMetadata::is_encrypted(&meta)); + meta.insert( + "x-amz-server-side-encryption".to_string(), + "AES256".to_string(), + ); + assert!(EncryptionMetadata::is_encrypted(&meta)); + } +} diff --git a/crates/myfsio-crypto/src/hashing.rs b/crates/myfsio-crypto/src/hashing.rs new file mode 100644 index 0000000..31e2277 --- /dev/null +++ b/crates/myfsio-crypto/src/hashing.rs @@ -0,0 +1,138 @@ +use md5::{Digest, Md5}; +use sha2::Sha256; +use std::io::Read; +use std::path::Path; + +const CHUNK_SIZE: usize = 65536; + +pub fn md5_file(path: &Path) -> std::io::Result { + let mut file = std::fs::File::open(path)?; + let mut hasher = Md5::new(); + let mut buf = vec![0u8; CHUNK_SIZE]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +pub fn md5_bytes(data: &[u8]) -> String { + let mut hasher = Md5::new(); + hasher.update(data); + format!("{:x}", hasher.finalize()) +} + +pub fn sha256_file(path: &Path) -> std::io::Result { + let mut file = std::fs::File::open(path)?; + let mut hasher = Sha256::new(); + let mut buf = vec![0u8; CHUNK_SIZE]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +pub fn sha256_bytes(data: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(data); + format!("{:x}", hasher.finalize()) +} + +pub fn md5_sha256_file(path: &Path) -> std::io::Result<(String, String)> { + let mut file = std::fs::File::open(path)?; + let mut md5_hasher = Md5::new(); + let mut sha_hasher = Sha256::new(); + let mut buf = vec![0u8; CHUNK_SIZE]; + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + md5_hasher.update(&buf[..n]); + sha_hasher.update(&buf[..n]); + } + Ok(( + format!("{:x}", md5_hasher.finalize()), + format!("{:x}", sha_hasher.finalize()), + )) +} + +pub async fn md5_file_async(path: &Path) -> std::io::Result { + let path = path.to_owned(); + tokio::task::spawn_blocking(move || md5_file(&path)) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))? 
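
These digest helpers stream the file in 64 KiB reads, so they stay memory-bounded for large objects. For single-part uploads the S3 ETag is just the quoted hex MD5 of the body, which is the kind of call site sketched below; the module path is an assumption.

use std::path::Path;
use myfsio_crypto::hashing::md5_file_async; // assumed module path

async fn single_part_etag(object_path: &Path) -> std::io::Result<String> {
    let digest = md5_file_async(object_path).await?; // lowercase hex, e.g. "5d41402a..."
    Ok(format!("\"{}\"", digest))                    // S3 returns ETags wrapped in quotes
}
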
+} + +pub async fn sha256_file_async(path: &Path) -> std::io::Result { + let path = path.to_owned(); + tokio::task::spawn_blocking(move || sha256_file(&path)) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))? +} + +pub async fn md5_sha256_file_async(path: &Path) -> std::io::Result<(String, String)> { + let path = path.to_owned(); + tokio::task::spawn_blocking(move || md5_sha256_file(&path)) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))? +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn test_md5_bytes() { + assert_eq!(md5_bytes(b""), "d41d8cd98f00b204e9800998ecf8427e"); + assert_eq!(md5_bytes(b"hello"), "5d41402abc4b2a76b9719d911017c592"); + } + + #[test] + fn test_sha256_bytes() { + let hash = sha256_bytes(b"hello"); + assert_eq!( + hash, + "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + ); + } + + #[test] + fn test_md5_file() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(b"hello").unwrap(); + tmp.flush().unwrap(); + let hash = md5_file(tmp.path()).unwrap(); + assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592"); + } + + #[test] + fn test_md5_sha256_file() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(b"hello").unwrap(); + tmp.flush().unwrap(); + let (md5, sha) = md5_sha256_file(tmp.path()).unwrap(); + assert_eq!(md5, "5d41402abc4b2a76b9719d911017c592"); + assert_eq!( + sha, + "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + ); + } + + #[tokio::test] + async fn test_md5_file_async() { + let mut tmp = tempfile::NamedTempFile::new().unwrap(); + tmp.write_all(b"hello").unwrap(); + tmp.flush().unwrap(); + let hash = md5_file_async(tmp.path()).await.unwrap(); + assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592"); + } +} diff --git a/crates/myfsio-crypto/src/kms.rs b/crates/myfsio-crypto/src/kms.rs new file mode 100644 index 0000000..afdd8d3 --- /dev/null +++ b/crates/myfsio-crypto/src/kms.rs @@ -0,0 +1,451 @@ +use aes_gcm::aead::Aead; +use aes_gcm::{Aes256Gcm, KeyInit, Nonce}; +use base64::engine::general_purpose::STANDARD as B64; +use base64::Engine; +use chrono::{DateTime, Utc}; +use rand::RngCore; +use serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tokio::sync::RwLock; + +use crate::aes_gcm::CryptoError; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KmsKey { + #[serde(rename = "KeyId")] + pub key_id: String, + #[serde(rename = "Arn")] + pub arn: String, + #[serde(rename = "Description")] + pub description: String, + #[serde(rename = "CreationDate")] + pub creation_date: DateTime, + #[serde(rename = "Enabled")] + pub enabled: bool, + #[serde(rename = "KeyState")] + pub key_state: String, + #[serde(rename = "KeyUsage")] + pub key_usage: String, + #[serde(rename = "KeySpec")] + pub key_spec: String, + #[serde(rename = "EncryptedKeyMaterial")] + pub encrypted_key_material: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct KmsStore { + keys: Vec, +} + +pub struct KmsService { + keys_path: PathBuf, + master_key: Arc>, + keys: Arc>>, +} + +impl KmsService { + pub async fn new(keys_dir: &Path) -> Result { + std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?; + + let keys_path = keys_dir.join("kms_keys.json"); + + let master_key = Self::load_or_create_master_key(&keys_dir.join("kms_master.key"))?; + + let keys = if keys_path.exists() { + let data = std::fs::read_to_string(&keys_path).map_err(CryptoError::Io)?; + let store: 
KmsStore = serde_json::from_str(&data) + .map_err(|e| CryptoError::EncryptionFailed(format!("Bad KMS store: {}", e)))?; + store.keys + } else { + Vec::new() + }; + + Ok(Self { + keys_path, + master_key: Arc::new(RwLock::new(master_key)), + keys: Arc::new(RwLock::new(keys)), + }) + } + + fn load_or_create_master_key(path: &Path) -> Result<[u8; 32], CryptoError> { + if path.exists() { + let encoded = std::fs::read_to_string(path).map_err(CryptoError::Io)?; + let decoded = B64.decode(encoded.trim()).map_err(|e| { + CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e)) + })?; + if decoded.len() != 32 { + return Err(CryptoError::InvalidKeySize(decoded.len())); + } + let mut key = [0u8; 32]; + key.copy_from_slice(&decoded); + Ok(key) + } else { + let mut key = [0u8; 32]; + rand::thread_rng().fill_bytes(&mut key); + let encoded = B64.encode(key); + std::fs::write(path, &encoded).map_err(CryptoError::Io)?; + Ok(key) + } + } + + fn encrypt_key_material( + master_key: &[u8; 32], + plaintext_key: &[u8], + ) -> Result { + let cipher = Aes256Gcm::new(master_key.into()); + let mut nonce_bytes = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut nonce_bytes); + let nonce = Nonce::from_slice(&nonce_bytes); + + let ciphertext = cipher + .encrypt(nonce, plaintext_key) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + + let mut combined = Vec::with_capacity(12 + ciphertext.len()); + combined.extend_from_slice(&nonce_bytes); + combined.extend_from_slice(&ciphertext); + Ok(B64.encode(&combined)) + } + + fn decrypt_key_material( + master_key: &[u8; 32], + encrypted_b64: &str, + ) -> Result, CryptoError> { + let combined = B64.decode(encrypted_b64).map_err(|e| { + CryptoError::EncryptionFailed(format!("Bad key material encoding: {}", e)) + })?; + if combined.len() < 12 { + return Err(CryptoError::EncryptionFailed( + "Encrypted key material too short".to_string(), + )); + } + + let (nonce_bytes, ciphertext) = combined.split_at(12); + let cipher = Aes256Gcm::new(master_key.into()); + let nonce = Nonce::from_slice(nonce_bytes); + + cipher + .decrypt(nonce, ciphertext) + .map_err(|_| CryptoError::DecryptionFailed(0)) + } + + async fn save(&self) -> Result<(), CryptoError> { + let keys = self.keys.read().await; + let store = KmsStore { keys: keys.clone() }; + let json = serde_json::to_string_pretty(&store) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + std::fs::write(&self.keys_path, json).map_err(CryptoError::Io)?; + Ok(()) + } + + pub async fn create_key(&self, description: &str) -> Result { + let key_id = uuid::Uuid::new_v4().to_string(); + let arn = format!("arn:aws:kms:local:000000000000:key/{}", key_id); + + let mut plaintext_key = [0u8; 32]; + rand::thread_rng().fill_bytes(&mut plaintext_key); + + let master = self.master_key.read().await; + let encrypted = Self::encrypt_key_material(&master, &plaintext_key)?; + + let kms_key = KmsKey { + key_id: key_id.clone(), + arn, + description: description.to_string(), + creation_date: Utc::now(), + enabled: true, + key_state: "Enabled".to_string(), + key_usage: "ENCRYPT_DECRYPT".to_string(), + key_spec: "SYMMETRIC_DEFAULT".to_string(), + encrypted_key_material: encrypted, + }; + + self.keys.write().await.push(kms_key.clone()); + self.save().await?; + Ok(kms_key) + } + + pub async fn list_keys(&self) -> Vec { + self.keys.read().await.clone() + } + + pub async fn get_key(&self, key_id: &str) -> Option { + let keys = self.keys.read().await; + keys.iter() + .find(|k| k.key_id == key_id || k.arn == key_id) + .cloned() + } + + 
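
KmsService keeps its state on disk: kms_master.key holds the base64 master key, and kms_keys.json stores per-key material that is itself AES-256-GCM-encrypted under that master key, always with a 12-byte random nonce prefixed to the ciphertext. A short sketch of the round trip through create_key above and the encrypt_data/decrypt_data methods that follow; the directory and module layout are assumptions, and the unit tests further down exercise the same flow.

use std::path::Path;
use myfsio_crypto::aes_gcm::CryptoError;  // assumed module paths
use myfsio_crypto::kms::KmsService;

async fn kms_roundtrip(keys_dir: &Path) -> Result<(), CryptoError> {
    let kms = KmsService::new(keys_dir).await?;
    let key = kms.create_key("bucket default key").await?;

    // encrypt_data returns: 12-byte nonce || AES-256-GCM ciphertext.
    let ciphertext = kms.encrypt_data(&key.key_id, b"hello kms").await?;
    let plaintext = kms.decrypt_data(&key.key_id, &ciphertext).await?;
    assert_eq!(plaintext, b"hello kms");
    Ok(())
}
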
pub async fn delete_key(&self, key_id: &str) -> Result { + let mut keys = self.keys.write().await; + let len_before = keys.len(); + keys.retain(|k| k.key_id != key_id && k.arn != key_id); + let removed = keys.len() < len_before; + drop(keys); + if removed { + self.save().await?; + } + Ok(removed) + } + + pub async fn enable_key(&self, key_id: &str) -> Result { + let mut keys = self.keys.write().await; + if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) { + key.enabled = true; + key.key_state = "Enabled".to_string(); + drop(keys); + self.save().await?; + Ok(true) + } else { + Ok(false) + } + } + + pub async fn disable_key(&self, key_id: &str) -> Result { + let mut keys = self.keys.write().await; + if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) { + key.enabled = false; + key.key_state = "Disabled".to_string(); + drop(keys); + self.save().await?; + Ok(true) + } else { + Ok(false) + } + } + + pub async fn decrypt_data_key(&self, key_id: &str) -> Result, CryptoError> { + let keys = self.keys.read().await; + let key = keys + .iter() + .find(|k| k.key_id == key_id || k.arn == key_id) + .ok_or_else(|| CryptoError::EncryptionFailed("KMS key not found".to_string()))?; + + if !key.enabled { + return Err(CryptoError::EncryptionFailed( + "KMS key is disabled".to_string(), + )); + } + + let master = self.master_key.read().await; + Self::decrypt_key_material(&master, &key.encrypted_key_material) + } + + pub async fn encrypt_data( + &self, + key_id: &str, + plaintext: &[u8], + ) -> Result, CryptoError> { + let data_key = self.decrypt_data_key(key_id).await?; + if data_key.len() != 32 { + return Err(CryptoError::InvalidKeySize(data_key.len())); + } + + let key_arr: [u8; 32] = data_key.try_into().unwrap(); + let cipher = Aes256Gcm::new(&key_arr.into()); + let mut nonce_bytes = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut nonce_bytes); + let nonce = Nonce::from_slice(&nonce_bytes); + + let ciphertext = cipher + .encrypt(nonce, plaintext) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + + let mut result = Vec::with_capacity(12 + ciphertext.len()); + result.extend_from_slice(&nonce_bytes); + result.extend_from_slice(&ciphertext); + Ok(result) + } + + pub async fn decrypt_data( + &self, + key_id: &str, + ciphertext: &[u8], + ) -> Result, CryptoError> { + if ciphertext.len() < 12 { + return Err(CryptoError::EncryptionFailed( + "Ciphertext too short".to_string(), + )); + } + + let data_key = self.decrypt_data_key(key_id).await?; + if data_key.len() != 32 { + return Err(CryptoError::InvalidKeySize(data_key.len())); + } + + let key_arr: [u8; 32] = data_key.try_into().unwrap(); + let (nonce_bytes, ct) = ciphertext.split_at(12); + let cipher = Aes256Gcm::new(&key_arr.into()); + let nonce = Nonce::from_slice(nonce_bytes); + + cipher + .decrypt(nonce, ct) + .map_err(|_| CryptoError::DecryptionFailed(0)) + } + + pub async fn generate_data_key( + &self, + key_id: &str, + num_bytes: usize, + ) -> Result<(Vec, Vec), CryptoError> { + let kms_key = self.decrypt_data_key(key_id).await?; + if kms_key.len() != 32 { + return Err(CryptoError::InvalidKeySize(kms_key.len())); + } + + let mut plaintext_key = vec![0u8; num_bytes]; + rand::thread_rng().fill_bytes(&mut plaintext_key); + + let key_arr: [u8; 32] = kms_key.try_into().unwrap(); + let cipher = Aes256Gcm::new(&key_arr.into()); + let mut nonce_bytes = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut nonce_bytes); + let nonce = Nonce::from_slice(&nonce_bytes); + + let encrypted = cipher + .encrypt(nonce, 
plaintext_key.as_slice()) + .map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?; + + let mut wrapped = Vec::with_capacity(12 + encrypted.len()); + wrapped.extend_from_slice(&nonce_bytes); + wrapped.extend_from_slice(&encrypted); + + Ok((plaintext_key, wrapped)) + } +} + +pub async fn load_or_create_master_key(keys_dir: &Path) -> Result<[u8; 32], CryptoError> { + std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?; + let path = keys_dir.join("master.key"); + + if path.exists() { + let encoded = std::fs::read_to_string(&path).map_err(CryptoError::Io)?; + let decoded = B64.decode(encoded.trim()).map_err(|e| { + CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e)) + })?; + if decoded.len() != 32 { + return Err(CryptoError::InvalidKeySize(decoded.len())); + } + let mut key = [0u8; 32]; + key.copy_from_slice(&decoded); + Ok(key) + } else { + let mut key = [0u8; 32]; + rand::thread_rng().fill_bytes(&mut key); + let encoded = B64.encode(key); + std::fs::write(&path, &encoded).map_err(CryptoError::Io)?; + Ok(key) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_create_and_list_keys() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("test key").await.unwrap(); + assert!(key.enabled); + assert_eq!(key.description, "test key"); + assert!(key.key_id.len() > 0); + + let keys = kms.list_keys().await; + assert_eq!(keys.len(), 1); + assert_eq!(keys[0].key_id, key.key_id); + } + + #[tokio::test] + async fn test_enable_disable_key() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("toggle").await.unwrap(); + assert!(key.enabled); + + kms.disable_key(&key.key_id).await.unwrap(); + let k = kms.get_key(&key.key_id).await.unwrap(); + assert!(!k.enabled); + + kms.enable_key(&key.key_id).await.unwrap(); + let k = kms.get_key(&key.key_id).await.unwrap(); + assert!(k.enabled); + } + + #[tokio::test] + async fn test_delete_key() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("doomed").await.unwrap(); + assert!(kms.delete_key(&key.key_id).await.unwrap()); + assert!(kms.get_key(&key.key_id).await.is_none()); + assert_eq!(kms.list_keys().await.len(), 0); + } + + #[tokio::test] + async fn test_encrypt_decrypt_data() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("enc-key").await.unwrap(); + let plaintext = b"Hello, KMS!"; + + let ciphertext = kms.encrypt_data(&key.key_id, plaintext).await.unwrap(); + assert_ne!(&ciphertext, plaintext); + + let decrypted = kms.decrypt_data(&key.key_id, &ciphertext).await.unwrap(); + assert_eq!(decrypted, plaintext); + } + + #[tokio::test] + async fn test_generate_data_key() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("data-key-gen").await.unwrap(); + let (plaintext, wrapped) = kms.generate_data_key(&key.key_id, 32).await.unwrap(); + + assert_eq!(plaintext.len(), 32); + assert!(wrapped.len() > 32); + } + + #[tokio::test] + async fn test_disabled_key_cannot_encrypt() { + let dir = tempfile::tempdir().unwrap(); + let kms = KmsService::new(dir.path()).await.unwrap(); + + let key = kms.create_key("disabled").await.unwrap(); + kms.disable_key(&key.key_id).await.unwrap(); + + let result = kms.encrypt_data(&key.key_id, 
b"test").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_persistence_across_reload() { + let dir = tempfile::tempdir().unwrap(); + + let key_id = { + let kms = KmsService::new(dir.path()).await.unwrap(); + let key = kms.create_key("persistent").await.unwrap(); + key.key_id + }; + + let kms2 = KmsService::new(dir.path()).await.unwrap(); + let key = kms2.get_key(&key_id).await; + assert!(key.is_some()); + assert_eq!(key.unwrap().description, "persistent"); + } + + #[tokio::test] + async fn test_master_key_roundtrip() { + let dir = tempfile::tempdir().unwrap(); + let key1 = load_or_create_master_key(dir.path()).await.unwrap(); + let key2 = load_or_create_master_key(dir.path()).await.unwrap(); + assert_eq!(key1, key2); + } +} diff --git a/crates/myfsio-crypto/src/lib.rs b/crates/myfsio-crypto/src/lib.rs new file mode 100644 index 0000000..7f57a35 --- /dev/null +++ b/crates/myfsio-crypto/src/lib.rs @@ -0,0 +1,4 @@ +pub mod aes_gcm; +pub mod encryption; +pub mod hashing; +pub mod kms; diff --git a/crates/myfsio-server/Cargo.toml b/crates/myfsio-server/Cargo.toml new file mode 100644 index 0000000..eb0cd8a --- /dev/null +++ b/crates/myfsio-server/Cargo.toml @@ -0,0 +1,60 @@ +[package] +name = "myfsio-server" +version.workspace = true +edition.workspace = true + +[dependencies] +myfsio-common = { path = "../myfsio-common" } +myfsio-auth = { path = "../myfsio-auth" } +myfsio-crypto = { path = "../myfsio-crypto" } +myfsio-storage = { path = "../myfsio-storage" } +myfsio-xml = { path = "../myfsio-xml" } +base64 = { workspace = true } +md-5 = { workspace = true } +axum = { workspace = true } +tokio = { workspace = true } +tower = { workspace = true } +tower-http = { workspace = true } +hyper = { workspace = true } +bytes = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +serde_urlencoded = "0.7" +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +tokio-util = { workspace = true } +tokio-stream = { workspace = true } +chrono = { workspace = true } +uuid = { workspace = true } +futures = { workspace = true } +http-body = "1" +http-body-util = "0.1" +percent-encoding = { workspace = true } +quick-xml = { workspace = true } +mime_guess = "2" +crc32fast = { workspace = true } +sha2 = { workspace = true } +hex = { workspace = true } +duckdb = { workspace = true } +roxmltree = "0.20" +parking_lot = { workspace = true } +regex = "1" +multer = "3" +reqwest = { workspace = true } +aws-sdk-s3 = { workspace = true } +aws-config = { workspace = true } +aws-credential-types = { workspace = true } +aws-smithy-types = { workspace = true } +async-trait = { workspace = true } +rand = "0.8" +tera = { workspace = true } +cookie = { workspace = true } +subtle = { workspace = true } +clap = { workspace = true } +dotenvy = { workspace = true } +sysinfo = "0.32" +aes-gcm = { workspace = true } + +[dev-dependencies] +tempfile = "3" +tower = { workspace = true, features = ["util"] } diff --git a/crates/myfsio-server/src/config.rs b/crates/myfsio-server/src/config.rs new file mode 100644 index 0000000..ea83d9a --- /dev/null +++ b/crates/myfsio-server/src/config.rs @@ -0,0 +1,646 @@ +use std::net::SocketAddr; +use std::path::PathBuf; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct RateLimitSetting { + pub max_requests: u32, + pub window_seconds: u64, +} + +impl RateLimitSetting { + pub const fn new(max_requests: u32, window_seconds: u64) -> Self { + Self { + max_requests, + window_seconds, + } + } +} + +#[derive(Debug, Clone)] 
+pub struct ServerConfig { + pub bind_addr: SocketAddr, + pub ui_bind_addr: SocketAddr, + pub storage_root: PathBuf, + pub region: String, + pub iam_config_path: PathBuf, + pub sigv4_timestamp_tolerance_secs: u64, + pub presigned_url_min_expiry: u64, + pub presigned_url_max_expiry: u64, + pub secret_key: Option, + pub encryption_enabled: bool, + pub encryption_chunk_size_bytes: usize, + pub kms_enabled: bool, + pub kms_generate_data_key_min_bytes: usize, + pub kms_generate_data_key_max_bytes: usize, + pub gc_enabled: bool, + pub gc_interval_hours: f64, + pub gc_temp_file_max_age_hours: f64, + pub gc_multipart_max_age_days: u64, + pub gc_lock_file_max_age_hours: f64, + pub gc_dry_run: bool, + pub integrity_enabled: bool, + pub integrity_interval_hours: f64, + pub integrity_batch_size: usize, + pub integrity_auto_heal: bool, + pub integrity_dry_run: bool, + pub integrity_heal_concurrency: usize, + pub integrity_quarantine_retention_days: u64, + pub metrics_enabled: bool, + pub metrics_history_enabled: bool, + pub metrics_interval_minutes: u64, + pub metrics_retention_hours: u64, + pub metrics_history_interval_minutes: u64, + pub metrics_history_retention_hours: u64, + pub lifecycle_enabled: bool, + pub lifecycle_max_history_per_bucket: usize, + pub website_hosting_enabled: bool, + pub object_key_max_length_bytes: usize, + pub object_tag_limit: usize, + pub object_cache_max_size: usize, + pub bucket_config_cache_ttl_seconds: f64, + pub replication_connect_timeout_secs: u64, + pub replication_read_timeout_secs: u64, + pub replication_max_retries: u32, + pub replication_streaming_threshold_bytes: u64, + pub replication_max_failures_per_bucket: usize, + pub site_sync_enabled: bool, + pub site_sync_interval_secs: u64, + pub site_sync_batch_size: usize, + pub site_sync_connect_timeout_secs: u64, + pub site_sync_read_timeout_secs: u64, + pub site_sync_max_retries: u32, + pub site_sync_clock_skew_tolerance: f64, + pub site_id: Option, + pub site_endpoint: Option, + pub site_region: String, + pub site_priority: i32, + pub api_base_url: String, + pub num_trusted_proxies: usize, + pub allowed_redirect_hosts: Vec, + pub allow_internal_endpoints: bool, + pub cors_origins: Vec, + pub cors_methods: Vec, + pub cors_allow_headers: Vec, + pub cors_expose_headers: Vec, + pub session_lifetime_days: u64, + pub log_level: String, + pub multipart_min_part_size: u64, + pub bulk_delete_max_keys: usize, + pub stream_chunk_size: usize, + pub request_body_timeout_secs: u64, + pub ratelimit_default: RateLimitSetting, + pub ratelimit_list_buckets: RateLimitSetting, + pub ratelimit_bucket_ops: RateLimitSetting, + pub ratelimit_object_ops: RateLimitSetting, + pub ratelimit_head_ops: RateLimitSetting, + pub ratelimit_admin: RateLimitSetting, + pub ratelimit_storage_uri: String, + pub ui_enabled: bool, + pub templates_dir: PathBuf, + pub static_dir: PathBuf, +} + +impl ServerConfig { + pub fn from_env() -> Self { + let host = std::env::var("HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); + let port: u16 = std::env::var("PORT") + .unwrap_or_else(|_| "5000".to_string()) + .parse() + .unwrap_or(5000); + let host_ip: std::net::IpAddr = host.parse().unwrap(); + let bind_addr = SocketAddr::new(host_ip, port); + let ui_port: u16 = std::env::var("UI_PORT") + .unwrap_or_else(|_| "5100".to_string()) + .parse() + .unwrap_or(5100); + let storage_root = std::env::var("STORAGE_ROOT").unwrap_or_else(|_| "./data".to_string()); + let region = std::env::var("AWS_REGION").unwrap_or_else(|_| "us-east-1".to_string()); + + let storage_path = 
PathBuf::from(&storage_root); + let iam_config_path = std::env::var("IAM_CONFIG") + .map(PathBuf::from) + .unwrap_or_else(|_| { + storage_path + .join(".myfsio.sys") + .join("config") + .join("iam.json") + }); + + let sigv4_timestamp_tolerance_secs: u64 = + std::env::var("SIGV4_TIMESTAMP_TOLERANCE_SECONDS") + .unwrap_or_else(|_| "900".to_string()) + .parse() + .unwrap_or(900); + + let presigned_url_min_expiry: u64 = std::env::var("PRESIGNED_URL_MIN_EXPIRY_SECONDS") + .unwrap_or_else(|_| "1".to_string()) + .parse() + .unwrap_or(1); + + let presigned_url_max_expiry: u64 = std::env::var("PRESIGNED_URL_MAX_EXPIRY_SECONDS") + .unwrap_or_else(|_| "604800".to_string()) + .parse() + .unwrap_or(604800); + + let secret_key = { + let env_key = std::env::var("SECRET_KEY").ok(); + match env_key { + Some(k) if !k.is_empty() && k != "dev-secret-key" => Some(k), + _ => { + let secret_file = storage_path + .join(".myfsio.sys") + .join("config") + .join(".secret"); + std::fs::read_to_string(&secret_file) + .ok() + .map(|s| s.trim().to_string()) + } + } + }; + + let encryption_enabled = parse_bool_env("ENCRYPTION_ENABLED", false); + let encryption_chunk_size_bytes = parse_usize_env("ENCRYPTION_CHUNK_SIZE_BYTES", 65_536); + + let kms_enabled = parse_bool_env("KMS_ENABLED", false); + let kms_generate_data_key_min_bytes = parse_usize_env("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1); + let kms_generate_data_key_max_bytes = + parse_usize_env("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024); + + let gc_enabled = parse_bool_env("GC_ENABLED", false); + let gc_interval_hours = parse_f64_env("GC_INTERVAL_HOURS", 6.0); + let gc_temp_file_max_age_hours = parse_f64_env("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0); + let gc_multipart_max_age_days = parse_u64_env("GC_MULTIPART_MAX_AGE_DAYS", 7); + let gc_lock_file_max_age_hours = parse_f64_env("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0); + let gc_dry_run = parse_bool_env("GC_DRY_RUN", false); + + let integrity_enabled = parse_bool_env("INTEGRITY_ENABLED", false); + let integrity_interval_hours = parse_f64_env("INTEGRITY_INTERVAL_HOURS", 24.0); + let integrity_batch_size = parse_usize_env("INTEGRITY_BATCH_SIZE", 10_000); + let integrity_auto_heal = parse_bool_env("INTEGRITY_AUTO_HEAL", false); + let integrity_dry_run = parse_bool_env("INTEGRITY_DRY_RUN", false); + let integrity_heal_concurrency = parse_usize_env("INTEGRITY_HEAL_CONCURRENCY", 4); + let integrity_quarantine_retention_days = + parse_u64_env("INTEGRITY_QUARANTINE_RETENTION_DAYS", 7); + + let metrics_enabled = parse_bool_env("OPERATION_METRICS_ENABLED", false); + + let metrics_history_enabled = parse_bool_env("METRICS_HISTORY_ENABLED", false); + + let metrics_interval_minutes = parse_u64_env("OPERATION_METRICS_INTERVAL_MINUTES", 5); + let metrics_retention_hours = parse_u64_env("OPERATION_METRICS_RETENTION_HOURS", 24); + let metrics_history_interval_minutes = parse_u64_env("METRICS_HISTORY_INTERVAL_MINUTES", 5); + let metrics_history_retention_hours = parse_u64_env("METRICS_HISTORY_RETENTION_HOURS", 24); + + let lifecycle_enabled = parse_bool_env("LIFECYCLE_ENABLED", false); + let lifecycle_max_history_per_bucket = + parse_usize_env("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50); + + let website_hosting_enabled = parse_bool_env("WEBSITE_HOSTING_ENABLED", false); + let object_key_max_length_bytes = parse_usize_env("OBJECT_KEY_MAX_LENGTH_BYTES", 1024); + let object_tag_limit = parse_usize_env("OBJECT_TAG_LIMIT", 50); + let object_cache_max_size = parse_usize_env("OBJECT_CACHE_MAX_SIZE", 100); + let bucket_config_cache_ttl_seconds = + 
parse_f64_env("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0); + + let replication_connect_timeout_secs = + parse_u64_env("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5); + let replication_read_timeout_secs = parse_u64_env("REPLICATION_READ_TIMEOUT_SECONDS", 30); + let replication_max_retries = parse_u64_env("REPLICATION_MAX_RETRIES", 2) as u32; + let replication_streaming_threshold_bytes = + parse_u64_env("REPLICATION_STREAMING_THRESHOLD_BYTES", 10_485_760); + let replication_max_failures_per_bucket = + parse_u64_env("REPLICATION_MAX_FAILURES_PER_BUCKET", 50) as usize; + + let site_sync_enabled = parse_bool_env("SITE_SYNC_ENABLED", false); + let site_sync_interval_secs = parse_u64_env("SITE_SYNC_INTERVAL_SECONDS", 60); + let site_sync_batch_size = parse_u64_env("SITE_SYNC_BATCH_SIZE", 100) as usize; + let site_sync_connect_timeout_secs = parse_u64_env("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10); + let site_sync_read_timeout_secs = parse_u64_env("SITE_SYNC_READ_TIMEOUT_SECONDS", 120); + let site_sync_max_retries = parse_u64_env("SITE_SYNC_MAX_RETRIES", 2) as u32; + let site_sync_clock_skew_tolerance: f64 = + std::env::var("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(1.0); + + let site_id = parse_optional_string_env("SITE_ID"); + let site_endpoint = parse_optional_string_env("SITE_ENDPOINT"); + let site_region = std::env::var("SITE_REGION").unwrap_or_else(|_| region.clone()); + let site_priority = parse_i32_env("SITE_PRIORITY", 100); + let api_base_url = std::env::var("API_BASE_URL") + .unwrap_or_else(|_| format!("http://{}", bind_addr)) + .trim_end_matches('/') + .to_string(); + let num_trusted_proxies = parse_usize_env("NUM_TRUSTED_PROXIES", 0); + let allowed_redirect_hosts = parse_list_env("ALLOWED_REDIRECT_HOSTS", ""); + let allow_internal_endpoints = parse_bool_env("ALLOW_INTERNAL_ENDPOINTS", false); + let cors_origins = parse_list_env("CORS_ORIGINS", "*"); + let cors_methods = parse_list_env("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS,HEAD"); + let cors_allow_headers = parse_list_env("CORS_ALLOW_HEADERS", "*"); + let cors_expose_headers = parse_list_env("CORS_EXPOSE_HEADERS", "*"); + let session_lifetime_days = parse_u64_env("SESSION_LIFETIME_DAYS", 1); + let log_level = std::env::var("LOG_LEVEL").unwrap_or_else(|_| "INFO".to_string()); + let multipart_min_part_size = parse_u64_env("MULTIPART_MIN_PART_SIZE", 5_242_880); + let bulk_delete_max_keys = parse_usize_env("BULK_DELETE_MAX_KEYS", 1000); + let stream_chunk_size = parse_usize_env("STREAM_CHUNK_SIZE", 1_048_576); + let request_body_timeout_secs = parse_u64_env("REQUEST_BODY_TIMEOUT_SECONDS", 60); + let ratelimit_default = + parse_rate_limit_env("RATE_LIMIT_DEFAULT", RateLimitSetting::new(5000, 60)); + let ratelimit_list_buckets = + parse_rate_limit_env("RATE_LIMIT_LIST_BUCKETS", ratelimit_default); + let ratelimit_bucket_ops = + parse_rate_limit_env("RATE_LIMIT_BUCKET_OPS", ratelimit_default); + let ratelimit_object_ops = + parse_rate_limit_env("RATE_LIMIT_OBJECT_OPS", ratelimit_default); + let ratelimit_head_ops = + parse_rate_limit_env("RATE_LIMIT_HEAD_OPS", ratelimit_default); + let ratelimit_admin = + parse_rate_limit_env("RATE_LIMIT_ADMIN", RateLimitSetting::new(60, 60)); + let ratelimit_storage_uri = + std::env::var("RATE_LIMIT_STORAGE_URI").unwrap_or_else(|_| "memory://".to_string()); + + let ui_enabled = parse_bool_env("UI_ENABLED", true); + let templates_dir = std::env::var("TEMPLATES_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| default_templates_dir()); + let static_dir = 
std::env::var("STATIC_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| default_static_dir()); + + Self { + bind_addr, + ui_bind_addr: SocketAddr::new(host_ip, ui_port), + storage_root: storage_path, + region, + iam_config_path, + sigv4_timestamp_tolerance_secs, + presigned_url_min_expiry, + presigned_url_max_expiry, + secret_key, + encryption_enabled, + encryption_chunk_size_bytes, + kms_enabled, + kms_generate_data_key_min_bytes, + kms_generate_data_key_max_bytes, + gc_enabled, + gc_interval_hours, + gc_temp_file_max_age_hours, + gc_multipart_max_age_days, + gc_lock_file_max_age_hours, + gc_dry_run, + integrity_enabled, + integrity_interval_hours, + integrity_batch_size, + integrity_auto_heal, + integrity_dry_run, + integrity_heal_concurrency, + integrity_quarantine_retention_days, + metrics_enabled, + metrics_history_enabled, + metrics_interval_minutes, + metrics_retention_hours, + metrics_history_interval_minutes, + metrics_history_retention_hours, + lifecycle_enabled, + lifecycle_max_history_per_bucket, + website_hosting_enabled, + object_key_max_length_bytes, + object_tag_limit, + object_cache_max_size, + bucket_config_cache_ttl_seconds, + replication_connect_timeout_secs, + replication_read_timeout_secs, + replication_max_retries, + replication_streaming_threshold_bytes, + replication_max_failures_per_bucket, + site_sync_enabled, + site_sync_interval_secs, + site_sync_batch_size, + site_sync_connect_timeout_secs, + site_sync_read_timeout_secs, + site_sync_max_retries, + site_sync_clock_skew_tolerance, + site_id, + site_endpoint, + site_region, + site_priority, + api_base_url, + num_trusted_proxies, + allowed_redirect_hosts, + allow_internal_endpoints, + cors_origins, + cors_methods, + cors_allow_headers, + cors_expose_headers, + session_lifetime_days, + log_level, + multipart_min_part_size, + bulk_delete_max_keys, + stream_chunk_size, + request_body_timeout_secs, + ratelimit_default, + ratelimit_list_buckets, + ratelimit_bucket_ops, + ratelimit_object_ops, + ratelimit_head_ops, + ratelimit_admin, + ratelimit_storage_uri, + ui_enabled, + templates_dir, + static_dir, + } + } +} + +impl Default for ServerConfig { + fn default() -> Self { + Self { + bind_addr: "127.0.0.1:5000".parse().unwrap(), + ui_bind_addr: "127.0.0.1:5100".parse().unwrap(), + storage_root: PathBuf::from("./data"), + region: "us-east-1".to_string(), + iam_config_path: PathBuf::from("./data/.myfsio.sys/config/iam.json"), + sigv4_timestamp_tolerance_secs: 900, + presigned_url_min_expiry: 1, + presigned_url_max_expiry: 604_800, + secret_key: None, + encryption_enabled: false, + encryption_chunk_size_bytes: 65_536, + kms_enabled: false, + kms_generate_data_key_min_bytes: 1, + kms_generate_data_key_max_bytes: 1024, + gc_enabled: false, + gc_interval_hours: 6.0, + gc_temp_file_max_age_hours: 24.0, + gc_multipart_max_age_days: 7, + gc_lock_file_max_age_hours: 1.0, + gc_dry_run: false, + integrity_enabled: false, + integrity_interval_hours: 24.0, + integrity_batch_size: 10_000, + integrity_auto_heal: false, + integrity_dry_run: false, + integrity_heal_concurrency: 4, + integrity_quarantine_retention_days: 7, + metrics_enabled: false, + metrics_history_enabled: false, + metrics_interval_minutes: 5, + metrics_retention_hours: 24, + metrics_history_interval_minutes: 5, + metrics_history_retention_hours: 24, + lifecycle_enabled: false, + lifecycle_max_history_per_bucket: 50, + website_hosting_enabled: false, + object_key_max_length_bytes: 1024, + object_tag_limit: 50, + object_cache_max_size: 100, + 
bucket_config_cache_ttl_seconds: 30.0, + replication_connect_timeout_secs: 5, + replication_read_timeout_secs: 30, + replication_max_retries: 2, + replication_streaming_threshold_bytes: 10_485_760, + replication_max_failures_per_bucket: 50, + site_sync_enabled: false, + site_sync_interval_secs: 60, + site_sync_batch_size: 100, + site_sync_connect_timeout_secs: 10, + site_sync_read_timeout_secs: 120, + site_sync_max_retries: 2, + site_sync_clock_skew_tolerance: 1.0, + site_id: None, + site_endpoint: None, + site_region: "us-east-1".to_string(), + site_priority: 100, + api_base_url: "http://127.0.0.1:5000".to_string(), + num_trusted_proxies: 0, + allowed_redirect_hosts: Vec::new(), + allow_internal_endpoints: false, + cors_origins: vec!["*".to_string()], + cors_methods: vec![ + "GET".to_string(), + "PUT".to_string(), + "POST".to_string(), + "DELETE".to_string(), + "OPTIONS".to_string(), + "HEAD".to_string(), + ], + cors_allow_headers: vec!["*".to_string()], + cors_expose_headers: vec!["*".to_string()], + session_lifetime_days: 1, + log_level: "INFO".to_string(), + multipart_min_part_size: 5_242_880, + bulk_delete_max_keys: 1000, + stream_chunk_size: 1_048_576, + request_body_timeout_secs: 60, + ratelimit_default: RateLimitSetting::new(5000, 60), + ratelimit_list_buckets: RateLimitSetting::new(5000, 60), + ratelimit_bucket_ops: RateLimitSetting::new(5000, 60), + ratelimit_object_ops: RateLimitSetting::new(5000, 60), + ratelimit_head_ops: RateLimitSetting::new(5000, 60), + ratelimit_admin: RateLimitSetting::new(60, 60), + ratelimit_storage_uri: "memory://".to_string(), + ui_enabled: true, + templates_dir: default_templates_dir(), + static_dir: default_static_dir(), + } + } +} + +fn default_templates_dir() -> PathBuf { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest_dir.join("templates") +} + +fn default_static_dir() -> PathBuf { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + for candidate in [ + manifest_dir.join("static"), + manifest_dir.join("..").join("..").join("..").join("static"), + ] { + if candidate.exists() { + return candidate; + } + } + manifest_dir.join("static") +} + +fn parse_u64_env(key: &str, default: u64) -> u64 { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) +} + +fn parse_usize_env(key: &str, default: usize) -> usize { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) +} + +fn parse_i32_env(key: &str, default: i32) -> i32 { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) +} + +fn parse_f64_env(key: &str, default: f64) -> f64 { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) +} + +fn parse_bool_env(key: &str, default: bool) -> bool { + std::env::var(key) + .ok() + .map(|value| { + matches!( + value.trim().to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) + .unwrap_or(default) +} + +fn parse_optional_string_env(key: &str) -> Option { + std::env::var(key) + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) +} + +fn parse_list_env(key: &str, default: &str) -> Vec { + std::env::var(key) + .unwrap_or_else(|_| default.to_string()) + .split(',') + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) + .collect() +} + +pub fn parse_rate_limit(value: &str) -> Option { + let trimmed = value.trim(); + if let Some((requests, window)) = trimmed.split_once('/') { + let max_requests = requests.trim().parse::().ok()?; + if 
max_requests == 0 { + return None; + } + let window_str = window.trim().to_ascii_lowercase(); + let window_seconds = if let Ok(n) = window_str.parse::() { + if n == 0 { + return None; + } + n + } else { + match window_str.as_str() { + "s" | "sec" | "second" | "seconds" => 1, + "m" | "min" | "minute" | "minutes" => 60, + "h" | "hr" | "hour" | "hours" => 3600, + "d" | "day" | "days" => 86_400, + _ => return None, + } + }; + return Some(RateLimitSetting::new(max_requests, window_seconds)); + } + + let parts = trimmed.split_whitespace().collect::>(); + if parts.len() != 3 || !parts[1].eq_ignore_ascii_case("per") { + return None; + } + let max_requests = parts[0].parse::().ok()?; + if max_requests == 0 { + return None; + } + let window_seconds = match parts[2].to_ascii_lowercase().as_str() { + "second" | "seconds" => 1, + "minute" | "minutes" => 60, + "hour" | "hours" => 3600, + "day" | "days" => 86_400, + _ => return None, + }; + Some(RateLimitSetting::new(max_requests, window_seconds)) +} + +fn parse_rate_limit_env(key: &str, default: RateLimitSetting) -> RateLimitSetting { + std::env::var(key) + .ok() + .and_then(|value| parse_rate_limit(&value)) + .unwrap_or(default) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{Mutex, OnceLock}; + + fn env_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + + #[test] + fn parses_rate_limit_text() { + assert_eq!( + parse_rate_limit("200 per minute"), + Some(RateLimitSetting::new(200, 60)) + ); + assert_eq!( + parse_rate_limit("3 per hours"), + Some(RateLimitSetting::new(3, 3600)) + ); + assert_eq!( + parse_rate_limit("50000/60"), + Some(RateLimitSetting::new(50000, 60)) + ); + assert_eq!( + parse_rate_limit("100/minute"), + Some(RateLimitSetting::new(100, 60)) + ); + assert_eq!(parse_rate_limit("0/60"), None); + assert_eq!(parse_rate_limit("0 per minute"), None); + assert_eq!(parse_rate_limit("bad"), None); + } + + #[test] + fn env_defaults_and_invalid_values_fall_back() { + let _guard = env_lock().lock().unwrap(); + std::env::remove_var("OBJECT_KEY_MAX_LENGTH_BYTES"); + std::env::set_var("OBJECT_TAG_LIMIT", "not-a-number"); + std::env::set_var("RATE_LIMIT_DEFAULT", "invalid"); + + let config = ServerConfig::from_env(); + + assert_eq!(config.object_key_max_length_bytes, 1024); + assert_eq!(config.object_tag_limit, 50); + assert_eq!(config.ratelimit_default, RateLimitSetting::new(5000, 60)); + + std::env::remove_var("OBJECT_TAG_LIMIT"); + std::env::remove_var("RATE_LIMIT_DEFAULT"); + } + + #[test] + fn env_overrides_new_values() { + let _guard = env_lock().lock().unwrap(); + std::env::set_var("OBJECT_KEY_MAX_LENGTH_BYTES", "2048"); + std::env::set_var("GC_DRY_RUN", "true"); + std::env::set_var("RATE_LIMIT_ADMIN", "7 per second"); + std::env::set_var("HOST", "127.0.0.1"); + std::env::set_var("PORT", "5501"); + std::env::remove_var("API_BASE_URL"); + + let config = ServerConfig::from_env(); + + assert_eq!(config.object_key_max_length_bytes, 2048); + assert!(config.gc_dry_run); + assert_eq!(config.ratelimit_admin, RateLimitSetting::new(7, 1)); + assert_eq!(config.api_base_url, "http://127.0.0.1:5501"); + + std::env::remove_var("OBJECT_KEY_MAX_LENGTH_BYTES"); + std::env::remove_var("GC_DRY_RUN"); + std::env::remove_var("RATE_LIMIT_ADMIN"); + std::env::remove_var("HOST"); + std::env::remove_var("PORT"); + } +} diff --git a/crates/myfsio-server/src/handlers/admin.rs b/crates/myfsio-server/src/handlers/admin.rs new file mode 100644 index 0000000..246f07e --- /dev/null +++ 
b/crates/myfsio-server/src/handlers/admin.rs @@ -0,0 +1,1414 @@ +use axum::body::Body; +use axum::extract::{Path, State}; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use axum::Extension; +use myfsio_common::types::Principal; +use myfsio_storage::traits::StorageEngine; + +use crate::services::site_registry::{PeerSite, SiteInfo}; +use crate::services::website_domains::{is_valid_domain, normalize_domain}; +use crate::state::AppState; + +fn json_response(status: StatusCode, value: serde_json::Value) -> Response { + ( + status, + [("content-type", "application/json")], + value.to_string(), + ) + .into_response() +} + +fn json_error(code: &str, message: &str, status: StatusCode) -> Response { + json_response( + status, + serde_json::json!({"error": {"code": code, "message": message}}), + ) +} + +fn push_issue(result: &mut serde_json::Value, issue: serde_json::Value) { + if let Some(items) = result + .get_mut("issues") + .and_then(|value| value.as_array_mut()) + { + items.push(issue); + } +} + +fn require_admin(principal: &Principal) -> Option { + if !principal.is_admin { + return Some(json_error( + "AccessDenied", + "Admin access required", + StatusCode::FORBIDDEN, + )); + } + None +} + +fn require_iam_action(state: &AppState, principal: &Principal, action: &str) -> Option { + if !state.iam.authorize(principal, None, action, None) { + return Some(json_error( + "AccessDenied", + &format!("Requires {} permission", action), + StatusCode::FORBIDDEN, + )); + } + None +} + +async fn read_json_body(body: Body) -> Option { + let bytes = http_body_util::BodyExt::collect(body) + .await + .ok()? + .to_bytes(); + serde_json::from_slice(&bytes).ok() +} + +fn validate_site_id(site_id: &str) -> Option { + if site_id.is_empty() || site_id.len() > 63 { + return Some("site_id must be 1-63 characters".to_string()); + } + let first = site_id.chars().next().unwrap(); + if !first.is_ascii_alphanumeric() { + return Some("site_id must start with alphanumeric".to_string()); + } + if !site_id + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') + { + return Some("site_id must contain only alphanumeric, hyphens, underscores".to_string()); + } + None +} + +fn validate_endpoint(endpoint: &str) -> Option { + if !endpoint.starts_with("http://") && !endpoint.starts_with("https://") { + return Some("Endpoint must be http or https URL".to_string()); + } + None +} + +fn validate_region(region: &str) -> Option { + let re = regex::Regex::new(r"^[a-z]{2,}-[a-z]+-\d+$").unwrap(); + if !re.is_match(region) { + return Some("Region must match format like us-east-1".to_string()); + } + None +} + +fn validate_priority(priority: i64) -> Option { + if priority < 0 || priority > 1000 { + return Some("Priority must be between 0 and 1000".to_string()); + } + None +} + +pub async fn get_local_site( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + + if let Some(ref registry) = state.site_registry { + if let Some(local) = registry.get_local_site() { + return json_response(StatusCode::OK, serde_json::to_value(&local).unwrap()); + } + } + + json_error( + "NotFound", + "Local site not configured", + StatusCode::NOT_FOUND, + ) +} + +pub async fn update_local_site( + State(state): State, + Extension(principal): Extension, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return 
json_error( + "InvalidRequest", + "Site registry not available", + StatusCode::BAD_REQUEST, + ) + } + }; + + let payload = match read_json_body(body).await { + Some(v) => v, + None => { + return json_error( + "MalformedJSON", + "Invalid JSON body", + StatusCode::BAD_REQUEST, + ) + } + }; + + let site_id = match payload.get("site_id").and_then(|v| v.as_str()) { + Some(s) => s.to_string(), + None => { + return json_error( + "ValidationError", + "site_id is required", + StatusCode::BAD_REQUEST, + ) + } + }; + + if let Some(err) = validate_site_id(&site_id) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + + let endpoint = payload + .get("endpoint") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + if !endpoint.is_empty() { + if let Some(err) = validate_endpoint(&endpoint) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + + if let Some(p) = payload.get("priority").and_then(|v| v.as_i64()) { + if let Some(err) = validate_priority(p) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + + if let Some(r) = payload.get("region").and_then(|v| v.as_str()) { + if let Some(err) = validate_region(r) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + + let existing = registry.get_local_site(); + let site = SiteInfo { + site_id: site_id.clone(), + endpoint, + region: payload + .get("region") + .and_then(|v| v.as_str()) + .unwrap_or("us-east-1") + .to_string(), + priority: payload + .get("priority") + .and_then(|v| v.as_i64()) + .unwrap_or(100) as i32, + display_name: payload + .get("display_name") + .and_then(|v| v.as_str()) + .unwrap_or(&site_id) + .to_string(), + created_at: existing.and_then(|e| e.created_at), + }; + + registry.set_local_site(site.clone()); + json_response(StatusCode::OK, serde_json::to_value(&site).unwrap()) +} + +pub async fn list_all_sites( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_response( + StatusCode::OK, + serde_json::json!({"local": null, "peers": [], "total_peers": 0}), + ) + } + }; + + let local = registry.get_local_site(); + let peers = registry.list_peers(); + + json_response( + StatusCode::OK, + serde_json::json!({ + "local": local, + "peers": peers, + "total_peers": peers.len(), + }), + ) +} + +pub async fn register_peer_site( + State(state): State, + Extension(principal): Extension, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "InvalidRequest", + "Site registry not available", + StatusCode::BAD_REQUEST, + ) + } + }; + + let payload = match read_json_body(body).await { + Some(v) => v, + None => { + return json_error( + "MalformedJSON", + "Invalid JSON body", + StatusCode::BAD_REQUEST, + ) + } + }; + + let site_id = match payload.get("site_id").and_then(|v| v.as_str()) { + Some(s) => s.to_string(), + None => { + return json_error( + "ValidationError", + "site_id is required", + StatusCode::BAD_REQUEST, + ) + } + }; + if let Some(err) = validate_site_id(&site_id) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + + let endpoint = match payload.get("endpoint").and_then(|v| v.as_str()) { + Some(e) => e.to_string(), + None => { + return json_error( + "ValidationError", + "endpoint 
is required", + StatusCode::BAD_REQUEST, + ) + } + }; + if let Some(err) = validate_endpoint(&endpoint) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + + let region = payload + .get("region") + .and_then(|v| v.as_str()) + .unwrap_or("us-east-1") + .to_string(); + if let Some(err) = validate_region(®ion) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + + let priority = payload + .get("priority") + .and_then(|v| v.as_i64()) + .unwrap_or(100); + if let Some(err) = validate_priority(priority) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + + if registry.get_peer(&site_id).is_some() { + return json_error( + "AlreadyExists", + &format!("Peer site '{}' already exists", site_id), + StatusCode::CONFLICT, + ); + } + + let peer = PeerSite { + site_id: site_id.clone(), + endpoint, + region, + priority: priority as i32, + display_name: payload + .get("display_name") + .and_then(|v| v.as_str()) + .unwrap_or(&site_id) + .to_string(), + connection_id: payload + .get("connection_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + created_at: Some(chrono::Utc::now().to_rfc3339()), + is_healthy: false, + last_health_check: None, + }; + + registry.add_peer(peer.clone()); + json_response(StatusCode::CREATED, serde_json::to_value(&peer).unwrap()) +} + +pub async fn get_peer_site( + State(state): State, + Extension(principal): Extension, + Path(site_id): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "NotFound", + "Site registry not available", + StatusCode::NOT_FOUND, + ) + } + }; + + match registry.get_peer(&site_id) { + Some(peer) => json_response(StatusCode::OK, serde_json::to_value(&peer).unwrap()), + None => json_error( + "NotFound", + &format!("Peer site '{}' not found", site_id), + StatusCode::NOT_FOUND, + ), + } +} + +pub async fn update_peer_site( + State(state): State, + Extension(principal): Extension, + Path(site_id): Path, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "NotFound", + "Site registry not available", + StatusCode::NOT_FOUND, + ) + } + }; + + let existing = match registry.get_peer(&site_id) { + Some(p) => p, + None => { + return json_error( + "NotFound", + &format!("Peer site '{}' not found", site_id), + StatusCode::NOT_FOUND, + ) + } + }; + + let payload = match read_json_body(body).await { + Some(v) => v, + None => { + return json_error( + "MalformedJSON", + "Invalid JSON body", + StatusCode::BAD_REQUEST, + ) + } + }; + + if let Some(ep) = payload.get("endpoint").and_then(|v| v.as_str()) { + if let Some(err) = validate_endpoint(ep) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + if let Some(p) = payload.get("priority").and_then(|v| v.as_i64()) { + if let Some(err) = validate_priority(p) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + if let Some(r) = payload.get("region").and_then(|v| v.as_str()) { + if let Some(err) = validate_region(r) { + return json_error("ValidationError", &err, StatusCode::BAD_REQUEST); + } + } + + let peer = PeerSite { + site_id: site_id.clone(), + endpoint: payload + .get("endpoint") + .and_then(|v| v.as_str()) + .unwrap_or(&existing.endpoint) + .to_string(), + region: payload + .get("region") + .and_then(|v| 
v.as_str()) + .unwrap_or(&existing.region) + .to_string(), + priority: payload + .get("priority") + .and_then(|v| v.as_i64()) + .unwrap_or(existing.priority as i64) as i32, + display_name: payload + .get("display_name") + .and_then(|v| v.as_str()) + .unwrap_or(&existing.display_name) + .to_string(), + connection_id: payload + .get("connection_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .or(existing.connection_id), + created_at: existing.created_at, + is_healthy: existing.is_healthy, + last_health_check: existing.last_health_check, + }; + + registry.update_peer(peer.clone()); + json_response(StatusCode::OK, serde_json::to_value(&peer).unwrap()) +} + +pub async fn delete_peer_site( + State(state): State, + Extension(principal): Extension, + Path(site_id): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "NotFound", + "Site registry not available", + StatusCode::NOT_FOUND, + ) + } + }; + + if !registry.delete_peer(&site_id) { + return json_error( + "NotFound", + &format!("Peer site '{}' not found", site_id), + StatusCode::NOT_FOUND, + ); + } + StatusCode::NO_CONTENT.into_response() +} + +pub async fn check_peer_health( + State(state): State, + Extension(principal): Extension, + Path(site_id): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "NotFound", + "Site registry not available", + StatusCode::NOT_FOUND, + ) + } + }; + + if registry.get_peer(&site_id).is_none() { + return json_error( + "NotFound", + &format!("Peer site '{}' not found", site_id), + StatusCode::NOT_FOUND, + ); + } + + let peer = registry.get_peer(&site_id).unwrap(); + let checked_at = chrono::Utc::now().timestamp_millis() as f64 / 1000.0; + let mut is_healthy = false; + let mut error: Option = None; + + if let Some(connection_id) = peer.connection_id.as_deref() { + if let Some(connection) = state.connections.get(connection_id) { + is_healthy = state.replication.check_endpoint(&connection).await; + if !is_healthy { + error = Some(format!( + "Cannot reach endpoint: {}", + connection.endpoint_url + )); + } + } else { + error = Some(format!("Connection '{}' not found", connection_id)); + } + } else { + error = Some("No connection configured for this peer".to_string()); + } + + registry.update_health(&site_id, is_healthy); + + json_response( + StatusCode::OK, + serde_json::json!({ + "site_id": site_id, + "is_healthy": is_healthy, + "error": error, + "checked_at": checked_at, + }), + ) +} + +pub async fn get_topology( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_response( + StatusCode::OK, + serde_json::json!({"sites": [], "total": 0, "healthy_count": 0}), + ) + } + }; + + let local = registry.get_local_site(); + let peers = registry.list_peers(); + + let mut sites: Vec = Vec::new(); + if let Some(l) = local { + let mut v = serde_json::to_value(&l).unwrap(); + v.as_object_mut() + .unwrap() + .insert("is_local".to_string(), serde_json::json!(true)); + v.as_object_mut() + .unwrap() + .insert("is_healthy".to_string(), serde_json::json!(true)); + sites.push(v); + } + for p in &peers { + let mut v = serde_json::to_value(p).unwrap(); + v.as_object_mut() + .unwrap() 
+ .insert("is_local".to_string(), serde_json::json!(false)); + sites.push(v); + } + + sites.sort_by_key(|s| s.get("priority").and_then(|v| v.as_i64()).unwrap_or(100)); + + let healthy_count = sites + .iter() + .filter(|s| { + s.get("is_healthy") + .and_then(|v| v.as_bool()) + .unwrap_or(false) + }) + .count(); + + json_response( + StatusCode::OK, + serde_json::json!({ + "sites": sites, + "total": sites.len(), + "healthy_count": healthy_count, + }), + ) +} + +pub async fn check_bidirectional_status( + State(state): State, + Extension(principal): Extension, + Path(site_id): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let registry = match &state.site_registry { + Some(r) => r, + None => { + return json_error( + "NotFound", + "Site registry not available", + StatusCode::NOT_FOUND, + ) + } + }; + + if registry.get_peer(&site_id).is_none() { + return json_error( + "NotFound", + &format!("Peer site '{}' not found", site_id), + StatusCode::NOT_FOUND, + ); + } + + let local = registry.get_local_site(); + let peer = registry.get_peer(&site_id).unwrap(); + let local_bidirectional_rules: Vec = state + .replication + .list_rules() + .into_iter() + .filter(|rule| { + peer.connection_id + .as_deref() + .map(|connection_id| rule.target_connection_id == connection_id) + .unwrap_or(false) + && rule.mode == crate::services::replication::MODE_BIDIRECTIONAL + }) + .map(|rule| { + serde_json::json!({ + "bucket_name": rule.bucket_name, + "target_bucket": rule.target_bucket, + "enabled": rule.enabled, + }) + }) + .collect(); + + let mut result = serde_json::json!({ + "site_id": site_id, + "local_site_id": local.as_ref().map(|l| l.site_id.clone()), + "local_endpoint": local.as_ref().map(|l| l.endpoint.clone()), + "local_bidirectional_rules": local_bidirectional_rules, + "local_site_sync_enabled": state.config.site_sync_enabled, + "remote_status": null, + "issues": Vec::::new(), + "is_fully_configured": false, + }); + + if local + .as_ref() + .map(|site| site.site_id.trim().is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + serde_json::json!({ + "code": "NO_LOCAL_SITE_ID", + "message": "Local site identity not configured", + "severity": "error", + }), + ); + } + if local + .as_ref() + .map(|site| site.endpoint.trim().is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + serde_json::json!({ + "code": "NO_LOCAL_ENDPOINT", + "message": "Local site endpoint not configured (remote site cannot reach back)", + "severity": "error", + }), + ); + } + + let Some(connection_id) = peer.connection_id.as_deref() else { + push_issue( + &mut result, + serde_json::json!({ + "code": "NO_CONNECTION", + "message": "No connection configured for this peer", + "severity": "error", + }), + ); + return json_response(StatusCode::OK, result); + }; + + let Some(connection) = state.connections.get(connection_id) else { + push_issue( + &mut result, + serde_json::json!({ + "code": "CONNECTION_NOT_FOUND", + "message": format!("Connection '{}' not found", connection_id), + "severity": "error", + }), + ); + return json_response(StatusCode::OK, result); + }; + + if result["local_bidirectional_rules"] + .as_array() + .map(|rules| rules.is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + serde_json::json!({ + "code": "NO_LOCAL_BIDIRECTIONAL_RULES", + "message": "No bidirectional replication rules configured on this site", + "severity": "warning", + }), + ); + } + if !state.config.site_sync_enabled { + push_issue( + &mut result, + serde_json::json!({ + "code": 
"SITE_SYNC_DISABLED", + "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.", + "severity": "warning", + }), + ); + } + if !state.replication.check_endpoint(&connection).await { + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_UNREACHABLE", + "message": "Remote endpoint is not reachable", + "severity": "error", + }), + ); + return json_response(StatusCode::OK, result); + } + + let admin_url = format!( + "{}/admin/sites", + connection.endpoint_url.trim_end_matches('/') + ); + match reqwest::Client::new() + .get(&admin_url) + .header("accept", "application/json") + .header("x-access-key", &connection.access_key) + .header("x-secret-key", &connection.secret_key) + .timeout(std::time::Duration::from_secs(10)) + .send() + .await + { + Ok(resp) if resp.status().is_success() => match resp.json::().await { + Ok(remote_data) => { + let remote_local = remote_data + .get("local") + .cloned() + .unwrap_or(serde_json::Value::Null); + let remote_peers = remote_data + .get("peers") + .and_then(|value| value.as_array()) + .cloned() + .unwrap_or_default(); + let mut has_peer_for_us = false; + let mut peer_connection_configured = false; + + for remote_peer in &remote_peers { + let matches_site = local + .as_ref() + .map(|site| { + remote_peer.get("site_id").and_then(|v| v.as_str()) + == Some(site.site_id.as_str()) + || remote_peer.get("endpoint").and_then(|v| v.as_str()) + == Some(site.endpoint.as_str()) + }) + .unwrap_or(false); + if matches_site { + has_peer_for_us = true; + peer_connection_configured = remote_peer + .get("connection_id") + .and_then(|v| v.as_str()) + .map(|v| !v.trim().is_empty()) + .unwrap_or(false); + break; + } + } + + result["remote_status"] = serde_json::json!({ + "reachable": true, + "local_site": remote_local, + "site_sync_enabled": serde_json::Value::Null, + "has_peer_for_us": has_peer_for_us, + "peer_connection_configured": peer_connection_configured, + "has_bidirectional_rules_for_us": serde_json::Value::Null, + }); + + if !has_peer_for_us { + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_NO_PEER_FOR_US", + "message": "Remote site does not have this site registered as a peer", + "severity": "error", + }), + ); + } else if !peer_connection_configured { + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_NO_CONNECTION_FOR_US", + "message": "Remote site has us as peer but no connection configured (cannot push back)", + "severity": "error", + }), + ); + } + } + Err(_) => { + result["remote_status"] = serde_json::json!({ + "reachable": true, + "invalid_response": true, + }); + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_INVALID_RESPONSE", + "message": "Remote admin API returned invalid JSON", + "severity": "warning", + }), + ); + } + }, + Ok(resp) + if resp.status() == StatusCode::UNAUTHORIZED + || resp.status() == StatusCode::FORBIDDEN => + { + result["remote_status"] = serde_json::json!({ + "reachable": true, + "admin_access_denied": true, + }); + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_ADMIN_ACCESS_DENIED", + "message": "Cannot verify remote configuration (admin access denied)", + "severity": "warning", + }), + ); + } + Ok(resp) => { + result["remote_status"] = serde_json::json!({ + "reachable": true, + "admin_api_error": resp.status().as_u16(), + }); + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_ADMIN_API_ERROR", + "message": format!("Remote admin API returned status {}", resp.status().as_u16()), + 
"severity": "warning", + }), + ); + } + Err(_) => { + result["remote_status"] = serde_json::json!({ + "reachable": false, + "error": "Connection failed", + }); + push_issue( + &mut result, + serde_json::json!({ + "code": "REMOTE_ADMIN_UNREACHABLE", + "message": "Could not reach remote admin API", + "severity": "warning", + }), + ); + } + } + + let has_errors = result["issues"] + .as_array() + .map(|items| { + items.iter().any(|issue| { + issue.get("severity").and_then(|value| value.as_str()) == Some("error") + }) + }) + .unwrap_or(true); + result["is_fully_configured"] = serde_json::json!( + !has_errors + && result["local_bidirectional_rules"] + .as_array() + .map(|rules| !rules.is_empty()) + .unwrap_or(false) + ); + + json_response(StatusCode::OK, result) +} + +pub async fn iam_list_users( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:list_users") { + return err; + } + let users = state.iam.list_users().await; + json_response(StatusCode::OK, serde_json::json!({"users": users})) +} + +pub async fn iam_get_user( + State(state): State, + Extension(principal): Extension, + Path(identifier): Path, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:get_user") { + return err; + } + match state.iam.get_user(&identifier).await { + Some(user) => json_response(StatusCode::OK, user), + None => json_error( + "NotFound", + &format!("User '{}' not found", identifier), + StatusCode::NOT_FOUND, + ), + } +} + +pub async fn iam_get_user_policies( + State(state): State, + Extension(principal): Extension, + Path(identifier): Path, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:get_policy") { + return err; + } + match state.iam.get_user_policies(&identifier) { + Some(policies) => json_response(StatusCode::OK, serde_json::json!({"policies": policies})), + None => json_error( + "NotFound", + &format!("User '{}' not found", identifier), + StatusCode::NOT_FOUND, + ), + } +} + +pub async fn iam_create_access_key( + State(state): State, + Extension(principal): Extension, + Path(identifier): Path, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:create_key") { + return err; + } + match state.iam.create_access_key(&identifier) { + Ok(result) => json_response(StatusCode::CREATED, result), + Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST), + } +} + +pub async fn iam_delete_access_key( + State(state): State, + Extension(principal): Extension, + Path((_identifier, access_key)): Path<(String, String)>, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:delete_key") { + return err; + } + match state.iam.delete_access_key(&access_key) { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST), + } +} + +pub async fn iam_disable_user( + State(state): State, + Extension(principal): Extension, + Path(identifier): Path, +) -> Response { + if let Some(err) = require_iam_action(&state, &principal, "iam:disable_user") { + return err; + } + match state.iam.set_user_enabled(&identifier, false).await { + Ok(()) => json_response(StatusCode::OK, serde_json::json!({"status": "disabled"})), + Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST), + } +} + +pub async fn iam_enable_user( + State(state): State, + Extension(principal): Extension, + Path(identifier): Path, +) -> Response { + if let Some(err) = 
require_iam_action(&state, &principal, "iam:disable_user") { + return err; + } + match state.iam.set_user_enabled(&identifier, true).await { + Ok(()) => json_response(StatusCode::OK, serde_json::json!({"status": "enabled"})), + Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST), + } +} + +pub async fn list_website_domains( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let store = match &state.website_domains { + Some(s) => s, + None => { + return json_error( + "InvalidRequest", + "Website hosting is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + json_response(StatusCode::OK, serde_json::json!(store.list_all())) +} + +pub async fn create_website_domain( + State(state): State, + Extension(principal): Extension, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let store = match &state.website_domains { + Some(s) => s, + None => { + return json_error( + "InvalidRequest", + "Website hosting is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + + let payload = match read_json_body(body).await { + Some(v) => v, + None => { + return json_error( + "MalformedJSON", + "Invalid JSON body", + StatusCode::BAD_REQUEST, + ) + } + }; + + let domain = normalize_domain(payload.get("domain").and_then(|v| v.as_str()).unwrap_or("")); + if domain.is_empty() { + return json_error( + "ValidationError", + "domain is required", + StatusCode::BAD_REQUEST, + ); + } + if !is_valid_domain(&domain) { + return json_error( + "ValidationError", + &format!("Invalid domain: '{}'", domain), + StatusCode::BAD_REQUEST, + ); + } + + let bucket = payload + .get("bucket") + .and_then(|v| v.as_str()) + .unwrap_or("") + .trim() + .to_string(); + if bucket.is_empty() { + return json_error( + "ValidationError", + "bucket is required", + StatusCode::BAD_REQUEST, + ); + } + + match state.storage.bucket_exists(&bucket).await { + Ok(true) => {} + _ => { + return json_error( + "NoSuchBucket", + &format!("Bucket '{}' does not exist", bucket), + StatusCode::NOT_FOUND, + ) + } + } + + if store.get_bucket(&domain).is_some() { + return json_error( + "Conflict", + &format!("Domain '{}' is already mapped", domain), + StatusCode::CONFLICT, + ); + } + + store.set_mapping(&domain, &bucket); + json_response( + StatusCode::CREATED, + serde_json::json!({"domain": domain, "bucket": bucket}), + ) +} + +pub async fn get_website_domain( + State(state): State, + Extension(principal): Extension, + Path(domain): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let store = match &state.website_domains { + Some(s) => s, + None => { + return json_error( + "InvalidRequest", + "Website hosting is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + + let domain = normalize_domain(&domain); + match store.get_bucket(&domain) { + Some(bucket) => json_response( + StatusCode::OK, + serde_json::json!({"domain": domain, "bucket": bucket}), + ), + None => json_error( + "NotFound", + &format!("No mapping found for domain '{}'", domain), + StatusCode::NOT_FOUND, + ), + } +} + +pub async fn update_website_domain( + State(state): State, + Extension(principal): Extension, + Path(domain): Path, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let store = match &state.website_domains { + Some(s) => s, + None => { + return json_error( + "InvalidRequest", + "Website hosting is not enabled", + 
StatusCode::BAD_REQUEST, + ) + } + }; + + let domain = normalize_domain(&domain); + let payload = match read_json_body(body).await { + Some(v) => v, + None => { + return json_error( + "MalformedJSON", + "Invalid JSON body", + StatusCode::BAD_REQUEST, + ) + } + }; + + let bucket = payload + .get("bucket") + .and_then(|v| v.as_str()) + .unwrap_or("") + .trim() + .to_string(); + if bucket.is_empty() { + return json_error( + "ValidationError", + "bucket is required", + StatusCode::BAD_REQUEST, + ); + } + + match state.storage.bucket_exists(&bucket).await { + Ok(true) => {} + _ => { + return json_error( + "NoSuchBucket", + &format!("Bucket '{}' does not exist", bucket), + StatusCode::NOT_FOUND, + ) + } + } + + if store.get_bucket(&domain).is_none() { + return json_error( + "NotFound", + &format!("No mapping found for domain '{}'", domain), + StatusCode::NOT_FOUND, + ); + } + + store.set_mapping(&domain, &bucket); + json_response( + StatusCode::OK, + serde_json::json!({"domain": domain, "bucket": bucket}), + ) +} + +pub async fn delete_website_domain( + State(state): State, + Extension(principal): Extension, + Path(domain): Path, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let store = match &state.website_domains { + Some(s) => s, + None => { + return json_error( + "InvalidRequest", + "Website hosting is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + + let domain = normalize_domain(&domain); + if !store.delete_mapping(&domain) { + return json_error( + "NotFound", + &format!("No mapping found for domain '{}'", domain), + StatusCode::NOT_FOUND, + ); + } + StatusCode::NO_CONTENT.into_response() +} + +#[derive(serde::Deserialize, Default)] +pub struct PaginationQuery { + pub limit: Option, + pub offset: Option, +} + +pub async fn gc_status( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + match &state.gc { + Some(gc) => json_response(StatusCode::OK, gc.status().await), + None => json_response( + StatusCode::OK, + serde_json::json!({"enabled": false, "message": "GC is not enabled. 
Set GC_ENABLED=true to enable."}), + ), + } +} + +pub async fn gc_run( + State(state): State, + Extension(principal): Extension, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let gc = match &state.gc { + Some(gc) => gc, + None => { + return json_error( + "InvalidRequest", + "GC is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + + let payload = read_json_body(body).await.unwrap_or(serde_json::json!({})); + let dry_run = payload + .get("dry_run") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + match gc.run_now(dry_run).await { + Ok(result) => json_response(StatusCode::OK, result), + Err(e) => json_error("Conflict", &e, StatusCode::CONFLICT), + } +} + +pub async fn gc_history( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + match &state.gc { + Some(gc) => json_response( + StatusCode::OK, + serde_json::json!({"executions": gc.history().await}), + ), + None => json_response(StatusCode::OK, serde_json::json!({"executions": []})), + } +} + +pub async fn integrity_status( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + match &state.integrity { + Some(checker) => json_response(StatusCode::OK, checker.status().await), + None => json_response( + StatusCode::OK, + serde_json::json!({"enabled": false, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."}), + ), + } +} + +pub async fn integrity_run( + State(state): State, + Extension(principal): Extension, + body: Body, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + let checker = match &state.integrity { + Some(c) => c, + None => { + return json_error( + "InvalidRequest", + "Integrity checker is not enabled", + StatusCode::BAD_REQUEST, + ) + } + }; + + let payload = read_json_body(body).await.unwrap_or(serde_json::json!({})); + let dry_run = payload + .get("dry_run") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let auto_heal = payload + .get("auto_heal") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + match checker.run_now(dry_run, auto_heal).await { + Ok(result) => json_response(StatusCode::OK, result), + Err(e) => json_error("Conflict", &e, StatusCode::CONFLICT), + } +} + +pub async fn integrity_history( + State(state): State, + Extension(principal): Extension, +) -> Response { + if let Some(err) = require_admin(&principal) { + return err; + } + match &state.integrity { + Some(checker) => json_response( + StatusCode::OK, + serde_json::json!({"executions": checker.history().await}), + ), + None => json_response(StatusCode::OK, serde_json::json!({"executions": []})), + } +} diff --git a/crates/myfsio-server/src/handlers/chunked.rs b/crates/myfsio-server/src/handlers/chunked.rs new file mode 100644 index 0000000..316dad1 --- /dev/null +++ b/crates/myfsio-server/src/handlers/chunked.rs @@ -0,0 +1,184 @@ +use std::pin::Pin; +use std::task::{Context, Poll}; + +use bytes::{Buf, BytesMut}; +use tokio::io::{AsyncRead, ReadBuf}; + +enum State { + ReadSize, + ReadData(u64), + ReadTrailer, + Finished, +} + +pub struct AwsChunkedStream { + inner: S, + buffer: BytesMut, + state: State, + pending: BytesMut, + eof: bool, +} + +impl AwsChunkedStream { + pub fn new(inner: S) -> Self { + Self { + inner, + buffer: BytesMut::with_capacity(8192), + state: State::ReadSize, + pending: BytesMut::new(), + eof: false, + } + } + + fn 
find_crlf(&self) -> Option { + for i in 0..self.buffer.len().saturating_sub(1) { + if self.buffer[i] == b'\r' && self.buffer[i + 1] == b'\n' { + return Some(i); + } + } + None + } + + fn parse_chunk_size(line: &[u8]) -> std::io::Result { + let text = std::str::from_utf8(line).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid chunk size encoding", + ) + })?; + let head = text.split(';').next().unwrap_or("").trim(); + u64::from_str_radix(head, 16).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid chunk size: {}", head), + ) + }) + } + + fn try_advance(&mut self, out: &mut ReadBuf<'_>) -> std::io::Result { + loop { + if out.remaining() == 0 { + return Ok(true); + } + + if !self.pending.is_empty() { + let take = std::cmp::min(self.pending.len(), out.remaining()); + out.put_slice(&self.pending[..take]); + self.pending.advance(take); + continue; + } + + match self.state { + State::Finished => return Ok(true), + State::ReadSize => { + let idx = match self.find_crlf() { + Some(i) => i, + None => return Ok(false), + }; + let line = self.buffer.split_to(idx); + self.buffer.advance(2); + let size = Self::parse_chunk_size(&line)?; + if size == 0 { + self.state = State::ReadTrailer; + } else { + self.state = State::ReadData(size); + } + } + State::ReadData(remaining) => { + if self.buffer.is_empty() { + return Ok(false); + } + let avail = std::cmp::min(self.buffer.len() as u64, remaining) as usize; + let take = std::cmp::min(avail, out.remaining()); + out.put_slice(&self.buffer[..take]); + self.buffer.advance(take); + let new_remaining = remaining - take as u64; + if new_remaining == 0 { + if self.buffer.len() < 2 { + self.state = State::ReadData(0); + return Ok(false); + } + if &self.buffer[..2] != b"\r\n" { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "malformed chunk terminator", + )); + } + self.buffer.advance(2); + self.state = State::ReadSize; + } else { + self.state = State::ReadData(new_remaining); + } + } + State::ReadTrailer => { + let idx = match self.find_crlf() { + Some(i) => i, + None => return Ok(false), + }; + if idx == 0 { + self.buffer.advance(2); + self.state = State::Finished; + } else { + self.buffer.advance(idx + 2); + } + } + } + } + } +} + +impl AsyncRead for AwsChunkedStream +where + S: AsyncRead + Unpin, +{ + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + loop { + let before = buf.filled().len(); + let done = match self.try_advance(buf) { + Ok(v) => v, + Err(e) => return Poll::Ready(Err(e)), + }; + if buf.filled().len() > before { + return Poll::Ready(Ok(())); + } + if done { + return Poll::Ready(Ok(())); + } + if self.eof { + return Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "unexpected EOF in aws-chunked stream", + ))); + } + + let mut tmp = [0u8; 8192]; + let mut rb = ReadBuf::new(&mut tmp); + match Pin::new(&mut self.inner).poll_read(cx, &mut rb) { + Poll::Ready(Ok(())) => { + let n = rb.filled().len(); + if n == 0 { + self.eof = true; + continue; + } + self.buffer.extend_from_slice(rb.filled()); + } + Poll::Ready(Err(e)) => return Poll::Ready(Err(e)), + Poll::Pending => return Poll::Pending, + } + } + } +} + +pub fn decode_body(body: axum::body::Body) -> impl AsyncRead + Send + Unpin { + use futures::TryStreamExt; + let stream = tokio_util::io::StreamReader::new( + http_body_util::BodyStream::new(body) + .map_ok(|frame| frame.into_data().unwrap_or_default()) + .map_err(|e| 
std::io::Error::new(std::io::ErrorKind::Other, e)), + ); + AwsChunkedStream::new(stream) +} diff --git a/crates/myfsio-server/src/handlers/config.rs b/crates/myfsio-server/src/handlers/config.rs new file mode 100644 index 0000000..6938d02 --- /dev/null +++ b/crates/myfsio-server/src/handlers/config.rs @@ -0,0 +1,1765 @@ +use axum::body::Body; +use axum::http::{HeaderMap, StatusCode}; +use axum::response::{IntoResponse, Response}; +use chrono::{DateTime, Utc}; + +use myfsio_common::error::{S3Error, S3ErrorCode}; +use myfsio_storage::traits::StorageEngine; + +use crate::services::acl::{ + acl_from_object_metadata, acl_to_xml, create_canned_acl, store_object_acl, +}; +use crate::services::notifications::parse_notification_configurations; +use crate::services::object_lock::{ + ensure_retention_mutable, get_legal_hold, get_object_retention as retention_from_metadata, + set_legal_hold, set_object_retention as store_retention, ObjectLockRetention, RetentionMode, +}; +use crate::state::AppState; + +fn xml_response(status: StatusCode, xml: String) -> Response { + (status, [("content-type", "application/xml")], xml).into_response() +} + +fn stored_xml(value: &serde_json::Value) -> String { + match value { + serde_json::Value::String(s) => s.clone(), + other => other.to_string(), + } +} + +fn storage_err(err: myfsio_storage::error::StorageError) -> Response { + let s3err = S3Error::from(err); + let status = + StatusCode::from_u16(s3err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + ( + status, + [("content-type", "application/xml")], + s3err.to_xml(), + ) + .into_response() +} + +fn json_response(status: StatusCode, value: serde_json::Value) -> Response { + ( + status, + [("content-type", "application/json")], + value.to_string(), + ) + .into_response() +} + +fn custom_xml_error(status: StatusCode, code: &str, message: &str) -> Response { + let xml = format!( + "\ + {}{}", + xml_escape(code), + xml_escape(message), + ); + xml_response(status, xml) +} + +pub async fn get_versioning(state: &AppState, bucket: &str) -> Response { + match state.storage.get_versioning_status(bucket).await { + Ok(status) => { + let body = match status { + myfsio_common::types::VersioningStatus::Enabled => { + "\ + \ + Enabled\ + " + .to_string() + } + myfsio_common::types::VersioningStatus::Suspended => { + "\ + \ + Suspended\ + " + .to_string() + } + myfsio_common::types::VersioningStatus::Disabled => { + "\ + \ + " + .to_string() + } + }; + xml_response(StatusCode::OK, body) + } + Err(e) => storage_err(e), + } +} + +pub async fn put_versioning(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::from_code(S3ErrorCode::MalformedXML).to_xml(), + ); + } + }; + + let xml_str = String::from_utf8_lossy(&body_bytes); + let status = if xml_str.contains("Enabled") { + myfsio_common::types::VersioningStatus::Enabled + } else if xml_str.contains("Suspended") { + myfsio_common::types::VersioningStatus::Suspended + } else { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new( + S3ErrorCode::MalformedXML, + "VersioningConfiguration Status must be Enabled or Suspended", + ) + .to_xml(), + ); + }; + + match state.storage.set_versioning_status(bucket, status).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } +} + +pub async fn get_tagging(state: &AppState, bucket: &str) -> Response { + 
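// Reads the bucket's persisted tags and renders them as an S3 GetBucketTagging
// document: one <Tag><Key>…</Key><Value>…</Value></Tag> entry per tag inside a
// <TagSet>. Keys and values are emitted verbatim from BucketConfig.tags.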
match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + let mut xml = String::from( + "\ + ", + ); + for tag in &config.tags { + xml.push_str(&format!( + "{}{}", + tag.key, tag.value + )); + } + xml.push_str(""); + xml_response(StatusCode::OK, xml) + } + Err(e) => storage_err(e), + } +} + +pub async fn put_tagging(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::from_code(S3ErrorCode::MalformedXML).to_xml(), + ); + } + }; + + let xml_str = String::from_utf8_lossy(&body_bytes); + let tags = parse_tagging_xml(&xml_str); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.tags = tags; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_tagging(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.tags.clear(); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_cors(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(cors) = &config.cors { + xml_response(StatusCode::OK, stored_xml(cors)) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::new( + S3ErrorCode::NoSuchKey, + "The CORS configuration does not exist", + ) + .to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_cors(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + + let body_str = String::from_utf8_lossy(&body_bytes); + let value = serde_json::Value::String(body_str.to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.cors = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_cors(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.cors = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_location(state: &AppState, _bucket: &str) -> Response { + let xml = format!( + "\ + {}", + state.config.region + ); + xml_response(StatusCode::OK, xml) +} + +pub async fn get_encryption(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(enc) = &config.encryption { + xml_response(StatusCode::OK, stored_xml(enc)) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::from_code(S3ErrorCode::ServerSideEncryptionConfigurationNotFoundError) + .to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_encryption(state: &AppState, bucket: &str, body: Body) -> Response { + 
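// Like the CORS handler above, the SSE configuration body is not parsed here: it is
// stored verbatim as a string inside BucketConfig and echoed back by get_encryption,
// which returns ServerSideEncryptionConfigurationNotFoundError when nothing is stored.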
let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + let value = serde_json::Value::String(String::from_utf8_lossy(&body_bytes).to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.encryption = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_encryption(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.encryption = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_lifecycle(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(lc) = &config.lifecycle { + xml_response(StatusCode::OK, stored_xml(lc)) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::from_code(S3ErrorCode::NoSuchLifecycleConfiguration).to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_lifecycle(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + let value = serde_json::Value::String(String::from_utf8_lossy(&body_bytes).to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.lifecycle = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_lifecycle(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.lifecycle = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_quota(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(quota) = &config.quota { + let usage = match state.storage.bucket_stats(bucket).await { + Ok(s) => s, + Err(e) => return storage_err(e), + }; + json_response( + StatusCode::OK, + serde_json::json!({ + "quota": { + "max_size_bytes": quota.max_bytes, + "max_objects": quota.max_objects, + }, + "usage": { + "bytes": usage.bytes, + "objects": usage.objects, + } + }), + ) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::new(S3ErrorCode::NoSuchKey, "No quota configuration found").to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_quota(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::InvalidArgument, "Invalid quota payload").to_xml(), + ); + } + }; + + let payload: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(v) => v, + Err(_) => { + return 
xml_response( + StatusCode::BAD_REQUEST, + S3Error::new( + S3ErrorCode::InvalidArgument, + "Request body must be valid JSON", + ) + .to_xml(), + ); + } + }; + + let max_size = payload.get("max_size_bytes").and_then(|v| v.as_u64()); + let max_objects = payload.get("max_objects").and_then(|v| v.as_u64()); + + if max_size.is_none() && max_objects.is_none() { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new( + S3ErrorCode::InvalidArgument, + "At least one of max_size_bytes or max_objects is required", + ) + .to_xml(), + ); + } + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.quota = Some(myfsio_common::types::QuotaConfig { + max_bytes: max_size, + max_objects, + }); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_quota(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.quota = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_policy(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(policy) = &config.policy { + json_response(StatusCode::OK, policy.clone()) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::from_code(S3ErrorCode::NoSuchBucketPolicy).to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_policy(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::MalformedXML, "Failed to read policy body").to_xml(), + ); + } + }; + + let policy: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(v) => v, + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::InvalidArgument, "Policy document must be JSON").to_xml(), + ); + } + }; + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.policy = Some(policy); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_policy(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.policy = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_policy_status(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + let is_public = config + .policy + .as_ref() + .map(policy_is_public) + .unwrap_or(false); + let xml = format!( + "{}", + if is_public { "TRUE" } else { "FALSE" } + ); + xml_response(StatusCode::OK, xml) + } + Err(e) => storage_err(e), + } +} + +pub async fn get_replication(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(replication) = &config.replication { 
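// put_replication (below) stores the raw ReplicationConfiguration XML as a string,
// so the stored value can simply be echoed back here.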
+ xml_response(StatusCode::OK, stored_xml(replication)) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::new( + S3ErrorCode::NoSuchKey, + "Replication configuration not found", + ) + .to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_replication(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::MalformedXML, "Failed to read replication body").to_xml(), + ); + } + }; + + if body_bytes.is_empty() { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::MalformedXML, "Request body is required").to_xml(), + ); + } + + let body_str = String::from_utf8_lossy(&body_bytes).to_string(); + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.replication = Some(serde_json::Value::String(body_str)); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_replication(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.replication = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +fn policy_is_public(policy: &serde_json::Value) -> bool { + let statements = match policy.get("Statement") { + Some(serde_json::Value::Array(items)) => items, + Some(item) => { + return is_allow_public_statement(item); + } + None => return false, + }; + + statements.iter().any(is_allow_public_statement) +} + +fn is_allow_public_statement(statement: &serde_json::Value) -> bool { + let effect_allow = statement + .get("Effect") + .and_then(|v| v.as_str()) + .map(|s| s.eq_ignore_ascii_case("allow")) + .unwrap_or(false); + if !effect_allow { + return false; + } + + match statement.get("Principal") { + Some(serde_json::Value::String(s)) => s == "*", + Some(serde_json::Value::Object(obj)) => obj.values().any(|v| v == "*"), + _ => false, + } +} + +pub async fn get_acl(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(acl) = &config.acl { + xml_response(StatusCode::OK, stored_xml(acl)) + } else { + let xml = "\ + \ + myfsiomyfsio\ + \ + \ + myfsiomyfsio\ + FULL_CONTROL\ + "; + xml_response(StatusCode::OK, xml.to_string()) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_acl(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + let value = serde_json::Value::String(String::from_utf8_lossy(&body_bytes).to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.acl = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_website(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(ws) = &config.website { + 
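// The stored WebsiteConfiguration XML is returned as-is; when none is set, the
// handler falls through to the NoSuchKey-style error below.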
xml_response(StatusCode::OK, stored_xml(ws)) + } else { + xml_response( + StatusCode::NOT_FOUND, + S3Error::new( + S3ErrorCode::NoSuchKey, + "The website configuration does not exist", + ) + .to_xml(), + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_website(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + let value = serde_json::Value::String(String::from_utf8_lossy(&body_bytes).to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.website = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_website(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.website = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_object_lock(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(ol) = &config.object_lock { + xml_response(StatusCode::OK, stored_xml(ol)) + } else { + let xml = "\ + \ + Disabled\ + "; + xml_response(StatusCode::OK, xml.to_string()) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_notification(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + if let Some(n) = &config.notification { + xml_response(StatusCode::OK, stored_xml(n)) + } else { + let xml = "\ + \ + "; + xml_response(StatusCode::OK, xml.to_string()) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_logging(state: &AppState, bucket: &str) -> Response { + match state.storage.bucket_exists(bucket).await { + Ok(true) => {} + Ok(false) => { + return storage_err(myfsio_storage::error::StorageError::BucketNotFound( + bucket.to_string(), + )) + } + Err(e) => return storage_err(e), + } + + let logging_config = if let Some(cfg) = state.access_logging.get(bucket) { + Some(cfg) + } else { + match state.storage.get_bucket_config(bucket).await { + Ok(config) => { + let legacy = legacy_logging_config(&config); + if let Some(cfg) = legacy.as_ref() { + if let Err(err) = state.access_logging.set(bucket, cfg.clone()) { + tracing::warn!( + "Failed to migrate legacy bucket logging config for {}: {}", + bucket, + err + ); + } + } + legacy + } + Err(e) => return storage_err(e), + } + }; + + let body = match logging_config { + Some(cfg) if cfg.enabled => format!( + "\ + \ + {}{}\ + ", + xml_escape(&cfg.target_bucket), + xml_escape(&cfg.target_prefix), + ), + _ => "\ + " + .to_string(), + }; + xml_response(StatusCode::OK, body) +} + +fn xml_escape(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +fn legacy_logging_config( + config: &myfsio_common::types::BucketConfig, +) -> Option { + let value = config.logging.as_ref()?; + match value { + serde_json::Value::String(xml) => parse_logging_config_xml(xml), + serde_json::Value::Object(_) => parse_logging_config_value(value.clone()), + _ => None, + } +} + +fn parse_logging_config_value( + 
value: serde_json::Value, +) -> Option { + let logging_enabled = value.get("LoggingEnabled")?; + let target_bucket = logging_enabled + .get("TargetBucket") + .and_then(|value| value.as_str()) + .map(str::trim) + .filter(|value| !value.is_empty())? + .to_string(); + let target_prefix = logging_enabled + .get("TargetPrefix") + .and_then(|value| value.as_str()) + .unwrap_or_default() + .to_string(); + Some(crate::services::access_logging::LoggingConfiguration { + target_bucket, + target_prefix, + enabled: true, + }) +} + +fn parse_logging_config_xml( + xml: &str, +) -> Option { + let doc = roxmltree::Document::parse(xml).ok()?; + let root = doc.root_element(); + let logging_enabled = root + .children() + .find(|n| n.is_element() && n.tag_name().name() == "LoggingEnabled")?; + let target_bucket = logging_enabled + .children() + .find(|n| n.is_element() && n.tag_name().name() == "TargetBucket") + .and_then(|n| n.text()) + .map(str::trim) + .filter(|value| !value.is_empty())? + .to_string(); + let target_prefix = logging_enabled + .children() + .find(|n| n.is_element() && n.tag_name().name() == "TargetPrefix") + .and_then(|n| n.text()) + .unwrap_or_default() + .to_string(); + Some(crate::services::access_logging::LoggingConfiguration { + target_bucket, + target_prefix, + enabled: true, + }) +} + +pub async fn put_object_lock(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + let value = serde_json::Value::String(String::from_utf8_lossy(&body_bytes).to_string()); + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.object_lock = Some(value); + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_object_lock(state: &AppState, bucket: &str) -> Response { + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.object_lock = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_notification(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "MalformedXML", + "Unable to parse XML document", + ) + } + }; + let raw = String::from_utf8_lossy(&body_bytes).to_string(); + let notification = if raw.trim().is_empty() { + None + } else { + match parse_notification_configurations(&raw) { + Ok(_) => Some(serde_json::Value::String(raw)), + Err(message) => { + let code = if message.contains("Destination URL is required") { + "InvalidArgument" + } else { + "MalformedXML" + }; + return custom_xml_error(StatusCode::BAD_REQUEST, code, &message); + } + } + }; + + match state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.notification = notification; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn delete_notification(state: &AppState, bucket: &str) -> Response { + match 
state.storage.get_bucket_config(bucket).await { + Ok(mut config) => { + config.notification = None; + match state.storage.set_bucket_config(bucket, &config).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_logging(state: &AppState, bucket: &str, body: Body) -> Response { + match state.storage.bucket_exists(bucket).await { + Ok(true) => {} + Ok(false) => { + return storage_err(myfsio_storage::error::StorageError::BucketNotFound( + bucket.to_string(), + )) + } + Err(e) => return storage_err(e), + } + + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => return StatusCode::BAD_REQUEST.into_response(), + }; + + if body_bytes.iter().all(u8::is_ascii_whitespace) { + state.access_logging.delete(bucket); + return StatusCode::OK.into_response(); + } + + let xml = match std::str::from_utf8(&body_bytes) { + Ok(s) => s, + Err(_) => { + return s3_error_response( + S3ErrorCode::MalformedXML, + "Unable to parse XML document", + StatusCode::BAD_REQUEST, + ) + } + }; + + let doc = match roxmltree::Document::parse(xml) { + Ok(d) => d, + Err(_) => { + return s3_error_response( + S3ErrorCode::MalformedXML, + "Unable to parse XML document", + StatusCode::BAD_REQUEST, + ) + } + }; + + let root = doc.root_element(); + let logging_enabled = root + .children() + .find(|n| n.is_element() && n.tag_name().name() == "LoggingEnabled"); + + let Some(le) = logging_enabled else { + state.access_logging.delete(bucket); + return StatusCode::OK.into_response(); + }; + + let target_bucket = le + .children() + .find(|n| n.is_element() && n.tag_name().name() == "TargetBucket") + .and_then(|n| n.text()) + .map(str::trim) + .unwrap_or_default(); + + if target_bucket.is_empty() { + return s3_error_response( + S3ErrorCode::InvalidArgument, + "TargetBucket is required", + StatusCode::BAD_REQUEST, + ); + } + + let cfg = crate::services::access_logging::LoggingConfiguration { + target_bucket: target_bucket.to_string(), + target_prefix: le + .children() + .find(|n| n.is_element() && n.tag_name().name() == "TargetPrefix") + .and_then(|n| n.text()) + .unwrap_or_default() + .to_string(), + enabled: true, + }; + + match state.storage.bucket_exists(&cfg.target_bucket).await { + Ok(true) => {} + Ok(false) => { + return s3_error_response( + S3ErrorCode::InvalidArgument, + "Target bucket does not exist", + StatusCode::BAD_REQUEST, + ) + } + Err(e) => return storage_err(e), + } + + if let Err(e) = state.access_logging.set(bucket, cfg) { + tracing::error!( + "Failed to persist bucket logging config for {}: {}", + bucket, + e + ); + return StatusCode::INTERNAL_SERVER_ERROR.into_response(); + } + + StatusCode::OK.into_response() +} + +pub async fn delete_logging(state: &AppState, bucket: &str) -> Response { + match state.storage.bucket_exists(bucket).await { + Ok(true) => {} + Ok(false) => { + return storage_err(myfsio_storage::error::StorageError::BucketNotFound( + bucket.to_string(), + )) + } + Err(e) => return storage_err(e), + } + state.access_logging.delete(bucket); + StatusCode::NO_CONTENT.into_response() +} + +fn s3_error_response(code: S3ErrorCode, message: &str, status: StatusCode) -> Response { + let err = S3Error::new(code, message.to_string()); + (status, [("content-type", "application/xml")], err.to_xml()).into_response() +} + +pub async fn list_object_versions( + state: &AppState, + bucket: &str, + prefix: Option<&str>, + delimiter: Option<&str>, + key_marker: 
Option<&str>, + version_id_marker: Option<&str>, + max_keys: usize, +) -> Response { + match state.storage.bucket_exists(bucket).await { + Ok(true) => {} + Ok(false) => { + return storage_err(myfsio_storage::error::StorageError::BucketNotFound( + bucket.to_string(), + )); + } + Err(e) => return storage_err(e), + } + + let params = myfsio_common::types::ListParams { + max_keys: usize::MAX, + prefix: prefix.map(ToOwned::to_owned), + ..Default::default() + }; + + let object_result = match state.storage.list_objects(bucket, ¶ms).await { + Ok(result) => result, + Err(e) => return storage_err(e), + }; + let live_objects = object_result.objects; + + let archived_versions = match state + .storage + .list_bucket_object_versions(bucket, prefix) + .await + { + Ok(versions) => versions, + Err(e) => return storage_err(e), + }; + + #[derive(Clone)] + struct Entry { + key: String, + version_id: String, + last_modified: chrono::DateTime, + etag: Option, + size: u64, + storage_class: String, + is_delete_marker: bool, + } + + let mut entries: Vec = Vec::with_capacity(live_objects.len() + archived_versions.len()); + for obj in &live_objects { + entries.push(Entry { + key: obj.key.clone(), + version_id: obj.version_id.clone().unwrap_or_else(|| "null".to_string()), + last_modified: obj.last_modified, + etag: obj.etag.clone(), + size: obj.size, + storage_class: obj + .storage_class + .clone() + .unwrap_or_else(|| "STANDARD".to_string()), + is_delete_marker: false, + }); + } + for version in &archived_versions { + entries.push(Entry { + key: version.key.clone(), + version_id: version.version_id.clone(), + last_modified: version.last_modified, + etag: version.etag.clone(), + size: version.size, + storage_class: "STANDARD".to_string(), + is_delete_marker: version.is_delete_marker, + }); + } + + entries.sort_by(|a, b| { + a.key + .cmp(&b.key) + .then_with(|| b.last_modified.cmp(&a.last_modified)) + .then_with(|| a.version_id.cmp(&b.version_id)) + }); + + let mut latest_marked: std::collections::HashSet = std::collections::HashSet::new(); + let mut is_latest_flags: Vec = Vec::with_capacity(entries.len()); + for entry in &entries { + if latest_marked.insert(entry.key.clone()) { + is_latest_flags.push(true); + } else { + is_latest_flags.push(false); + } + } + + let km = key_marker.unwrap_or(""); + let vim = version_id_marker.unwrap_or(""); + let start_index = if km.is_empty() { + 0 + } else if vim.is_empty() { + entries + .iter() + .position(|e| e.key.as_str() > km) + .unwrap_or(entries.len()) + } else if let Some(pos) = entries + .iter() + .position(|e| e.key == km && e.version_id == vim) + { + pos + 1 + } else { + entries + .iter() + .position(|e| e.key.as_str() > km) + .unwrap_or(entries.len()) + }; + + let delim = delimiter.unwrap_or(""); + let prefix_str = prefix.unwrap_or(""); + + let mut common_prefixes: Vec = Vec::new(); + let mut seen_prefixes: std::collections::HashSet = std::collections::HashSet::new(); + let mut rendered = String::new(); + let mut count = 0usize; + let mut is_truncated = false; + let mut next_key_marker: Option = None; + let mut next_version_id_marker: Option = None; + let mut last_emitted: Option<(String, String)> = None; + + let mut idx = start_index; + while idx < entries.len() { + let entry = &entries[idx]; + let is_latest = is_latest_flags[idx]; + + if !delim.is_empty() { + let rest = entry.key.strip_prefix(prefix_str).unwrap_or(&entry.key); + if let Some(delim_pos) = rest.find(delim) { + let grouped = entry.key[..prefix_str.len() + delim_pos + delim.len()].to_string(); + if 
seen_prefixes.contains(&grouped) { + idx += 1; + continue; + } + if count >= max_keys { + is_truncated = true; + if let Some((k, v)) = last_emitted.clone() { + next_key_marker = Some(k); + next_version_id_marker = Some(v); + } + break; + } + common_prefixes.push(grouped.clone()); + seen_prefixes.insert(grouped.clone()); + count += 1; + + let mut group_last = (entry.key.clone(), entry.version_id.clone()); + idx += 1; + while idx < entries.len() && entries[idx].key.starts_with(&grouped) { + group_last = (entries[idx].key.clone(), entries[idx].version_id.clone()); + idx += 1; + } + last_emitted = Some(group_last); + continue; + } + } + + if count >= max_keys { + is_truncated = true; + if let Some((k, v)) = last_emitted.clone() { + next_key_marker = Some(k); + next_version_id_marker = Some(v); + } + break; + } + + let tag = if entry.is_delete_marker { + "DeleteMarker" + } else { + "Version" + }; + rendered.push_str(&format!("<{}>", tag)); + rendered.push_str(&format!("{}", xml_escape(&entry.key))); + rendered.push_str(&format!( + "{}", + xml_escape(&entry.version_id) + )); + rendered.push_str(&format!("{}", is_latest)); + rendered.push_str(&format!( + "{}", + myfsio_xml::response::format_s3_datetime(&entry.last_modified) + )); + if !entry.is_delete_marker { + if let Some(ref etag) = entry.etag { + rendered.push_str(&format!("\"{}\"", xml_escape(etag))); + } + rendered.push_str(&format!("{}", entry.size)); + rendered.push_str(&format!( + "{}", + xml_escape(&entry.storage_class) + )); + } + rendered.push_str(&format!("", tag)); + + last_emitted = Some((entry.key.clone(), entry.version_id.clone())); + count += 1; + idx += 1; + } + + let mut xml = String::from( + "\ + ", + ); + xml.push_str(&format!("{}", xml_escape(bucket))); + xml.push_str(&format!("{}", xml_escape(prefix_str))); + if !km.is_empty() { + xml.push_str(&format!("{}", xml_escape(km))); + } else { + xml.push_str(""); + } + if !vim.is_empty() { + xml.push_str(&format!( + "{}", + xml_escape(vim) + )); + } else { + xml.push_str(""); + } + xml.push_str(&format!("{}", max_keys)); + if !delim.is_empty() { + xml.push_str(&format!("{}", xml_escape(delim))); + } + xml.push_str(&format!("{}", is_truncated)); + if let Some(ref nk) = next_key_marker { + xml.push_str(&format!( + "{}", + xml_escape(nk) + )); + } + if let Some(ref nv) = next_version_id_marker { + xml.push_str(&format!( + "{}", + xml_escape(nv) + )); + } + + xml.push_str(&rendered); + for cp in &common_prefixes { + xml.push_str(&format!( + "{}", + xml_escape(cp) + )); + } + + xml.push_str(""); + xml_response(StatusCode::OK, xml) +} + +pub async fn get_object_tagging(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.get_object_tags(bucket, key).await { + Ok(tags) => { + let mut xml = String::from( + "\ + ", + ); + for tag in &tags { + xml.push_str(&format!( + "{}{}", + tag.key, tag.value + )); + } + xml.push_str(""); + xml_response(StatusCode::OK, xml) + } + Err(e) => storage_err(e), + } +} + +pub async fn put_object_tagging(state: &AppState, bucket: &str, key: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::from_code(S3ErrorCode::MalformedXML).to_xml(), + ); + } + }; + + let xml_str = String::from_utf8_lossy(&body_bytes); + let tags = parse_tagging_xml(&xml_str); + if tags.len() > state.config.object_tag_limit { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new( + 
S3ErrorCode::InvalidTag, + format!("Maximum {} tags allowed", state.config.object_tag_limit), + ) + .to_xml(), + ); + } + for tag in &tags { + if tag.key.is_empty() || tag.key.len() > 128 { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::InvalidTag, "Tag key length must be 1-128").to_xml(), + ); + } + if tag.value.len() > 256 { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::InvalidTag, "Tag value length must be 0-256").to_xml(), + ); + } + if tag.key.contains('=') { + return xml_response( + StatusCode::BAD_REQUEST, + S3Error::new(S3ErrorCode::InvalidTag, "Tag keys must not contain '='").to_xml(), + ); + } + } + + match state.storage.set_object_tags(bucket, key, &tags).await { + Ok(()) => StatusCode::OK.into_response(), + Err(e) => storage_err(e), + } +} + +pub async fn delete_object_tagging(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.delete_object_tags(bucket, key).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err(e), + } +} + +pub async fn put_object_acl( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, + _body: Body, +) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => { + let canned_acl = headers + .get("x-amz-acl") + .and_then(|value| value.to_str().ok()) + .unwrap_or("private"); + let mut metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + let owner = acl_from_object_metadata(&metadata) + .map(|acl| acl.owner) + .unwrap_or_else(|| "myfsio".to_string()); + let acl = create_canned_acl(canned_acl, &owner); + store_object_acl(&mut metadata, &acl); + match state + .storage + .put_object_metadata(bucket, key, &metadata) + .await + { + Ok(()) => StatusCode::OK.into_response(), + Err(err) => storage_err(err), + } + } + Err(e) => storage_err(e), + } +} + +pub async fn get_object_retention(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => { + let metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + if let Some(retention) = retention_from_metadata(&metadata) { + let xml = format!( + "\ + \ + {}{}", + match retention.mode { + RetentionMode::GOVERNANCE => "GOVERNANCE", + RetentionMode::COMPLIANCE => "COMPLIANCE", + }, + retention.retain_until_date.format("%Y-%m-%dT%H:%M:%S.000Z"), + ); + xml_response(StatusCode::OK, xml) + } else { + custom_xml_error( + StatusCode::NOT_FOUND, + "NoSuchObjectLockConfiguration", + "No retention policy", + ) + } + } + Err(e) => storage_err(e), + } +} + +pub async fn put_object_retention( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, + body: Body, +) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => {} + Err(e) => return storage_err(e), + } + + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "MalformedXML", + "Unable to parse XML document", + ) + } + }; + let body_str = String::from_utf8_lossy(&body_bytes); + let doc = match roxmltree::Document::parse(&body_str) { + Ok(doc) => doc, + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "MalformedXML", + "Unable to parse XML document", + ) + } + }; + let mode = find_xml_text(&doc, 
"Mode").unwrap_or_default(); + let retain_until = find_xml_text(&doc, "RetainUntilDate").unwrap_or_default(); + if mode.is_empty() || retain_until.is_empty() { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "InvalidArgument", + "Mode and RetainUntilDate are required", + ); + } + let mode = match mode.as_str() { + "GOVERNANCE" => RetentionMode::GOVERNANCE, + "COMPLIANCE" => RetentionMode::COMPLIANCE, + other => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "InvalidArgument", + &format!("Invalid retention mode: {}", other), + ) + } + }; + let retain_until_date = match DateTime::parse_from_rfc3339(&retain_until) { + Ok(value) => value.with_timezone(&Utc), + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "InvalidArgument", + &format!("Invalid date format: {}", retain_until), + ) + } + }; + + let bypass_governance = headers + .get("x-amz-bypass-governance-retention") + .and_then(|value| value.to_str().ok()) + .map(|value| value.eq_ignore_ascii_case("true")) + .unwrap_or(false); + let mut metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + if let Err(message) = ensure_retention_mutable(&metadata, bypass_governance) { + return custom_xml_error(StatusCode::FORBIDDEN, "AccessDenied", &message); + } + if let Err(message) = store_retention( + &mut metadata, + &ObjectLockRetention { + mode, + retain_until_date, + }, + ) { + return custom_xml_error(StatusCode::BAD_REQUEST, "InvalidArgument", &message); + } + match state + .storage + .put_object_metadata(bucket, key, &metadata) + .await + { + Ok(()) => StatusCode::OK.into_response(), + Err(err) => storage_err(err), + } +} + +pub async fn get_object_legal_hold(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => { + let metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + let status = if get_legal_hold(&metadata) { + "ON" + } else { + "OFF" + }; + let xml = format!( + "\ + \ + {}", + status + ); + xml_response(StatusCode::OK, xml) + } + Err(e) => storage_err(e), + } +} + +pub async fn put_object_legal_hold( + state: &AppState, + bucket: &str, + key: &str, + body: Body, +) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => {} + Err(e) => return storage_err(e), + } + + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "MalformedXML", + "Unable to parse XML document", + ) + } + }; + let body_str = String::from_utf8_lossy(&body_bytes); + let doc = match roxmltree::Document::parse(&body_str) { + Ok(doc) => doc, + Err(_) => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "MalformedXML", + "Unable to parse XML document", + ) + } + }; + let status = find_xml_text(&doc, "Status").unwrap_or_default(); + let enabled = match status.as_str() { + "ON" => true, + "OFF" => false, + _ => { + return custom_xml_error( + StatusCode::BAD_REQUEST, + "InvalidArgument", + "Status must be ON or OFF", + ) + } + }; + let mut metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + set_legal_hold(&mut metadata, enabled); + match state + .storage + .put_object_metadata(bucket, key, &metadata) + .await + { + Ok(()) => 
StatusCode::OK.into_response(), + Err(err) => storage_err(err), + } +} + +pub async fn get_object_acl(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.head_object(bucket, key).await { + Ok(_) => { + let metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return storage_err(err), + }; + let acl = acl_from_object_metadata(&metadata) + .unwrap_or_else(|| create_canned_acl("private", "myfsio")); + xml_response(StatusCode::OK, acl_to_xml(&acl)) + } + Err(e) => storage_err(e), + } +} + +fn find_xml_text(doc: &roxmltree::Document<'_>, name: &str) -> Option { + doc.descendants() + .find(|node| node.is_element() && node.tag_name().name() == name) + .and_then(|node| node.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) +} + +#[cfg(test)] +mod tests { + use super::{legacy_logging_config, parse_logging_config_xml}; + use myfsio_common::types::BucketConfig; + + #[test] + fn parses_legacy_logging_xml_string() { + let mut config = BucketConfig::default(); + config.logging = Some(serde_json::Value::String( + "\ + \ + logsaudit/\ + " + .to_string(), + )); + + let parsed = legacy_logging_config(&config).expect("expected legacy logging config"); + assert_eq!(parsed.target_bucket, "logs"); + assert_eq!(parsed.target_prefix, "audit/"); + assert!(parsed.enabled); + } + + #[test] + fn parses_legacy_logging_json_object() { + let mut config = BucketConfig::default(); + config.logging = Some(serde_json::json!({ + "LoggingEnabled": { + "TargetBucket": "logs", + "TargetPrefix": "archive/" + } + })); + + let parsed = legacy_logging_config(&config).expect("expected legacy logging config"); + assert_eq!(parsed.target_bucket, "logs"); + assert_eq!(parsed.target_prefix, "archive/"); + assert!(parsed.enabled); + } + + #[test] + fn ignores_logging_xml_without_enabled_block() { + let parsed = parse_logging_config_xml( + "\ + \ + ", + ); + + assert!(parsed.is_none()); + } +} + +fn parse_tagging_xml(xml: &str) -> Vec { + let mut tags = Vec::new(); + let mut in_tag = false; + let mut current_key = String::new(); + let mut current_value = String::new(); + let mut current_element = String::new(); + + let mut reader = quick_xml::Reader::from_str(xml); + let mut buf = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Start(ref e)) => { + let name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + current_element = name.clone(); + if name == "Tag" { + in_tag = true; + current_key.clear(); + current_value.clear(); + } + } + Ok(quick_xml::events::Event::Text(ref e)) => { + if in_tag { + let text = e.unescape().unwrap_or_default().to_string(); + match current_element.as_str() { + "Key" => current_key = text, + "Value" => current_value = text, + _ => {} + } + } + } + Ok(quick_xml::events::Event::End(ref e)) => { + let name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + if name == "Tag" && in_tag { + if !current_key.is_empty() { + tags.push(myfsio_common::types::Tag { + key: current_key.clone(), + value: current_value.clone(), + }); + } + in_tag = false; + } + } + Ok(quick_xml::events::Event::Eof) => break, + Err(_) => break, + _ => {} + } + buf.clear(); + } + + tags +} diff --git a/crates/myfsio-server/src/handlers/kms.rs b/crates/myfsio-server/src/handlers/kms.rs new file mode 100644 index 0000000..b6ca8f0 --- /dev/null +++ b/crates/myfsio-server/src/handlers/kms.rs @@ -0,0 +1,559 @@ +use aes_gcm::aead::Aead; +use aes_gcm::{Aes256Gcm, KeyInit, 
Nonce}; +use axum::body::Body; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use base64::engine::general_purpose::STANDARD as B64; +use base64::Engine; +use rand::RngCore; +use serde_json::{json, Value}; + +use crate::state::AppState; + +fn json_ok(value: Value) -> Response { + ( + StatusCode::OK, + [("content-type", "application/json")], + value.to_string(), + ) + .into_response() +} + +fn json_err(status: StatusCode, msg: &str) -> Response { + ( + status, + [("content-type", "application/json")], + json!({"error": msg}).to_string(), + ) + .into_response() +} + +async fn read_json(body: Body) -> Result { + let body_bytes = http_body_util::BodyExt::collect(body) + .await + .map_err(|_| json_err(StatusCode::BAD_REQUEST, "Invalid request body"))? + .to_bytes(); + if body_bytes.is_empty() { + Ok(json!({})) + } else { + serde_json::from_slice(&body_bytes) + .map_err(|_| json_err(StatusCode::BAD_REQUEST, "Invalid JSON")) + } +} + +fn require_kms( + state: &AppState, +) -> Result<&std::sync::Arc, Response> { + state + .kms + .as_ref() + .ok_or_else(|| json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled")) +} + +fn decode_b64(value: &str, field: &str) -> Result, Response> { + B64.decode(value).map_err(|_| { + json_err( + StatusCode::BAD_REQUEST, + &format!("Invalid base64 {}", field), + ) + }) +} + +fn require_str<'a>(value: &'a Value, names: &[&str], message: &str) -> Result<&'a str, Response> { + for name in names { + if let Some(found) = value.get(*name).and_then(|v| v.as_str()) { + return Ok(found); + } + } + Err(json_err(StatusCode::BAD_REQUEST, message)) +} + +pub async fn list_keys(State(state): State) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + + let keys = kms.list_keys().await; + let keys_json: Vec = keys + .iter() + .map(|k| { + json!({ + "KeyId": k.key_id, + "Arn": k.arn, + "Description": k.description, + "CreationDate": k.creation_date.to_rfc3339(), + "Enabled": k.enabled, + "KeyState": k.key_state, + "KeyUsage": k.key_usage, + "KeySpec": k.key_spec, + }) + }) + .collect(); + + json_ok(json!({"keys": keys_json})) +} + +pub async fn create_key(State(state): State, body: Body) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + let description = req + .get("Description") + .or_else(|| req.get("description")) + .and_then(|d| d.as_str()) + .unwrap_or(""); + + match kms.create_key(description).await { + Ok(key) => json_ok(json!({ + "KeyId": key.key_id, + "Arn": key.arn, + "Description": key.description, + "CreationDate": key.creation_date.to_rfc3339(), + "Enabled": key.enabled, + "KeyState": key.key_state, + })), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn get_key( + State(state): State, + axum::extract::Path(key_id): axum::extract::Path, +) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + + match kms.get_key(&key_id).await { + Some(key) => json_ok(json!({ + "KeyId": key.key_id, + "Arn": key.arn, + "Description": key.description, + "CreationDate": key.creation_date.to_rfc3339(), + "Enabled": key.enabled, + "KeyState": key.key_state, + "KeyUsage": key.key_usage, + "KeySpec": key.key_spec, + })), + None => json_err(StatusCode::NOT_FOUND, "Key not found"), + } +} + +pub async fn 
delete_key( + State(state): State, + axum::extract::Path(key_id): axum::extract::Path, +) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + + match kms.delete_key(&key_id).await { + Ok(true) => StatusCode::NO_CONTENT.into_response(), + Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn enable_key( + State(state): State, + axum::extract::Path(key_id): axum::extract::Path, +) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + + match kms.enable_key(&key_id).await { + Ok(true) => json_ok(json!({"status": "enabled"})), + Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn disable_key( + State(state): State, + axum::extract::Path(key_id): axum::extract::Path, +) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + + match kms.disable_key(&key_id).await { + Ok(true) => json_ok(json!({"status": "disabled"})), + Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn encrypt(State(state): State, body: Body) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") { + Ok(value) => value, + Err(response) => return response, + }; + let plaintext_b64 = match require_str(&req, &["Plaintext", "plaintext"], "Missing Plaintext") { + Ok(value) => value, + Err(response) => return response, + }; + let plaintext = match decode_b64(plaintext_b64, "Plaintext") { + Ok(value) => value, + Err(response) => return response, + }; + + match kms.encrypt_data(key_id, &plaintext).await { + Ok(ct) => json_ok(json!({ + "KeyId": key_id, + "CiphertextBlob": B64.encode(&ct), + })), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn decrypt(State(state): State, body: Body) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") { + Ok(value) => value, + Err(response) => return response, + }; + let ciphertext_b64 = match require_str( + &req, + &["CiphertextBlob", "ciphertext_blob"], + "Missing CiphertextBlob", + ) { + Ok(value) => value, + Err(response) => return response, + }; + let ciphertext = match decode_b64(ciphertext_b64, "CiphertextBlob") { + Ok(value) => value, + Err(response) => return response, + }; + + match kms.decrypt_data(key_id, &ciphertext).await { + Ok(pt) => json_ok(json!({ + "KeyId": key_id, + "Plaintext": B64.encode(&pt), + })), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn generate_data_key(State(state): State, body: Body) -> Response { + generate_data_key_inner(state, body, true).await +} + +pub async fn generate_data_key_without_plaintext( + State(state): State, + body: Body, +) -> Response { + generate_data_key_inner(state, body, false).await +} 
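Note on the wire format used by the KMS handlers above: they read loosely-typed JSON bodies and accept field names in either PascalCase or snake_case (KeyId/key_id, Plaintext/plaintext, CiphertextBlob/ciphertext_blob), with every binary value carried as base64 via decode_b64 and B64.encode. Below is a minimal sketch of the request bodies that encrypt() and generate_data_key() will accept, using only the serde_json and base64 crates already imported in this file; the key id is hypothetical and the HTTP route paths are not part of this hunk, so none are assumed.

// Illustrative sketch only: JSON bodies matching what require_str() and
// decode_b64() in the handlers above will accept.
use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine;
use serde_json::json;

fn main() {
    // Body for the encrypt() handler: KeyId plus base64-encoded Plaintext.
    // The success response mirrors it with "KeyId" and a base64 "CiphertextBlob".
    let encrypt_req = json!({
        "KeyId": "example-key-id",               // hypothetical key id
        "Plaintext": B64.encode(b"hello world"), // decode_b64() expects base64
    });
    println!("{encrypt_req}");

    // Body for generate_data_key(): NumberOfBytes defaults to 32 and is
    // validated against kms_generate_data_key_min_bytes/max_bytes from config.
    let gdk_req = json!({
        "key_id": "example-key-id", // snake_case field names are accepted too
        "NumberOfBytes": 32,
    });
    // Response fields: "KeyId", "CiphertextBlob", and "Plaintext" unless the
    // *_without_plaintext variant is called.
    println!("{gdk_req}");
}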
+ +async fn generate_data_key_inner(state: AppState, body: Body, include_plaintext: bool) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") { + Ok(value) => value, + Err(response) => return response, + }; + let num_bytes = req + .get("NumberOfBytes") + .and_then(|v| v.as_u64()) + .unwrap_or(32) as usize; + + if num_bytes < state.config.kms_generate_data_key_min_bytes + || num_bytes > state.config.kms_generate_data_key_max_bytes + { + return json_err( + StatusCode::BAD_REQUEST, + &format!( + "NumberOfBytes must be {}-{}", + state.config.kms_generate_data_key_min_bytes, + state.config.kms_generate_data_key_max_bytes + ), + ); + } + + match kms.generate_data_key(key_id, num_bytes).await { + Ok((plaintext, wrapped)) => { + let mut value = json!({ + "KeyId": key_id, + "CiphertextBlob": B64.encode(&wrapped), + }); + if include_plaintext { + value["Plaintext"] = json!(B64.encode(&plaintext)); + } + json_ok(value) + } + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn re_encrypt(State(state): State, body: Body) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + let ciphertext_b64 = match require_str( + &req, + &["CiphertextBlob", "ciphertext_blob"], + "CiphertextBlob is required", + ) { + Ok(value) => value, + Err(response) => return response, + }; + let destination_key_id = match require_str( + &req, + &["DestinationKeyId", "destination_key_id"], + "DestinationKeyId is required", + ) { + Ok(value) => value, + Err(response) => return response, + }; + let ciphertext = match decode_b64(ciphertext_b64, "CiphertextBlob") { + Ok(value) => value, + Err(response) => return response, + }; + + let keys = kms.list_keys().await; + let mut source_key_id: Option = None; + let mut plaintext: Option> = None; + for key in keys { + if !key.enabled { + continue; + } + if let Ok(value) = kms.decrypt_data(&key.key_id, &ciphertext).await { + source_key_id = Some(key.key_id); + plaintext = Some(value); + break; + } + } + + let Some(source_key_id) = source_key_id else { + return json_err( + StatusCode::BAD_REQUEST, + "Could not determine source key for CiphertextBlob", + ); + }; + let plaintext = plaintext.unwrap_or_default(); + + match kms.encrypt_data(destination_key_id, &plaintext).await { + Ok(new_ciphertext) => json_ok(json!({ + "CiphertextBlob": B64.encode(&new_ciphertext), + "SourceKeyId": source_key_id, + "KeyId": destination_key_id, + })), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} + +pub async fn generate_random(State(state): State, body: Body) -> Response { + if let Err(response) = require_kms(&state) { + return response; + } + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + let num_bytes = req + .get("NumberOfBytes") + .and_then(|v| v.as_u64()) + .unwrap_or(32) as usize; + + if num_bytes < state.config.kms_generate_data_key_min_bytes + || num_bytes > state.config.kms_generate_data_key_max_bytes + { + return json_err( + StatusCode::BAD_REQUEST, + &format!( + "NumberOfBytes must be {}-{}", + state.config.kms_generate_data_key_min_bytes, + 
state.config.kms_generate_data_key_max_bytes + ), + ); + } + + let mut bytes = vec![0u8; num_bytes]; + rand::thread_rng().fill_bytes(&mut bytes); + json_ok(json!({ + "Plaintext": B64.encode(bytes), + })) +} + +pub async fn client_generate_key(State(state): State) -> Response { + let _ = state; + + let mut key = [0u8; 32]; + rand::thread_rng().fill_bytes(&mut key); + json_ok(json!({ + "Key": B64.encode(key), + "Algorithm": "AES-256-GCM", + "KeySize": 32, + })) +} + +pub async fn client_encrypt(State(state): State, body: Body) -> Response { + let _ = state; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + let plaintext_b64 = + match require_str(&req, &["Plaintext", "plaintext"], "Plaintext is required") { + Ok(value) => value, + Err(response) => return response, + }; + let key_b64 = match require_str(&req, &["Key", "key"], "Key is required") { + Ok(value) => value, + Err(response) => return response, + }; + + let plaintext = match decode_b64(plaintext_b64, "Plaintext") { + Ok(value) => value, + Err(response) => return response, + }; + let key_bytes = match decode_b64(key_b64, "Key") { + Ok(value) => value, + Err(response) => return response, + }; + if key_bytes.len() != 32 { + return json_err(StatusCode::BAD_REQUEST, "Key must decode to 32 bytes"); + } + + let cipher = match Aes256Gcm::new_from_slice(&key_bytes) { + Ok(cipher) => cipher, + Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid encryption key"), + }; + let mut nonce_bytes = [0u8; 12]; + rand::thread_rng().fill_bytes(&mut nonce_bytes); + let nonce = Nonce::from_slice(&nonce_bytes); + + match cipher.encrypt(nonce, plaintext.as_ref()) { + Ok(ciphertext) => json_ok(json!({ + "Ciphertext": B64.encode(ciphertext), + "Nonce": B64.encode(nonce_bytes), + "Algorithm": "AES-256-GCM", + })), + Err(e) => json_err(StatusCode::BAD_REQUEST, &e.to_string()), + } +} + +pub async fn client_decrypt(State(state): State, body: Body) -> Response { + let _ = state; + let req = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + let ciphertext_b64 = match require_str( + &req, + &["Ciphertext", "ciphertext"], + "Ciphertext is required", + ) { + Ok(value) => value, + Err(response) => return response, + }; + let nonce_b64 = match require_str(&req, &["Nonce", "nonce"], "Nonce is required") { + Ok(value) => value, + Err(response) => return response, + }; + let key_b64 = match require_str(&req, &["Key", "key"], "Key is required") { + Ok(value) => value, + Err(response) => return response, + }; + + let ciphertext = match decode_b64(ciphertext_b64, "Ciphertext") { + Ok(value) => value, + Err(response) => return response, + }; + let nonce_bytes = match decode_b64(nonce_b64, "Nonce") { + Ok(value) => value, + Err(response) => return response, + }; + let key_bytes = match decode_b64(key_b64, "Key") { + Ok(value) => value, + Err(response) => return response, + }; + if key_bytes.len() != 32 { + return json_err(StatusCode::BAD_REQUEST, "Key must decode to 32 bytes"); + } + if nonce_bytes.len() != 12 { + return json_err(StatusCode::BAD_REQUEST, "Nonce must decode to 12 bytes"); + } + + let cipher = match Aes256Gcm::new_from_slice(&key_bytes) { + Ok(cipher) => cipher, + Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid encryption key"), + }; + let nonce = Nonce::from_slice(&nonce_bytes); + + match cipher.decrypt(nonce, ciphertext.as_ref()) { + Ok(plaintext) => json_ok(json!({ + "Plaintext": B64.encode(plaintext), + })), + Err(e) => 
json_err(StatusCode::BAD_REQUEST, &e.to_string()), + } +} + +pub async fn materials( + State(state): State, + axum::extract::Path(key_id): axum::extract::Path, + body: Body, +) -> Response { + let kms = match require_kms(&state) { + Ok(kms) => kms, + Err(response) => return response, + }; + let _ = match read_json(body).await { + Ok(req) => req, + Err(response) => return response, + }; + + match kms.generate_data_key(&key_id, 32).await { + Ok((plaintext, wrapped)) => json_ok(json!({ + "PlaintextKey": B64.encode(plaintext), + "EncryptedKey": B64.encode(wrapped), + "KeyId": key_id, + "Algorithm": "AES-256-GCM", + "KeyWrapAlgorithm": "kms", + })), + Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()), + } +} diff --git a/crates/myfsio-server/src/handlers/mod.rs b/crates/myfsio-server/src/handlers/mod.rs new file mode 100644 index 0000000..43dce2c --- /dev/null +++ b/crates/myfsio-server/src/handlers/mod.rs @@ -0,0 +1,3734 @@ +pub mod admin; +mod chunked; +mod config; +pub mod kms; +mod select; +pub mod ui; +pub mod ui_api; +pub mod ui_pages; + +use std::collections::HashMap; + +use axum::body::Body; +use axum::extract::{Path, Query, State}; +use axum::http::{HeaderMap, StatusCode}; +use axum::response::{IntoResponse, Response}; +use base64::engine::general_purpose::{STANDARD, URL_SAFE}; +use base64::Engine; +use chrono::{DateTime, Utc}; +use md5::Md5; +use percent_encoding::percent_decode_str; +use serde_json::json; +use sha2::{Digest, Sha256}; + +use myfsio_common::error::{S3Error, S3ErrorCode}; +use myfsio_common::types::PartInfo; +use myfsio_storage::traits::StorageEngine; +use tokio::io::AsyncSeekExt; +use tokio_util::io::ReaderStream; + +use crate::services::notifications; +use crate::services::object_lock; +use crate::state::AppState; + +async fn open_self_deleting(path: std::path::PathBuf) -> std::io::Result { + #[cfg(unix)] + { + let file = tokio::fs::File::open(&path).await?; + let _ = tokio::fs::remove_file(&path).await; + Ok(file) + } + #[cfg(windows)] + { + use std::os::windows::fs::OpenOptionsExt; + const FILE_FLAG_DELETE_ON_CLOSE: u32 = 0x0400_0000; + const FILE_SHARE_READ: u32 = 0x0000_0001; + const FILE_SHARE_WRITE: u32 = 0x0000_0002; + const FILE_SHARE_DELETE: u32 = 0x0000_0004; + let file = tokio::task::spawn_blocking(move || { + std::fs::OpenOptions::new() + .read(true) + .custom_flags(FILE_FLAG_DELETE_ON_CLOSE) + .share_mode(FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) + .open(&path) + }) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))??; + Ok(tokio::fs::File::from_std(file)) + } +} + +fn s3_error_response(err: S3Error) -> Response { + let status = + StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + let resource = if err.resource.is_empty() { + "/".to_string() + } else { + err.resource.clone() + }; + let body = err + .with_resource(resource) + .with_request_id(uuid::Uuid::new_v4().simple().to_string()) + .to_xml(); + (status, [("content-type", "application/xml")], body).into_response() +} + +fn storage_err_response(err: myfsio_storage::error::StorageError) -> Response { + if let myfsio_storage::error::StorageError::Io(io_err) = &err { + if let Some(message) = crate::middleware::sha_body::sha256_mismatch_message(io_err) { + return bad_digest_response(message); + } + if let Some(response) = io_error_to_s3_response(io_err) { + return response; + } + } + if let myfsio_storage::error::StorageError::DeleteMarker { + bucket, + key, + version_id, + } = &err + { + let s3_err = 
S3Error::from_code(S3ErrorCode::NoSuchKey) + .with_resource(format!("/{}/{}", bucket, key)) + .with_request_id(uuid::Uuid::new_v4().simple().to_string()); + let status = StatusCode::from_u16(s3_err.http_status()) + .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + let mut resp_headers = HeaderMap::new(); + resp_headers.insert("x-amz-delete-marker", "true".parse().unwrap()); + if let Ok(vid) = version_id.parse() { + resp_headers.insert("x-amz-version-id", vid); + } + resp_headers.insert("content-type", "application/xml".parse().unwrap()); + return (status, resp_headers, s3_err.to_xml()).into_response(); + } + s3_error_response(S3Error::from(err)) +} + +fn io_error_to_s3_response(err: &std::io::Error) -> Option { + use std::io::ErrorKind; + let message = err.to_string(); + let lower = message.to_ascii_lowercase(); + let hit_collision = matches!( + err.kind(), + ErrorKind::NotADirectory + | ErrorKind::IsADirectory + | ErrorKind::AlreadyExists + | ErrorKind::DirectoryNotEmpty + ) || lower.contains("not a directory") + || lower.contains("is a directory") + || lower.contains("file exists") + || lower.contains("directory not empty"); + let hit_name_too_long = matches!(err.kind(), ErrorKind::InvalidFilename) + || lower.contains("file name too long"); + if !hit_collision && !hit_name_too_long { + return None; + } + let code = if hit_name_too_long { + S3ErrorCode::InvalidKey + } else { + S3ErrorCode::InvalidRequest + }; + let detail = if hit_name_too_long { + "Object key exceeds the filesystem's per-segment length limit" + } else { + "Object key collides with an existing object path on the storage backend" + }; + Some(s3_error_response(S3Error::new(code, detail))) +} + +fn trigger_replication(state: &AppState, bucket: &str, key: &str, action: &str) { + let manager = state.replication.clone(); + let bucket = bucket.to_string(); + let key = key.to_string(); + let action = action.to_string(); + tokio::spawn(async move { + manager.trigger(bucket, key, action).await; + }); +} + +async fn ensure_object_lock_allows_write( + state: &AppState, + bucket: &str, + key: &str, + headers: Option<&HeaderMap>, +) -> Result<(), Response> { + match state.storage.head_object(bucket, key).await { + Ok(_) => { + let metadata = match state.storage.get_object_metadata(bucket, key).await { + Ok(metadata) => metadata, + Err(err) => return Err(storage_err_response(err)), + }; + let bypass_governance = headers + .and_then(|headers| { + headers + .get("x-amz-bypass-governance-retention") + .and_then(|value| value.to_str().ok()) + }) + .map(|value| value.eq_ignore_ascii_case("true")) + .unwrap_or(false); + if let Err(message) = object_lock::can_delete_object(&metadata, bypass_governance) { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::AccessDenied, + message, + ))); + } + Ok(()) + } + Err(myfsio_storage::error::StorageError::ObjectNotFound { .. }) => Ok(()), + Err(myfsio_storage::error::StorageError::DeleteMarker { .. 
}) => Ok(()), + Err(err) => Err(storage_err_response(err)), + } +} + +async fn ensure_object_version_lock_allows_delete( + state: &AppState, + bucket: &str, + key: &str, + version_id: &str, + headers: &HeaderMap, +) -> Result<(), Response> { + let metadata = match state + .storage + .get_object_version_metadata(bucket, key, version_id) + .await + { + Ok(metadata) => metadata, + Err(err) => return Err(storage_err_response(err)), + }; + let bypass_governance = headers + .get("x-amz-bypass-governance-retention") + .and_then(|value| value.to_str().ok()) + .map(|value| value.eq_ignore_ascii_case("true")) + .unwrap_or(false); + if let Err(message) = object_lock::can_delete_object(&metadata, bypass_governance) { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::AccessDenied, + message, + ))); + } + Ok(()) +} + +pub async fn list_buckets( + State(state): State, + Query(query): Query, + headers: HeaderMap, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + return get_bucket(State(state), Path(host_bucket), Query(query), headers).await; + } + + match state.storage.list_buckets().await { + Ok(buckets) => { + let xml = myfsio_xml::response::list_buckets_xml("myfsio", "myfsio", &buckets); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +pub async fn health_check() -> Response { + ( + StatusCode::OK, + [("content-type", "application/json")], + json!({ + "status": "ok", + "version": env!("CARGO_PKG_VERSION"), + }) + .to_string(), + ) + .into_response() +} + +pub async fn create_bucket( + State(state): State, + Path(bucket): Path, + Query(query): Query, + headers: HeaderMap, + body: Body, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + if host_bucket != bucket { + return put_object( + State(state), + Path((host_bucket, bucket)), + Query(ObjectQuery::default()), + headers, + body, + ) + .await; + } + } + + if query.quota.is_some() { + return config::put_quota(&state, &bucket, body).await; + } + if query.versioning.is_some() { + return config::put_versioning(&state, &bucket, body).await; + } + if query.tagging.is_some() { + return config::put_tagging(&state, &bucket, body).await; + } + if query.cors.is_some() { + return config::put_cors(&state, &bucket, body).await; + } + if query.encryption.is_some() { + return config::put_encryption(&state, &bucket, body).await; + } + if query.lifecycle.is_some() { + return config::put_lifecycle(&state, &bucket, body).await; + } + if query.acl.is_some() { + return config::put_acl(&state, &bucket, body).await; + } + if query.policy.is_some() { + return config::put_policy(&state, &bucket, body).await; + } + if query.replication.is_some() { + return config::put_replication(&state, &bucket, body).await; + } + if query.website.is_some() { + return config::put_website(&state, &bucket, body).await; + } + if query.object_lock.is_some() { + return config::put_object_lock(&state, &bucket, body).await; + } + if query.notification.is_some() { + return config::put_notification(&state, &bucket, body).await; + } + if query.logging.is_some() { + return config::put_logging(&state, &bucket, body).await; + } + + match state.storage.create_bucket(&bucket).await { + Ok(()) => ( + StatusCode::OK, + [("location", format!("/{}", bucket).as_str())], + "", + ) + .into_response(), + Err(e) => storage_err_response(e), + } +} + +#[derive(serde::Deserialize, Default)] +pub struct BucketQuery { + 
#[serde(rename = "list-type")] + pub list_type: Option, + pub marker: Option, + pub prefix: Option, + pub delimiter: Option, + #[serde(rename = "max-keys")] + pub max_keys: Option, + #[serde(rename = "continuation-token")] + pub continuation_token: Option, + #[serde(rename = "start-after")] + pub start_after: Option, + #[serde(rename = "encoding-type")] + pub encoding_type: Option, + pub uploads: Option, + pub delete: Option, + pub versioning: Option, + pub tagging: Option, + pub cors: Option, + pub location: Option, + pub encryption: Option, + pub lifecycle: Option, + pub acl: Option, + pub quota: Option, + pub policy: Option, + #[serde(rename = "policyStatus")] + pub policy_status: Option, + pub replication: Option, + pub website: Option, + #[serde(rename = "object-lock")] + pub object_lock: Option, + pub notification: Option, + pub logging: Option, + pub versions: Option, + #[serde(rename = "key-marker")] + pub key_marker: Option, + #[serde(rename = "version-id-marker")] + pub version_id_marker: Option, +} + +async fn virtual_host_bucket_from_headers(state: &AppState, headers: &HeaderMap) -> Option { + let host = headers + .get("host") + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.split(':').next())? + .trim() + .to_ascii_lowercase(); + let (candidate, _) = host.split_once('.')?; + if myfsio_storage::validation::validate_bucket_name(candidate).is_some() { + return None; + } + match state.storage.bucket_exists(candidate).await { + Ok(true) => Some(candidate.to_string()), + _ => None, + } +} + +pub async fn get_bucket( + State(state): State, + Path(bucket): Path, + Query(query): Query, + headers: HeaderMap, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + if host_bucket != bucket { + return get_object( + State(state), + Path((host_bucket, bucket)), + Query(ObjectQuery::default()), + headers, + ) + .await; + } + } + + if !matches!(state.storage.bucket_exists(&bucket).await, Ok(true)) { + return storage_err_response(myfsio_storage::error::StorageError::BucketNotFound(bucket)); + } + + if query.quota.is_some() { + return config::get_quota(&state, &bucket).await; + } + if query.versioning.is_some() { + return config::get_versioning(&state, &bucket).await; + } + if query.tagging.is_some() { + return config::get_tagging(&state, &bucket).await; + } + if query.cors.is_some() { + return config::get_cors(&state, &bucket).await; + } + if query.location.is_some() { + return config::get_location(&state, &bucket).await; + } + if query.encryption.is_some() { + return config::get_encryption(&state, &bucket).await; + } + if query.lifecycle.is_some() { + return config::get_lifecycle(&state, &bucket).await; + } + if query.acl.is_some() { + return config::get_acl(&state, &bucket).await; + } + if query.policy.is_some() { + return config::get_policy(&state, &bucket).await; + } + if query.policy_status.is_some() { + return config::get_policy_status(&state, &bucket).await; + } + if query.replication.is_some() { + return config::get_replication(&state, &bucket).await; + } + if query.website.is_some() { + return config::get_website(&state, &bucket).await; + } + if query.object_lock.is_some() { + return config::get_object_lock(&state, &bucket).await; + } + if query.notification.is_some() { + return config::get_notification(&state, &bucket).await; + } + if query.logging.is_some() { + return config::get_logging(&state, &bucket).await; + } + if query.versions.is_some() { + return config::list_object_versions( + &state, + &bucket, + 
query.prefix.as_deref(), + query.delimiter.as_deref(), + query.key_marker.as_deref(), + query.version_id_marker.as_deref(), + query.max_keys.unwrap_or(1000), + ) + .await; + } + if query.uploads.is_some() { + return list_multipart_uploads_handler(&state, &bucket).await; + } + + let prefix = query.prefix.clone().unwrap_or_default(); + let delimiter = query.delimiter.clone().unwrap_or_default(); + let max_keys = query.max_keys.unwrap_or(1000); + let marker = query.marker.clone().unwrap_or_default(); + let list_type = query.list_type.clone().unwrap_or_default(); + let is_v2 = list_type == "2"; + + let effective_start = if is_v2 { + if let Some(token) = query.continuation_token.as_deref() { + match URL_SAFE.decode(token) { + Ok(bytes) => match String::from_utf8(bytes) { + Ok(decoded) => Some(decoded), + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid continuation token", + )); + } + }, + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid continuation token", + )); + } + } + } else { + query.start_after.clone() + } + } else if marker.is_empty() { + None + } else { + Some(marker.clone()) + }; + + if max_keys == 0 { + let has_any = if delimiter.is_empty() { + state + .storage + .list_objects( + &bucket, + &myfsio_common::types::ListParams { + max_keys: 1, + continuation_token: effective_start.clone(), + prefix: if prefix.is_empty() { + None + } else { + Some(prefix.clone()) + }, + start_after: if is_v2 { + query.start_after.clone() + } else { + None + }, + }, + ) + .await + .map(|r| !r.objects.is_empty()) + .unwrap_or(false) + } else { + state + .storage + .list_objects_shallow( + &bucket, + &myfsio_common::types::ShallowListParams { + prefix: prefix.clone(), + delimiter: delimiter.clone(), + max_keys: 1, + continuation_token: effective_start.clone(), + }, + ) + .await + .map(|r| !r.objects.is_empty() || !r.common_prefixes.is_empty()) + .unwrap_or(false) + }; + let xml = if is_v2 { + myfsio_xml::response::list_objects_v2_xml( + &bucket, + &prefix, + &delimiter, + 0, + &[], + &[], + has_any, + query.continuation_token.as_deref(), + None, + 0, + ) + } else { + myfsio_xml::response::list_objects_v1_xml( + &bucket, + &prefix, + &marker, + &delimiter, + 0, + &[], + &[], + has_any, + None, + ) + }; + return (StatusCode::OK, [("content-type", "application/xml")], xml).into_response(); + } + + if delimiter.is_empty() { + let params = myfsio_common::types::ListParams { + max_keys, + continuation_token: effective_start.clone(), + prefix: if prefix.is_empty() { + None + } else { + Some(prefix.clone()) + }, + start_after: if is_v2 { + query.start_after.clone() + } else { + None + }, + }; + match state.storage.list_objects(&bucket, ¶ms).await { + Ok(result) => { + let next_marker = if result.is_truncated { + result + .next_continuation_token + .clone() + .or_else(|| result.objects.last().map(|o| o.key.clone())) + } else { + None + }; + let encoding_type = query.encoding_type.as_deref(); + let xml = if is_v2 { + let next_token = next_marker + .as_deref() + .map(|s| URL_SAFE.encode(s.as_bytes())); + myfsio_xml::response::list_objects_v2_xml_with_encoding( + &bucket, + &prefix, + &delimiter, + max_keys, + &result.objects, + &[], + result.is_truncated, + query.continuation_token.as_deref(), + next_token.as_deref(), + result.objects.len(), + encoding_type, + ) + } else { + myfsio_xml::response::list_objects_v1_xml_with_encoding( + &bucket, + &prefix, + &marker, + &delimiter, + max_keys, + &result.objects, + &[], + 
result.is_truncated, + next_marker.as_deref(), + encoding_type, + ) + }; + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } + } else { + let params = myfsio_common::types::ShallowListParams { + prefix, + delimiter: delimiter.clone(), + max_keys, + continuation_token: effective_start, + }; + match state.storage.list_objects_shallow(&bucket, ¶ms).await { + Ok(result) => { + let encoding_type = query.encoding_type.as_deref(); + let xml = if is_v2 { + let next_token = result + .next_continuation_token + .as_deref() + .map(|s| URL_SAFE.encode(s.as_bytes())); + myfsio_xml::response::list_objects_v2_xml_with_encoding( + &bucket, + ¶ms.prefix, + &delimiter, + max_keys, + &result.objects, + &result.common_prefixes, + result.is_truncated, + query.continuation_token.as_deref(), + next_token.as_deref(), + result.objects.len() + result.common_prefixes.len(), + encoding_type, + ) + } else { + myfsio_xml::response::list_objects_v1_xml_with_encoding( + &bucket, + ¶ms.prefix, + &marker, + &delimiter, + max_keys, + &result.objects, + &result.common_prefixes, + result.is_truncated, + result.next_continuation_token.as_deref(), + encoding_type, + ) + }; + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } + } +} + +pub async fn post_bucket( + State(state): State, + Path(bucket): Path, + Query(query): Query, + headers: HeaderMap, + body: Body, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + if host_bucket != bucket { + return post_object( + State(state), + Path((host_bucket, bucket)), + Query(ObjectQuery::default()), + headers, + body, + ) + .await; + } + } + + if query.delete.is_some() { + return delete_objects_handler(&state, &bucket, body).await; + } + + if let Some(ct) = headers.get("content-type").and_then(|v| v.to_str().ok()) { + if ct.to_ascii_lowercase().starts_with("multipart/form-data") { + return post_object_form_handler(&state, &bucket, ct, body).await; + } + } + + (StatusCode::METHOD_NOT_ALLOWED).into_response() +} + +pub async fn delete_bucket( + State(state): State, + Path(bucket): Path, + Query(query): Query, + headers: HeaderMap, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + if host_bucket != bucket { + return delete_object( + State(state), + Path((host_bucket, bucket)), + Query(ObjectQuery::default()), + headers, + ) + .await; + } + } + + if query.quota.is_some() { + return config::delete_quota(&state, &bucket).await; + } + if query.tagging.is_some() { + return config::delete_tagging(&state, &bucket).await; + } + if query.cors.is_some() { + return config::delete_cors(&state, &bucket).await; + } + if query.encryption.is_some() { + return config::delete_encryption(&state, &bucket).await; + } + if query.lifecycle.is_some() { + return config::delete_lifecycle(&state, &bucket).await; + } + if query.website.is_some() { + return config::delete_website(&state, &bucket).await; + } + if query.policy.is_some() { + return config::delete_policy(&state, &bucket).await; + } + if query.replication.is_some() { + return config::delete_replication(&state, &bucket).await; + } + if query.object_lock.is_some() { + return config::delete_object_lock(&state, &bucket).await; + } + if query.notification.is_some() { + return config::delete_notification(&state, &bucket).await; + } + if query.logging.is_some() { + return config::delete_logging(&state, 
&bucket).await; + } + + match state.storage.delete_bucket(&bucket).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err_response(e), + } +} + +pub async fn head_bucket( + State(state): State, + Path(bucket): Path, + headers: HeaderMap, +) -> Response { + if let Some(host_bucket) = virtual_host_bucket_from_headers(&state, &headers).await { + if host_bucket != bucket { + return head_object( + State(state), + Path((host_bucket, bucket)), + Query(ObjectQuery::default()), + headers, + ) + .await; + } + } + + match state.storage.bucket_exists(&bucket).await { + Ok(true) => { + let mut headers = HeaderMap::new(); + headers.insert("x-amz-bucket-region", state.config.region.parse().unwrap()); + (StatusCode::OK, headers).into_response() + } + Ok(false) => { + storage_err_response(myfsio_storage::error::StorageError::BucketNotFound(bucket)) + } + Err(e) => storage_err_response(e), + } +} + +#[derive(serde::Deserialize, Default)] +pub struct ObjectQuery { + pub uploads: Option, + pub attributes: Option, + pub select: Option, + #[serde(rename = "uploadId")] + pub upload_id: Option, + #[serde(rename = "partNumber")] + pub part_number: Option, + #[serde(rename = "versionId")] + pub version_id: Option, + pub tagging: Option, + pub acl: Option, + pub retention: Option, + #[serde(rename = "legal-hold")] + pub legal_hold: Option, + #[serde(rename = "response-content-type")] + pub response_content_type: Option, + #[serde(rename = "response-content-disposition")] + pub response_content_disposition: Option, + #[serde(rename = "response-content-language")] + pub response_content_language: Option, + #[serde(rename = "response-content-encoding")] + pub response_content_encoding: Option, + #[serde(rename = "response-cache-control")] + pub response_cache_control: Option, + #[serde(rename = "response-expires")] + pub response_expires: Option, +} + +fn apply_response_overrides(headers: &mut HeaderMap, query: &ObjectQuery) { + if let Some(ref v) = query.response_content_type { + if let Ok(val) = v.parse() { + headers.insert("content-type", val); + } + } + if let Some(ref v) = query.response_content_disposition { + if let Ok(val) = v.parse() { + headers.insert("content-disposition", val); + } + } + if let Some(ref v) = query.response_content_language { + if let Ok(val) = v.parse() { + headers.insert("content-language", val); + } + } + if let Some(ref v) = query.response_content_encoding { + if let Ok(val) = v.parse() { + headers.insert("content-encoding", val); + } + } + if let Some(ref v) = query.response_cache_control { + if let Ok(val) = v.parse() { + headers.insert("cache-control", val); + } + } + if let Some(ref v) = query.response_expires { + if let Ok(val) = v.parse() { + headers.insert("expires", val); + } + } +} + +fn guessed_content_type(key: &str, explicit: Option<&str>) -> String { + explicit + .filter(|v| !v.trim().is_empty()) + .map(|v| v.to_string()) + .unwrap_or_else(|| { + mime_guess::from_path(key) + .first_raw() + .unwrap_or("application/octet-stream") + .to_string() + }) +} + +fn is_aws_chunked(headers: &HeaderMap) -> bool { + if let Some(enc) = headers + .get("content-encoding") + .and_then(|v| v.to_str().ok()) + { + if enc.to_ascii_lowercase().contains("aws-chunked") { + return true; + } + } + if let Some(sha) = headers + .get("x-amz-content-sha256") + .and_then(|v| v.to_str().ok()) + { + let lower = sha.to_ascii_lowercase(); + if lower.starts_with("streaming-") { + return true; + } + } + false +} + +fn insert_content_type(headers: &mut HeaderMap, key: &str, explicit: 
Option<&str>) { + let value = guessed_content_type(key, explicit); + if let Ok(header_value) = value.parse() { + headers.insert("content-type", header_value); + } else { + headers.insert("content-type", "application/octet-stream".parse().unwrap()); + } +} + +fn internal_header_pairs() -> &'static [(&'static str, &'static str, &'static str)] { + &[ + ("cache-control", "__cache_control__", "cache-control"), + ( + "content-disposition", + "__content_disposition__", + "content-disposition", + ), + ( + "content-language", + "__content_language__", + "content-language", + ), + ( + "content-encoding", + "__content_encoding__", + "content-encoding", + ), + ("expires", "__expires__", "expires"), + ( + "x-amz-website-redirect-location", + "__website_redirect_location__", + "x-amz-website-redirect-location", + ), + ] +} + +fn decoded_content_encoding(value: &str) -> Option { + let filtered: Vec<&str> = value + .split(',') + .map(str::trim) + .filter(|part| !part.is_empty() && !part.eq_ignore_ascii_case("aws-chunked")) + .collect(); + if filtered.is_empty() { + None + } else { + Some(filtered.join(", ")) + } +} + +fn insert_standard_object_metadata( + headers: &HeaderMap, + metadata: &mut HashMap, +) -> Result<(), Response> { + for (request_header, metadata_key, _) in internal_header_pairs() { + if let Some(value) = headers.get(*request_header).and_then(|v| v.to_str().ok()) { + if *request_header == "content-encoding" { + if let Some(decoded_encoding) = decoded_content_encoding(value) { + metadata.insert((*metadata_key).to_string(), decoded_encoding); + } + } else { + metadata.insert((*metadata_key).to_string(), value.to_string()); + } + } + } + if let Some(value) = headers + .get("x-amz-storage-class") + .and_then(|v| v.to_str().ok()) + { + metadata.insert("__storage_class__".to_string(), value.to_ascii_uppercase()); + } + + if let Some(value) = headers + .get("x-amz-object-lock-legal-hold") + .and_then(|v| v.to_str().ok()) + { + object_lock::set_legal_hold(metadata, value.eq_ignore_ascii_case("ON")); + } + + let retention_mode = headers + .get("x-amz-object-lock-mode") + .and_then(|v| v.to_str().ok()); + let retain_until = headers + .get("x-amz-object-lock-retain-until-date") + .and_then(|v| v.to_str().ok()); + if let (Some(mode), Some(retain_until)) = (retention_mode, retain_until) { + let mode = match mode.to_ascii_uppercase().as_str() { + "GOVERNANCE" => object_lock::RetentionMode::GOVERNANCE, + "COMPLIANCE" => object_lock::RetentionMode::COMPLIANCE, + _ => { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-object-lock-mode", + ))) + } + }; + let retain_until_date = DateTime::parse_from_rfc3339(retain_until) + .map(|value| value.with_timezone(&Utc)) + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-object-lock-retain-until-date", + )) + })?; + object_lock::set_object_retention( + metadata, + &object_lock::ObjectLockRetention { + mode, + retain_until_date, + }, + ) + .map_err(|message| { + s3_error_response(S3Error::new(S3ErrorCode::InvalidArgument, message)) + })?; + } + Ok(()) +} + +const CANNED_ACL_VALUES: &[&str] = &[ + "private", + "public-read", + "public-read-write", + "authenticated-read", + "bucket-owner-read", + "bucket-owner-full-control", + "aws-exec-read", +]; + +fn apply_canned_acl_header( + headers: &HeaderMap, + metadata: &mut HashMap, +) -> Result<(), Response> { + let Some(raw) = headers.get("x-amz-acl").and_then(|v| v.to_str().ok()) else { + return Ok(()); + }; + let value = 
raw.trim(); + if value.is_empty() { + return Ok(()); + } + if !CANNED_ACL_VALUES + .iter() + .any(|known| known.eq_ignore_ascii_case(value)) + { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + format!("Unsupported canned ACL: {}", value), + ))); + } + let acl = crate::services::acl::create_canned_acl(value, "myfsio"); + crate::services::acl::store_object_acl(metadata, &acl); + Ok(()) +} + +fn validate_sse_request(state: &AppState, headers: &HeaderMap) -> Result<(), Response> { + let alg = headers + .get("x-amz-server-side-encryption") + .and_then(|v| v.to_str().ok()) + .map(str::trim) + .filter(|s| !s.is_empty()); + let Some(alg) = alg else { + return Ok(()); + }; + if alg != "AES256" && alg != "aws:kms" { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + format!("Unsupported server-side encryption algorithm: {}", alg), + ))); + } + if alg == "aws:kms" && !state.config.kms_enabled { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "KMS is not enabled on this server", + ))); + } + if state.encryption.is_none() { + return Err(s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Server-side encryption is not enabled on this server", + ))); + } + Ok(()) +} + +fn apply_stored_response_headers(headers: &mut HeaderMap, metadata: &HashMap) { + for (_, metadata_key, response_header) in internal_header_pairs() { + if let Some(value) = metadata + .get(*metadata_key) + .and_then(|value| value.parse().ok()) + { + headers.insert(*response_header, value); + } + } + if let Some(value) = metadata + .get("__storage_class__") + .and_then(|value| value.parse().ok()) + { + headers.insert("x-amz-storage-class", value); + } +} + +fn apply_user_metadata(headers: &mut HeaderMap, metadata: &HashMap) { + for (k, v) in metadata { + if let Ok(header_val) = v.parse() { + let header_name = format!("x-amz-meta-{}", k); + if let Ok(name) = header_name.parse::() { + headers.insert(name, header_val); + } + } + } +} + +fn is_null_version(version_id: Option<&str>) -> bool { + version_id.is_none_or(|value| value == "null") +} + +fn bad_digest_response(message: impl Into) -> Response { + s3_error_response(S3Error::new(S3ErrorCode::BadDigest, message)) +} + +fn base64_header_bytes(headers: &HeaderMap, name: &str) -> Result>, Response> { + let Some(value) = headers.get(name).and_then(|v| v.to_str().ok()) else { + return Ok(None); + }; + STANDARD + .decode(value.trim()) + .map(Some) + .map_err(|_| bad_digest_response(format!("Invalid base64 value for {}", name))) +} + +fn has_upload_checksum(headers: &HeaderMap) -> bool { + headers.contains_key("content-md5") + || headers.contains_key("x-amz-checksum-sha256") + || headers.contains_key("x-amz-checksum-crc32") +} + +fn persist_additional_checksums(headers: &HeaderMap, metadata: &mut HashMap) { + for algo in [ + "sha256", "sha1", "crc32", "crc32c", "crc64nvme", + ] { + let header_name = format!("x-amz-checksum-{}", algo); + if let Some(value) = headers.get(&header_name).and_then(|v| v.to_str().ok()) { + let trimmed = value.trim(); + if !trimmed.is_empty() { + metadata.insert(format!("__checksum_{}__", algo), trimmed.to_string()); + } + } + } + if let Some(value) = headers + .get("x-amz-sdk-checksum-algorithm") + .and_then(|v| v.to_str().ok()) + { + let trimmed = value.trim().to_ascii_uppercase(); + if !trimmed.is_empty() { + metadata.insert("__checksum_algorithm__".to_string(), trimmed); + } + } +} + +fn apply_stored_checksum_headers(resp_headers: &mut HeaderMap, metadata: 
&HashMap) { + for algo in [ + "sha256", "sha1", "crc32", "crc32c", "crc64nvme", + ] { + if let Some(value) = metadata.get(&format!("__checksum_{}__", algo)) { + if let Ok(parsed) = value.parse() { + resp_headers.insert( + axum::http::HeaderName::from_bytes( + format!("x-amz-checksum-{}", algo).as_bytes(), + ) + .unwrap(), + parsed, + ); + } + } + } +} + +fn validate_upload_checksums(headers: &HeaderMap, data: &[u8]) -> Result<(), Response> { + if let Some(expected) = base64_header_bytes(headers, "content-md5")? { + if expected.len() != 16 || Md5::digest(data).as_slice() != expected.as_slice() { + return Err(bad_digest_response( + "The Content-MD5 you specified did not match what we received", + )); + } + } + + if let Some(expected) = base64_header_bytes(headers, "x-amz-checksum-sha256")? { + if Sha256::digest(data).as_slice() != expected.as_slice() { + return Err(bad_digest_response( + "The x-amz-checksum-sha256 you specified did not match what we received", + )); + } + } + + if let Some(expected) = base64_header_bytes(headers, "x-amz-checksum-crc32")? { + let actual = crc32fast::hash(data).to_be_bytes(); + if expected.as_slice() != actual { + return Err(bad_digest_response( + "The x-amz-checksum-crc32 you specified did not match what we received", + )); + } + } + + Ok(()) +} + +async fn collect_upload_body(body: Body, aws_chunked: bool) -> Result { + if aws_chunked { + let mut reader = chunked::decode_body(body); + let mut data = Vec::new(); + reader.read_to_end(&mut data).await.map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidRequest, + "Failed to read aws-chunked request body", + )) + })?; + return Ok(bytes::Bytes::from(data)); + } + + http_body_util::BodyExt::collect(body) + .await + .map(|collected| collected.to_bytes()) + .map_err(|err| { + if let Some(message) = crate::middleware::sha_body::sha256_mismatch_message(&err) { + bad_digest_response(message) + } else { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidRequest, + "Failed to read request body", + )) + } + }) +} + +fn parse_tagging_header(value: &str) -> Result, Response> { + let mut tags = Vec::new(); + if value.trim().is_empty() { + return Ok(tags); + } + + for pair in value.split('&') { + let (raw_key, raw_value) = pair.split_once('=').ok_or_else(|| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidTag, + "The x-amz-tagging header must use query-string key=value pairs", + )) + })?; + let key = percent_decode_str(raw_key) + .decode_utf8() + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidTag, + "Tag keys must be valid UTF-8", + )) + })? + .to_string(); + let value = percent_decode_str(raw_value) + .decode_utf8() + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidTag, + "Tag values must be valid UTF-8", + )) + })? 
+ .to_string(); + tags.push(myfsio_common::types::Tag { key, value }); + } + + Ok(tags) +} + +fn parse_copy_source(copy_source: &str) -> Result<(String, String, Option), Response> { + let source = copy_source.strip_prefix('/').unwrap_or(copy_source); + let (bucket_raw, key_and_query) = source.split_once('/').ok_or_else(|| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source", + )) + })?; + let (key_raw, query) = key_and_query + .split_once('?') + .map(|(key, query)| (key, Some(query))) + .unwrap_or((key_and_query, None)); + + let bucket = percent_decode_str(bucket_raw) + .decode_utf8() + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source bucket encoding", + )) + })? + .to_string(); + let key = percent_decode_str(key_raw) + .decode_utf8() + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source key encoding", + )) + })? + .to_string(); + + let mut version_id = None; + if let Some(query) = query { + for pair in query.split('&') { + let Some((name, value)) = pair.split_once('=') else { + continue; + }; + if name == "versionId" { + version_id = Some( + percent_decode_str(value) + .decode_utf8() + .map_err(|_| { + s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source versionId encoding", + )) + })? + .to_string(), + ); + break; + } + } + } + + Ok((bucket, key, version_id)) +} + +pub async fn put_object( + State(state): State, + Path((bucket, key)): Path<(String, String)>, + Query(query): Query, + headers: HeaderMap, + body: Body, +) -> Response { + if query.tagging.is_some() { + return config::put_object_tagging(&state, &bucket, &key, body).await; + } + if query.acl.is_some() { + return config::put_object_acl(&state, &bucket, &key, &headers, body).await; + } + if query.retention.is_some() { + return config::put_object_retention(&state, &bucket, &key, &headers, body).await; + } + if query.legal_hold.is_some() { + return config::put_object_legal_hold(&state, &bucket, &key, body).await; + } + + if let Some(ref upload_id) = query.upload_id { + if let Some(part_number) = query.part_number { + if let Some(copy_source) = headers + .get("x-amz-copy-source") + .and_then(|v| v.to_str().ok()) + { + let range = headers + .get("x-amz-copy-source-range") + .and_then(|v| v.to_str().ok()); + return upload_part_copy_handler( + &state, + &bucket, + upload_id, + part_number, + copy_source, + range, + &headers, + ) + .await; + } + return upload_part_handler_with_chunking( + &state, + &bucket, + upload_id, + part_number, + body, + is_aws_chunked(&headers), + ) + .await; + } + } + + if let Some(copy_source) = headers + .get("x-amz-copy-source") + .and_then(|v| v.to_str().ok()) + { + return copy_object_handler(&state, copy_source, &bucket, &key, &headers).await; + } + + if let Err(response) = + ensure_object_lock_allows_write(&state, &bucket, &key, Some(&headers)).await + { + return response; + } + if let Some(response) = evaluate_put_preconditions(&state, &bucket, &key, &headers).await { + return response; + } + + let content_type = guessed_content_type( + &key, + headers.get("content-type").and_then(|v| v.to_str().ok()), + ); + + let mut metadata = HashMap::new(); + metadata.insert("__content_type__".to_string(), content_type); + if let Err(response) = insert_standard_object_metadata(&headers, &mut metadata) { + return response; + } + if let Err(response) = apply_canned_acl_header(&headers, &mut metadata) { + return 
response; + } + if let Err(response) = validate_sse_request(&state, &headers) { + return response; + } + + for (name, value) in headers.iter() { + let name_str = name.as_str(); + if let Some(meta_key) = name_str.strip_prefix("x-amz-meta-") { + if let Ok(val) = value.to_str() { + metadata.insert(meta_key.to_string(), val.to_string()); + } + } + } + + let tags = match headers + .get("x-amz-tagging") + .and_then(|value| value.to_str().ok()) + .map(parse_tagging_header) + .transpose() + { + Ok(tags) => tags, + Err(response) => return response, + }; + if let Some(ref tags) = tags { + if tags.len() > state.config.object_tag_limit { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidTag, + format!("Maximum {} tags allowed", state.config.object_tag_limit), + )); + } + } + + persist_additional_checksums(&headers, &mut metadata); + + let aws_chunked = is_aws_chunked(&headers); + let boxed: myfsio_storage::traits::AsyncReadStream = if has_upload_checksum(&headers) { + let data = match collect_upload_body(body, aws_chunked).await { + Ok(data) => data, + Err(response) => return response, + }; + if let Err(response) = validate_upload_checksums(&headers, &data) { + return response; + } + Box::pin(std::io::Cursor::new(data)) + } else if aws_chunked { + Box::pin(chunked::decode_body(body)) + } else { + let stream = tokio_util::io::StreamReader::new( + body.into_data_stream() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)), + ); + Box::pin(stream) + }; + + match state + .storage + .put_object(&bucket, &key, boxed, Some(metadata)) + .await + { + Ok(meta) => { + if let Some(ref tags) = tags { + if let Err(e) = state.storage.set_object_tags(&bucket, &key, tags).await { + return storage_err_response(e); + } + } + if let Some(enc_ctx) = resolve_encryption_context(&state, &bucket, &headers).await { + if let Some(ref enc_svc) = state.encryption { + let obj_path = match state.storage.get_object_path(&bucket, &key).await { + Ok(p) => p, + Err(e) => return storage_err_response(e), + }; + let tmp_dir = state.config.storage_root.join(".myfsio.sys").join("tmp"); + let _ = tokio::fs::create_dir_all(&tmp_dir).await; + let enc_tmp = tmp_dir.join(format!("enc-{}", uuid::Uuid::new_v4())); + + match enc_svc.encrypt_object(&obj_path, &enc_tmp, &enc_ctx).await { + Ok(enc_meta) => { + if let Err(e) = tokio::fs::rename(&enc_tmp, &obj_path).await { + let _ = tokio::fs::remove_file(&enc_tmp).await; + return storage_err_response( + myfsio_storage::error::StorageError::Io(e), + ); + } + let enc_size = tokio::fs::metadata(&obj_path) + .await + .map(|m| m.len()) + .unwrap_or(0); + + let mut enc_metadata = enc_meta.to_metadata_map(); + let all_meta = + match state.storage.get_object_metadata(&bucket, &key).await { + Ok(m) => m, + Err(_) => HashMap::new(), + }; + for (k, v) in &all_meta { + enc_metadata.entry(k.clone()).or_insert_with(|| v.clone()); + } + enc_metadata.insert("__size__".to_string(), enc_size.to_string()); + let _ = state + .storage + .put_object_metadata(&bucket, &key, &enc_metadata) + .await; + + let mut resp_headers = HeaderMap::new(); + if let Some(ref etag) = meta.etag { + resp_headers + .insert("etag", format!("\"{}\"", etag).parse().unwrap()); + } + if let Some(ref vid) = meta.version_id { + if let Ok(value) = vid.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } + resp_headers.insert( + "x-amz-server-side-encryption", + enc_ctx.algorithm.as_str().parse().unwrap(), + ); + apply_stored_checksum_headers(&mut resp_headers, &enc_metadata); + 
notifications::emit_object_created( + &state, + &bucket, + &key, + meta.size, + meta.etag.as_deref(), + "", + "", + "", + "Put", + ); + trigger_replication(&state, &bucket, &key, "write"); + return (StatusCode::OK, resp_headers).into_response(); + } + Err(e) => { + let _ = tokio::fs::remove_file(&enc_tmp).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + format!("Encryption failed: {}", e), + )); + } + } + } + } + + let mut resp_headers = HeaderMap::new(); + if let Some(ref etag) = meta.etag { + resp_headers.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + } + if let Some(ref vid) = meta.version_id { + if let Ok(value) = vid.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } + let stored = state + .storage + .get_object_metadata(&bucket, &key) + .await + .unwrap_or_default(); + apply_stored_checksum_headers(&mut resp_headers, &stored); + notifications::emit_object_created( + &state, + &bucket, + &key, + meta.size, + meta.etag.as_deref(), + "", + "", + "", + "Put", + ); + trigger_replication(&state, &bucket, &key, "write"); + (StatusCode::OK, resp_headers).into_response() + } + Err(e) => storage_err_response(e), + } +} + +pub async fn get_object( + State(state): State, + Path((bucket, key)): Path<(String, String)>, + Query(query): Query, + headers: HeaderMap, +) -> Response { + if query.tagging.is_some() { + return config::get_object_tagging(&state, &bucket, &key).await; + } + if query.acl.is_some() { + return config::get_object_acl(&state, &bucket, &key).await; + } + if query.retention.is_some() { + return config::get_object_retention(&state, &bucket, &key).await; + } + if query.legal_hold.is_some() { + return config::get_object_legal_hold(&state, &bucket, &key).await; + } + if query.attributes.is_some() { + return object_attributes_handler(&state, &bucket, &key, &headers).await; + } + if let Some(ref upload_id) = query.upload_id { + return list_parts_handler(&state, &bucket, &key, upload_id).await; + } + + let version_id = query + .version_id + .as_deref() + .filter(|value| !is_null_version(Some(*value))); + + let range_header = headers + .get("range") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if let Some(ref range_str) = range_header { + return range_get_handler(&state, &bucket, &key, range_str, &query, &headers).await; + } + + let stream_cap = state.config.stream_chunk_size.max(64 * 1024); + + // Take a single snapshot of the live object BEFORE deciding whether it's + // encrypted. If we sniffed encryption from head_meta first, a PUT could + // flip the object's encryption state between head and snapshot — leaving + // us either serving ciphertext through the raw path or failing because + // the snapshot no longer has encryption metadata. All decisions must + // come from this snapshot. + let tmp_dir = state.config.storage_root.join(".myfsio.sys").join("tmp"); + let _ = tokio::fs::create_dir_all(&tmp_dir).await; + let snap_link = tmp_dir.join(format!("src-{}", uuid::Uuid::new_v4())); + let snap_res = match version_id { + Some(v) => { + state + .storage + .snapshot_object_version_to_link(&bucket, &key, v, &snap_link) + .await + } + None => { + state + .storage + .snapshot_object_to_link(&bucket, &key, &snap_link) + .await + } + }; + let snap_meta = match snap_res { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }; + + // Evaluate preconditions against the served snapshot's metadata. 
A HEAD + // taken earlier could disagree with the snapshot if a concurrent PUT + // landed in between, causing us to serve a body that doesn't satisfy + // the caller's If-Match / If-None-Match / time conditions. + if let Some(resp) = evaluate_get_preconditions(&headers, &snap_meta) { + let _ = tokio::fs::remove_file(&snap_link).await; + return resp; + } + + let enc_info = myfsio_crypto::encryption::EncryptionMetadata::from_metadata( + &snap_meta.internal_metadata, + ); + + let (file, file_size, enc_header): (tokio::fs::File, u64, Option<&str>) = match ( + enc_info.as_ref(), + state.encryption.as_ref(), + ) { + (Some(enc_info), Some(enc_svc)) => { + let dec_tmp = tmp_dir.join(format!("dec-{}", uuid::Uuid::new_v4())); + let customer_key = extract_sse_c_key(&headers); + let decrypt_res = enc_svc + .decrypt_object(&snap_link, &dec_tmp, enc_info, customer_key.as_deref()) + .await; + // Hardlink served its purpose; the decrypted plaintext is in + // dec_tmp now. + let _ = tokio::fs::remove_file(&snap_link).await; + if let Err(e) = decrypt_res { + let _ = tokio::fs::remove_file(&dec_tmp).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + format!("Decryption failed: {}", e), + )); + } + let file = match open_self_deleting(dec_tmp.clone()).await { + Ok(f) => f, + Err(e) => { + let _ = tokio::fs::remove_file(&dec_tmp).await; + return storage_err_response(myfsio_storage::error::StorageError::Io(e)); + } + }; + let file_size = file.metadata().await.map(|m| m.len()).unwrap_or(0); + (file, file_size, Some(enc_info.algorithm.as_str())) + } + (Some(_), None) => { + // Snapshot is encrypted but the server has no encryption + // service configured to decrypt it. Serving ciphertext as + // plaintext would be actively wrong; refuse explicitly. + let _ = tokio::fs::remove_file(&snap_link).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + "Object is encrypted but encryption service is disabled".to_string(), + )); + } + (None, _) => { + // Raw path: stream directly from the hardlink, which becomes + // self-deleting on open (kernel keeps the inode alive via our + // fd). 
+ let file = match open_self_deleting(snap_link.clone()).await { + Ok(f) => f, + Err(e) => { + let _ = tokio::fs::remove_file(&snap_link).await; + return storage_err_response(myfsio_storage::error::StorageError::Io(e)); + } + }; + (file, snap_meta.size, None) + } + }; + + let stream = ReaderStream::with_capacity(file, stream_cap); + let body = Body::from_stream(stream); + + let meta = &snap_meta; + let mut resp_headers = HeaderMap::new(); + resp_headers.insert("content-length", file_size.to_string().parse().unwrap()); + if let Some(ref etag) = meta.etag { + resp_headers.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + } + insert_content_type(&mut resp_headers, &key, meta.content_type.as_deref()); + resp_headers.insert( + "last-modified", + meta.last_modified + .format("%a, %d %b %Y %H:%M:%S GMT") + .to_string() + .parse() + .unwrap(), + ); + resp_headers.insert("accept-ranges", "bytes".parse().unwrap()); + if let Some(alg) = enc_header { + resp_headers.insert("x-amz-server-side-encryption", alg.parse().unwrap()); + } + apply_stored_response_headers(&mut resp_headers, &meta.internal_metadata); + apply_stored_checksum_headers(&mut resp_headers, &meta.internal_metadata); + if let Some(ref requested_version) = query.version_id { + if let Ok(value) = requested_version.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } else if let Some(ref vid) = meta.version_id { + if let Ok(value) = vid.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } + apply_user_metadata(&mut resp_headers, &meta.metadata); + apply_response_overrides(&mut resp_headers, &query); + + (StatusCode::OK, resp_headers, body).into_response() +} + +pub async fn post_object( + State(state): State, + Path((bucket, key)): Path<(String, String)>, + Query(query): Query, + headers: HeaderMap, + body: Body, +) -> Response { + if query.uploads.is_some() { + return initiate_multipart_handler(&state, &bucket, &key).await; + } + + if let Some(ref upload_id) = query.upload_id { + return complete_multipart_handler(&state, &bucket, &key, upload_id, body).await; + } + + if query.select.is_some() { + return select::post_select_object_content(&state, &bucket, &key, &headers, body).await; + } + + (StatusCode::METHOD_NOT_ALLOWED).into_response() +} + +pub async fn delete_object( + State(state): State, + Path((bucket, key)): Path<(String, String)>, + Query(query): Query, + headers: HeaderMap, +) -> Response { + if query.tagging.is_some() { + return config::delete_object_tagging(&state, &bucket, &key).await; + } + if query.acl.is_some() { + return StatusCode::NO_CONTENT.into_response(); + } + + if let Some(ref upload_id) = query.upload_id { + return abort_multipart_handler(&state, &bucket, upload_id).await; + } + + if let Some(version_id) = query + .version_id + .as_deref() + .filter(|value| !is_null_version(Some(*value))) + { + if let Err(response) = + ensure_object_version_lock_allows_delete(&state, &bucket, &key, version_id, &headers) + .await + { + return response; + } + return match state + .storage + .delete_object_version(&bucket, &key, version_id) + .await + { + Ok(outcome) => { + let mut resp_headers = HeaderMap::new(); + if let Some(ref vid) = outcome.version_id { + if let Ok(value) = vid.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } + if outcome.is_delete_marker { + resp_headers.insert("x-amz-delete-marker", "true".parse().unwrap()); + } + notifications::emit_object_removed(&state, &bucket, &key, "", "", "", "Delete"); + trigger_replication(&state, &bucket, &key, "delete"); + 
(StatusCode::NO_CONTENT, resp_headers).into_response() + } + Err(e) => storage_err_response(e), + }; + } + + if let Err(response) = + ensure_object_lock_allows_write(&state, &bucket, &key, Some(&headers)).await + { + return response; + } + + match state.storage.delete_object(&bucket, &key).await { + Ok(outcome) => { + let mut resp_headers = HeaderMap::new(); + if let Some(ref vid) = outcome.version_id { + if let Ok(value) = vid.parse() { + resp_headers.insert("x-amz-version-id", value); + } + } + if outcome.is_delete_marker { + resp_headers.insert("x-amz-delete-marker", "true".parse().unwrap()); + } + notifications::emit_object_removed(&state, &bucket, &key, "", "", "", "Delete"); + trigger_replication(&state, &bucket, &key, "delete"); + (StatusCode::NO_CONTENT, resp_headers).into_response() + } + Err(e) => storage_err_response(e), + } +} + +pub async fn head_object( + State(state): State, + Path((bucket, key)): Path<(String, String)>, + Query(query): Query, + headers: HeaderMap, +) -> Response { + let version_id = query + .version_id + .as_deref() + .filter(|value| !is_null_version(Some(*value))); + let result = match version_id { + Some(version_id) => { + state + .storage + .head_object_version(&bucket, &key, version_id) + .await + } + None => state.storage.head_object(&bucket, &key).await, + }; + + match result { + Ok(meta) => { + if let Some(resp) = evaluate_get_preconditions(&headers, &meta) { + return resp; + } + let mut headers = HeaderMap::new(); + headers.insert("content-length", meta.size.to_string().parse().unwrap()); + if let Some(ref etag) = meta.etag { + headers.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + } + insert_content_type(&mut headers, &key, meta.content_type.as_deref()); + headers.insert( + "last-modified", + meta.last_modified + .format("%a, %d %b %Y %H:%M:%S GMT") + .to_string() + .parse() + .unwrap(), + ); + headers.insert("accept-ranges", "bytes".parse().unwrap()); + apply_stored_response_headers(&mut headers, &meta.internal_metadata); + apply_stored_checksum_headers(&mut headers, &meta.internal_metadata); + if let Some(ref requested_version) = query.version_id { + if let Ok(value) = requested_version.parse() { + headers.insert("x-amz-version-id", value); + } + } else if let Some(ref vid) = meta.version_id { + if let Ok(value) = vid.parse() { + headers.insert("x-amz-version-id", value); + } + } + + apply_user_metadata(&mut headers, &meta.metadata); + + (StatusCode::OK, headers).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn initiate_multipart_handler(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.initiate_multipart(bucket, key, None).await { + Ok(upload_id) => { + let xml = myfsio_xml::response::initiate_multipart_upload_xml(bucket, key, &upload_id); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn upload_part_handler_with_chunking( + state: &AppState, + bucket: &str, + upload_id: &str, + part_number: u32, + body: Body, + aws_chunked: bool, +) -> Response { + let boxed: myfsio_storage::traits::AsyncReadStream = if aws_chunked { + Box::pin(chunked::decode_body(body)) + } else { + let stream = tokio_util::io::StreamReader::new( + body.into_data_stream() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)), + ); + Box::pin(stream) + }; + + match state + .storage + .upload_part(bucket, upload_id, part_number, boxed) + .await + { + Ok(etag) => { + let mut headers = HeaderMap::new(); + 
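// Editor's note, not part of the original diff: S3 SDKs echo part ETags back
// verbatim in CompleteMultipartUpload (usually with the surrounding quotes),
// which is why the value inserted below is the digest wrapped in literal
// double quotes; the completion handler later strips quotes before comparing.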
headers.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + (StatusCode::OK, headers).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn upload_part_copy_handler( + state: &AppState, + dst_bucket: &str, + upload_id: &str, + part_number: u32, + copy_source: &str, + range_header: Option<&str>, + headers: &HeaderMap, +) -> Response { + let source = copy_source.strip_prefix('/').unwrap_or(copy_source); + let source = match percent_encoding::percent_decode_str(source).decode_utf8() { + Ok(s) => s.into_owned(), + Err(_) => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source encoding", + )); + } + }; + let (src_bucket, src_key) = match source.split_once('/') { + Some((b, k)) => (b.to_string(), k.to_string()), + None => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source", + )); + } + }; + + let source_meta = match state.storage.head_object(&src_bucket, &src_key).await { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }; + if let Some(resp) = evaluate_copy_preconditions(headers, &source_meta) { + return resp; + } + + let range = match range_header { + Some(r) => match parse_copy_source_range(r) { + Some(parsed) => Some(parsed), + None => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InvalidArgument, + "Invalid x-amz-copy-source-range", + )); + } + }, + None => None, + }; + + match state + .storage + .upload_part_copy( + dst_bucket, + upload_id, + part_number, + &src_bucket, + &src_key, + range, + ) + .await + { + Ok((etag, last_modified)) => { + let lm = myfsio_xml::response::format_s3_datetime(&last_modified); + let xml = myfsio_xml::response::copy_part_result_xml(&etag, &lm); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +fn parse_copy_source_range(value: &str) -> Option<(u64, u64)> { + let v = value.trim(); + let v = v.strip_prefix("bytes=")?; + let (start, end) = v.split_once('-')?; + let start: u64 = start.trim().parse().ok()?; + let end: u64 = end.trim().parse().ok()?; + if start > end { + return None; + } + Some((start, end)) +} + +async fn complete_multipart_handler( + state: &AppState, + bucket: &str, + key: &str, + upload_id: &str, + body: Body, +) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::MalformedXML, + "Failed to read request body", + )); + } + }; + + let xml_str = String::from_utf8_lossy(&body_bytes); + let parsed = match myfsio_xml::request::parse_complete_multipart_upload(&xml_str) { + Ok(p) => p, + Err(e) => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::MalformedXML, + e, + )); + } + }; + + if parsed.parts.is_empty() { + return s3_error_response(S3Error::new( + S3ErrorCode::MalformedXML, + "CompleteMultipartUpload requires at least one part", + )); + } + + let mut last_part_num: u32 = 0; + for p in &parsed.parts { + if p.part_number == 0 { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPartOrder, + "Part numbers must be greater than zero", + )); + } + if p.part_number <= last_part_num { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPartOrder, + "Parts must be specified in ascending order with no duplicates", + )); + } + last_part_num = 
p.part_number; + } + + let stored_parts = match state.storage.list_parts(bucket, upload_id).await { + Ok(list) => list, + Err(e) => return storage_err_response(e), + }; + let stored_map: HashMap = stored_parts + .iter() + .map(|p| (p.part_number, (p.etag.clone(), p.size))) + .collect(); + let min_part_size: u64 = state.config.multipart_min_part_size; + let total_parts = parsed.parts.len(); + for (idx, p) in parsed.parts.iter().enumerate() { + let stored = match stored_map.get(&p.part_number) { + Some(s) => s, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPart, + format!("Part {} not found", p.part_number), + )); + } + }; + let client_etag = p.etag.trim().trim_matches('"').to_ascii_lowercase(); + let stored_etag = stored.0.trim().trim_matches('"').to_ascii_lowercase(); + if !client_etag.is_empty() && client_etag != stored_etag { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPart, + format!("ETag mismatch for part {}", p.part_number), + )); + } + let is_final = idx + 1 == total_parts; + if !is_final && stored.1 < min_part_size { + return s3_error_response(S3Error::new( + S3ErrorCode::EntityTooSmall, + format!( + "Part {} is smaller than the minimum allowed size of {} bytes", + p.part_number, min_part_size + ), + )); + } + } + + let parts: Vec = parsed + .parts + .iter() + .map(|p| PartInfo { + part_number: p.part_number, + etag: p.etag.clone(), + }) + .collect(); + + match state + .storage + .complete_multipart(bucket, upload_id, &parts) + .await + { + Ok(meta) => { + let etag = meta.etag.as_deref().unwrap_or(""); + let xml = myfsio_xml::response::complete_multipart_upload_xml( + bucket, + key, + etag, + &format!("/{}/{}", bucket, key), + ); + trigger_replication(state, bucket, key, "write"); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn abort_multipart_handler(state: &AppState, bucket: &str, upload_id: &str) -> Response { + match state.storage.abort_multipart(bucket, upload_id).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => storage_err_response(e), + } +} + +async fn list_multipart_uploads_handler(state: &AppState, bucket: &str) -> Response { + match state.storage.list_multipart_uploads(bucket).await { + Ok(uploads) => { + let xml = myfsio_xml::response::list_multipart_uploads_xml(bucket, &uploads); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn list_parts_handler( + state: &AppState, + bucket: &str, + key: &str, + upload_id: &str, +) -> Response { + match state.storage.list_parts(bucket, upload_id).await { + Ok(parts) => { + let xml = myfsio_xml::response::list_parts_xml(bucket, key, upload_id, &parts); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn object_attributes_handler( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, +) -> Response { + let meta = match state.storage.head_object(bucket, key).await { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }; + + let requested = headers + .get("x-amz-object-attributes") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let attrs: std::collections::HashSet = requested + .split(',') + .map(|s| s.trim().to_ascii_lowercase()) + .filter(|s| !s.is_empty()) + .collect(); + let all = attrs.is_empty(); + + let stored_meta = state + .storage + .get_object_metadata(bucket, 
key) + .await + .unwrap_or_default(); + + let mut xml = String::from(""); + xml.push_str(""); + + if all || attrs.contains("etag") { + if let Some(etag) = &meta.etag { + let trimmed = etag.trim_matches('"'); + xml.push_str(&format!("\"{}\"", xml_escape(trimmed))); + } + } + if all || attrs.contains("storageclass") { + let sc = meta.storage_class.as_deref().unwrap_or("STANDARD"); + xml.push_str(&format!("{}", xml_escape(sc))); + } + if all || attrs.contains("objectsize") { + xml.push_str(&format!("{}", meta.size)); + } + if all || attrs.contains("checksum") { + let mut checksum_xml = String::new(); + for (algo, tag) in [ + ("sha256", "ChecksumSHA256"), + ("sha1", "ChecksumSHA1"), + ("crc32", "ChecksumCRC32"), + ("crc32c", "ChecksumCRC32C"), + ("crc64nvme", "ChecksumCRC64NVME"), + ] { + let key_name = format!("__checksum_{}__", algo); + if let Some(value) = stored_meta.get(&key_name) { + let trimmed = value.trim(); + if !trimmed.is_empty() { + checksum_xml.push_str(&format!( + "<{tag}>{}", + xml_escape(trimmed), + tag = tag + )); + } + } + } + if !checksum_xml.is_empty() { + xml.push_str(""); + xml.push_str(&checksum_xml); + xml.push_str(""); + } + } + if attrs.contains("objectparts") { + xml.push_str(""); + } + + xml.push_str(""); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() +} + +async fn copy_object_handler( + state: &AppState, + copy_source: &str, + dst_bucket: &str, + dst_key: &str, + headers: &HeaderMap, +) -> Response { + if let Err(response) = + ensure_object_lock_allows_write(state, dst_bucket, dst_key, Some(headers)).await + { + return response; + } + + let (src_bucket, src_key, src_version_id) = match parse_copy_source(copy_source) { + Ok(parts) => parts, + Err(response) => return response, + }; + + let source_meta = match src_version_id.as_deref() { + Some(version_id) if version_id != "null" => match state + .storage + .head_object_version(&src_bucket, &src_key, version_id) + .await + { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }, + _ => match state.storage.head_object(&src_bucket, &src_key).await { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }, + }; + if let Some(resp) = evaluate_copy_preconditions(headers, &source_meta) { + return resp; + } + + let metadata_directive = headers + .get("x-amz-metadata-directive") + .and_then(|v| v.to_str().ok()) + .map(|v| v.trim().to_ascii_uppercase()) + .unwrap_or_else(|| "COPY".to_string()); + let tagging_directive = headers + .get("x-amz-tagging-directive") + .and_then(|v| v.to_str().ok()) + .map(|v| v.trim().to_ascii_uppercase()) + .unwrap_or_else(|| "COPY".to_string()); + let replace_metadata = metadata_directive == "REPLACE"; + let replace_tagging = tagging_directive == "REPLACE"; + + let same_object = src_bucket == dst_bucket + && src_key == dst_key + && src_version_id.as_deref().unwrap_or("") == ""; + if same_object && !replace_metadata && !replace_tagging { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidRequest, + "This copy request is illegal because it is trying to copy an object to itself without changing the object's metadata, storage class, website redirect location or encryption attributes.", + )); + } + + let source_metadata_existing = match src_version_id.as_deref() { + Some(version_id) if version_id != "null" => { + match state + .storage + .get_object_version_metadata(&src_bucket, &src_key, version_id) + .await + { + Ok(metadata) => metadata, + Err(e) => return storage_err_response(e), + } + } + _ => match state + .storage + 
.get_object_metadata(&src_bucket, &src_key) + .await + { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }, + }; + + let dst_metadata = if replace_metadata { + let mut m: HashMap = HashMap::new(); + for (request_header, metadata_key, _) in internal_header_pairs() { + if let Some(value) = headers.get(*request_header).and_then(|v| v.to_str().ok()) { + if *request_header == "content-encoding" { + if let Some(decoded_encoding) = decoded_content_encoding(value) { + m.insert((*metadata_key).to_string(), decoded_encoding); + } + } else { + m.insert((*metadata_key).to_string(), value.to_string()); + } + } + } + let content_type = guessed_content_type( + dst_key, + headers.get("content-type").and_then(|v| v.to_str().ok()), + ); + m.insert("__content_type__".to_string(), content_type); + for (name, value) in headers.iter() { + let name_str = name.as_str(); + if let Some(meta_key) = name_str.strip_prefix("x-amz-meta-") { + if let Ok(val) = value.to_str() { + m.insert(meta_key.to_string(), val.to_string()); + } + } + } + if let Some(value) = headers + .get("x-amz-storage-class") + .and_then(|v| v.to_str().ok()) + { + m.insert("__storage_class__".to_string(), value.to_ascii_uppercase()); + } + m + } else { + source_metadata_existing.clone() + }; + + let (_meta, reader) = match src_version_id.as_deref() { + Some(version_id) if version_id != "null" => { + match state + .storage + .get_object_version(&src_bucket, &src_key, version_id) + .await + { + Ok(result) => result, + Err(e) => return storage_err_response(e), + } + } + _ => match state.storage.get_object(&src_bucket, &src_key).await { + Ok(result) => result, + Err(e) => return storage_err_response(e), + }, + }; + + let copy_result = state + .storage + .put_object(dst_bucket, dst_key, reader, Some(dst_metadata)) + .await; + + match copy_result { + Ok(meta) => { + if replace_tagging { + let tags = match headers + .get("x-amz-tagging") + .and_then(|value| value.to_str().ok()) + .map(parse_tagging_header) + .transpose() + { + Ok(tags) => tags, + Err(response) => return response, + }; + if let Some(ref tags) = tags { + if tags.len() > state.config.object_tag_limit { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidTag, + format!("Maximum {} tags allowed", state.config.object_tag_limit), + )); + } + if let Err(e) = state + .storage + .set_object_tags(dst_bucket, dst_key, tags) + .await + { + return storage_err_response(e); + } + } else { + let _ = state + .storage + .set_object_tags(dst_bucket, dst_key, &[]) + .await; + } + } + let etag = meta.etag.as_deref().unwrap_or(""); + let last_modified = myfsio_xml::response::format_s3_datetime(&meta.last_modified); + let xml = myfsio_xml::response::copy_object_result_xml(etag, &last_modified); + trigger_replication(state, dst_bucket, dst_key, "write"); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() + } + Err(e) => storage_err_response(e), + } +} + +async fn delete_objects_handler(state: &AppState, bucket: &str, body: Body) -> Response { + let body_bytes = match http_body_util::BodyExt::collect(body).await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::MalformedXML, + "Failed to read request body", + )); + } + }; + + let xml_str = String::from_utf8_lossy(&body_bytes); + let parsed = match myfsio_xml::request::parse_delete_objects(&xml_str) { + Ok(p) => p, + Err(e) => { + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::MalformedXML, + e, + )); + 
} + }; + + if parsed.objects.len() > 1000 { + return s3_error_response(S3Error::new( + S3ErrorCode::MalformedXML, + "The request must not contain more than 1000 keys", + )); + } + + use futures::stream::{self, StreamExt}; + + let results: Vec<(String, Option, Result)> = + stream::iter(parsed.objects.iter().cloned()) + .map(|obj| { + let state = state.clone(); + let bucket = bucket.to_string(); + async move { + let key = obj.key.clone(); + let requested_vid = obj.version_id.clone(); + let lock_check: Result<(), (String, String)> = match obj.version_id.as_deref() { + Some(version_id) if version_id != "null" => match state + .storage + .get_object_version_metadata(&bucket, &obj.key, version_id) + .await + { + Ok(metadata) => object_lock::can_delete_object(&metadata, false) + .map_err(|m| { + (S3ErrorCode::AccessDenied.as_str().to_string(), m) + }), + Err(err) => { + let s3err = S3Error::from(err); + Err((s3err.code.as_str().to_string(), s3err.message)) + } + }, + _ => match state.storage.head_object(&bucket, &obj.key).await { + Ok(_) => { + match state + .storage + .get_object_metadata(&bucket, &obj.key) + .await + { + Ok(metadata) => object_lock::can_delete_object(&metadata, false) + .map_err(|m| { + ( + S3ErrorCode::AccessDenied.as_str().to_string(), + m, + ) + }), + Err(err) => { + let s3err = S3Error::from(err); + Err((s3err.code.as_str().to_string(), s3err.message)) + } + } + } + Err(myfsio_storage::error::StorageError::ObjectNotFound { .. }) => { + Ok(()) + } + Err(myfsio_storage::error::StorageError::DeleteMarker { .. }) => { + Ok(()) + } + Err(err) => { + let s3err = S3Error::from(err); + Err((s3err.code.as_str().to_string(), s3err.message)) + } + }, + }; + + let result = match lock_check { + Err(e) => Err(e), + Ok(()) => { + let outcome = match obj.version_id.as_deref() { + Some(version_id) if version_id != "null" => { + state + .storage + .delete_object_version(&bucket, &obj.key, version_id) + .await + } + _ => state.storage.delete_object(&bucket, &obj.key).await, + }; + outcome.map_err(|e| { + let s3err = S3Error::from(e); + (s3err.code.as_str().to_string(), s3err.message) + }) + } + }; + (key, requested_vid, result) + } + }) + .buffer_unordered(32) + .collect() + .await; + + let mut deleted: Vec = Vec::new(); + let mut errors: Vec<(String, String, String)> = Vec::new(); + for (key, requested_vid, result) in results { + match result { + Ok(outcome) => { + notifications::emit_object_removed(state, bucket, &key, "", "", "", "Delete"); + trigger_replication(state, bucket, &key, "delete"); + let delete_marker_version_id = if outcome.is_delete_marker { + outcome.version_id.clone() + } else { + None + }; + deleted.push(myfsio_xml::response::DeletedEntry { + key, + version_id: requested_vid, + delete_marker: outcome.is_delete_marker, + delete_marker_version_id, + }); + } + Err((code, message)) => { + errors.push((key, code, message)); + } + } + } + + let xml = myfsio_xml::response::delete_result_xml(&deleted, &errors, parsed.quiet); + (StatusCode::OK, [("content-type", "application/xml")], xml).into_response() +} + +async fn range_get_handler( + state: &AppState, + bucket: &str, + key: &str, + range_str: &str, + query: &ObjectQuery, + headers: &HeaderMap, +) -> Response { + let version_id = query + .version_id + .as_deref() + .filter(|value| !is_null_version(Some(*value))); + + let tmp_dir = state.config.storage_root.join(".myfsio.sys").join("tmp"); + let _ = tokio::fs::create_dir_all(&tmp_dir).await; + let snap_link = tmp_dir.join(format!("rsrc-{}", uuid::Uuid::new_v4())); + + let 
snap_meta = match version_id { + Some(v) => { + state + .storage + .snapshot_object_version_to_link(bucket, key, v, &snap_link) + .await + } + None => { + state + .storage + .snapshot_object_to_link(bucket, key, &snap_link) + .await + } + }; + let meta = match snap_meta { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }; + + if let Some(resp) = evaluate_get_preconditions(headers, &meta) { + let _ = tokio::fs::remove_file(&snap_link).await; + return resp; + } + + let enc_info = + myfsio_crypto::encryption::EncryptionMetadata::from_metadata(&meta.internal_metadata); + + let (body_path, plaintext_size, enc_header): (std::path::PathBuf, u64, Option<&str>) = + match (enc_info.as_ref(), state.encryption.as_ref()) { + (Some(enc_info), Some(enc_svc)) => { + let customer_key = extract_sse_c_key(headers); + let has_fast_path = enc_info.chunk_size.is_some() + && enc_info.plaintext_size.is_some(); + + if has_fast_path { + let plaintext_size = enc_info.plaintext_size.unwrap(); + let (start, end) = match parse_range(range_str, plaintext_size) { + Some(r) => r, + None => { + let _ = tokio::fs::remove_file(&snap_link).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InvalidRange, + format!("Range not satisfiable for size {}", plaintext_size), + )); + } + }; + + let dec_tmp = tmp_dir.join(format!("rdec-{}", uuid::Uuid::new_v4())); + let res = enc_svc + .decrypt_object_range( + &snap_link, + &dec_tmp, + enc_info, + customer_key.as_deref(), + start, + end, + ) + .await; + let _ = tokio::fs::remove_file(&snap_link).await; + if let Err(e) = res { + let _ = tokio::fs::remove_file(&dec_tmp).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + format!("Decryption failed: {}", e), + )); + } + + return stream_partial_content( + state, + &dec_tmp, + start, + end, + plaintext_size, + &meta, + key, + query, + Some(enc_info.algorithm.as_str()), + /* already_trimmed */ true, + ) + .await; + } + + let dec_tmp = tmp_dir.join(format!("rdec-{}", uuid::Uuid::new_v4())); + let res = enc_svc + .decrypt_object(&snap_link, &dec_tmp, enc_info, customer_key.as_deref()) + .await; + let _ = tokio::fs::remove_file(&snap_link).await; + if let Err(e) = res { + let _ = tokio::fs::remove_file(&dec_tmp).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + format!("Decryption failed: {}", e), + )); + } + let plaintext_size = tokio::fs::metadata(&dec_tmp) + .await + .map(|m| m.len()) + .unwrap_or(0); + (dec_tmp, plaintext_size, Some(enc_info.algorithm.as_str())) + } + (Some(_), None) => { + let _ = tokio::fs::remove_file(&snap_link).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InternalError, + "Object is encrypted but encryption service is disabled".to_string(), + )); + } + (None, _) => (snap_link.clone(), meta.size, None), + }; + + let (start, end) = match parse_range(range_str, plaintext_size) { + Some(r) => r, + None => { + let _ = tokio::fs::remove_file(&body_path).await; + return s3_error_response(S3Error::new( + myfsio_common::error::S3ErrorCode::InvalidRange, + format!("Range not satisfiable for size {}", plaintext_size), + )); + } + }; + + stream_partial_content( + state, + &body_path, + start, + end, + plaintext_size, + &meta, + key, + query, + enc_header, + /* already_trimmed */ false, + ) + .await +} + +async fn stream_partial_content( + state: &AppState, + body_path: &std::path::Path, + start: u64, + end: u64, + plaintext_size: u64, + meta: 
&myfsio_common::types::ObjectMeta, + key: &str, + query: &ObjectQuery, + enc_header: Option<&str>, + already_trimmed: bool, +) -> Response { + let length = end - start + 1; + + let mut file = match open_self_deleting(body_path.to_path_buf()).await { + Ok(f) => f, + Err(e) => { + let _ = tokio::fs::remove_file(body_path).await; + return storage_err_response(myfsio_storage::error::StorageError::Io(e)); + } + }; + + if !already_trimmed { + if let Err(e) = file.seek(std::io::SeekFrom::Start(start)).await { + return storage_err_response(myfsio_storage::error::StorageError::Io(e)); + } + } + let limited = file.take(length); + + let stream_cap = state.config.stream_chunk_size.max(64 * 1024); + let stream = ReaderStream::with_capacity(limited, stream_cap); + let body = Body::from_stream(stream); + + let mut headers = HeaderMap::new(); + headers.insert("content-length", length.to_string().parse().unwrap()); + headers.insert( + "content-range", + format!("bytes {}-{}/{}", start, end, plaintext_size) + .parse() + .unwrap(), + ); + if let Some(ref etag) = meta.etag { + headers.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + } + insert_content_type(&mut headers, key, meta.content_type.as_deref()); + headers.insert("accept-ranges", "bytes".parse().unwrap()); + if let Some(alg) = enc_header { + headers.insert("x-amz-server-side-encryption", alg.parse().unwrap()); + } + apply_stored_response_headers(&mut headers, &meta.internal_metadata); + if start == 0 && end + 1 == plaintext_size { + apply_stored_checksum_headers(&mut headers, &meta.internal_metadata); + } + if let Some(ref requested_version) = query.version_id { + if let Ok(value) = requested_version.parse() { + headers.insert("x-amz-version-id", value); + } + } else if let Some(ref vid) = meta.version_id { + if let Ok(value) = vid.parse() { + headers.insert("x-amz-version-id", value); + } + } + + apply_response_overrides(&mut headers, query); + + (StatusCode::PARTIAL_CONTENT, headers, body).into_response() +} + +fn evaluate_get_preconditions( + headers: &HeaderMap, + meta: &myfsio_common::types::ObjectMeta, +) -> Option { + if let Some(value) = headers.get("if-match").and_then(|v| v.to_str().ok()) { + if !etag_condition_matches(value, meta.etag.as_deref()) { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + + if let Some(value) = headers + .get("if-unmodified-since") + .and_then(|v| v.to_str().ok()) + { + if let Some(t) = parse_http_date(value) { + if meta.last_modified > t { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + } + + if let Some(value) = headers.get("if-none-match").and_then(|v| v.to_str().ok()) { + if etag_condition_matches(value, meta.etag.as_deref()) { + return Some(StatusCode::NOT_MODIFIED.into_response()); + } + } + + if let Some(value) = headers + .get("if-modified-since") + .and_then(|v| v.to_str().ok()) + { + if let Some(t) = parse_http_date(value) { + if meta.last_modified <= t { + return Some(StatusCode::NOT_MODIFIED.into_response()); + } + } + } + + None +} + +async fn evaluate_put_preconditions( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, +) -> Option { + let has_if_match = headers.contains_key("if-match"); + let has_if_none_match = headers.contains_key("if-none-match"); + if !has_if_match && !has_if_none_match { + return None; + } + + match state.storage.head_object(bucket, key).await { + Ok(meta) => { + if let Some(value) = headers.get("if-match").and_then(|v| v.to_str().ok()) 
{ + if !etag_condition_matches(value, meta.etag.as_deref()) { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + if let Some(value) = headers.get("if-none-match").and_then(|v| v.to_str().ok()) { + if etag_condition_matches(value, meta.etag.as_deref()) { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + None + } + Err(myfsio_storage::error::StorageError::ObjectNotFound { .. }) + | Err(myfsio_storage::error::StorageError::DeleteMarker { .. }) => { + if has_if_match { + Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))) + } else { + None + } + } + Err(err) => Some(storage_err_response(err)), + } +} + +fn evaluate_copy_preconditions( + headers: &HeaderMap, + source_meta: &myfsio_common::types::ObjectMeta, +) -> Option { + if let Some(value) = headers + .get("x-amz-copy-source-if-match") + .and_then(|v| v.to_str().ok()) + { + if !etag_condition_matches(value, source_meta.etag.as_deref()) { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + + if let Some(value) = headers + .get("x-amz-copy-source-if-none-match") + .and_then(|v| v.to_str().ok()) + { + if etag_condition_matches(value, source_meta.etag.as_deref()) { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + + if let Some(value) = headers + .get("x-amz-copy-source-if-modified-since") + .and_then(|v| v.to_str().ok()) + { + if let Some(t) = parse_http_date(value) { + if source_meta.last_modified <= t { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + } + + if let Some(value) = headers + .get("x-amz-copy-source-if-unmodified-since") + .and_then(|v| v.to_str().ok()) + { + if let Some(t) = parse_http_date(value) { + if source_meta.last_modified > t { + return Some(s3_error_response(S3Error::from_code( + S3ErrorCode::PreconditionFailed, + ))); + } + } + } + + None +} + +fn parse_http_date(value: &str) -> Option> { + let trimmed = value.trim(); + if let Ok(dt) = DateTime::parse_from_rfc2822(trimmed) { + return Some(dt.with_timezone(&Utc)); + } + if let Ok(dt) = DateTime::parse_from_rfc3339(trimmed) { + return Some(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(trimmed, "%A, %d-%b-%y %H:%M:%S GMT") { + return Some(naive.and_utc()); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(trimmed, "%a %b %e %H:%M:%S %Y") { + return Some(naive.and_utc()); + } + None +} + +fn etag_condition_matches(condition: &str, etag: Option<&str>) -> bool { + let trimmed = condition.trim(); + if trimmed == "*" { + return true; + } + + let current = match etag { + Some(e) => e.trim_matches('"'), + None => return false, + }; + + trimmed + .split(',') + .map(|v| v.trim().trim_matches('"')) + .any(|candidate| candidate == current || candidate == "*") +} + +fn xml_escape(value: &str) -> String { + value + .replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +fn parse_range(range_str: &str, total_size: u64) -> Option<(u64, u64)> { + let range_spec = range_str.strip_prefix("bytes=")?; + + if let Some(suffix) = range_spec.strip_prefix('-') { + let suffix_len: u64 = suffix.parse().ok()?; + if suffix_len == 0 || suffix_len > total_size { + return None; + } + return Some((total_size - suffix_len, total_size - 1)); + } + + let (start_str, end_str) = range_spec.split_once('-')?; + 
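    // Editor's note, not part of the original diff: worked examples for a
    // 1000-byte object, matching the suffix branch above and the bounds
    // checks below:
    //   "bytes=-200"     -> Some((800, 999))   last 200 bytes
    //   "bytes=0-99"     -> Some((0, 99))
    //   "bytes=500-"     -> Some((500, 999))   open end, clamped to last byte
    //   "bytes=900-2000" -> Some((900, 999))   end clamped to object size
    //   "bytes=1200-"    -> None               start past the end
    // Callers map a None here to an InvalidRange error response.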
let start: u64 = start_str.parse().ok()?; + + let end = if end_str.is_empty() { + total_size - 1 + } else { + let e: u64 = end_str.parse().ok()?; + e.min(total_size - 1) + }; + + if start > end || start >= total_size { + return None; + } + + Some((start, end)) +} + +use futures::TryStreamExt; +use http_body_util; +use tokio::io::AsyncReadExt; + +async fn resolve_encryption_context( + state: &AppState, + bucket: &str, + headers: &HeaderMap, +) -> Option { + if let Some(alg) = headers + .get("x-amz-server-side-encryption") + .and_then(|v| v.to_str().ok()) + { + let algorithm = match alg { + "AES256" => myfsio_crypto::encryption::SseAlgorithm::Aes256, + "aws:kms" => myfsio_crypto::encryption::SseAlgorithm::AwsKms, + _ => return None, + }; + let kms_key_id = headers + .get("x-amz-server-side-encryption-aws-kms-key-id") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + return Some(myfsio_crypto::encryption::EncryptionContext { + algorithm, + kms_key_id, + customer_key: None, + }); + } + + if let Some(sse_c_alg) = headers + .get("x-amz-server-side-encryption-customer-algorithm") + .and_then(|v| v.to_str().ok()) + { + if sse_c_alg == "AES256" { + let customer_key = extract_sse_c_key(headers); + if let Some(ck) = customer_key { + return Some(myfsio_crypto::encryption::EncryptionContext { + algorithm: myfsio_crypto::encryption::SseAlgorithm::CustomerProvided, + kms_key_id: None, + customer_key: Some(ck), + }); + } + } + return None; + } + + if state.encryption.is_some() { + if let Ok(config) = state.storage.get_bucket_config(bucket).await { + if let Some(enc_val) = &config.encryption { + let enc_str = enc_val.to_string(); + if enc_str.contains("AES256") { + return Some(myfsio_crypto::encryption::EncryptionContext { + algorithm: myfsio_crypto::encryption::SseAlgorithm::Aes256, + kms_key_id: None, + customer_key: None, + }); + } + if enc_str.contains("aws:kms") { + return Some(myfsio_crypto::encryption::EncryptionContext { + algorithm: myfsio_crypto::encryption::SseAlgorithm::AwsKms, + kms_key_id: None, + customer_key: None, + }); + } + } + } + } + + None +} + +fn extract_sse_c_key(headers: &HeaderMap) -> Option> { + use base64::engine::general_purpose::STANDARD as B64; + use base64::Engine; + + let key_b64 = headers + .get("x-amz-server-side-encryption-customer-key") + .and_then(|v| v.to_str().ok())?; + B64.decode(key_b64).ok() +} + +async fn post_object_form_handler( + state: &AppState, + bucket: &str, + content_type: &str, + body: Body, +) -> Response { + use base64::engine::general_purpose::STANDARD as B64; + use base64::Engine; + use futures::TryStreamExt; + + if !state.storage.bucket_exists(bucket).await.unwrap_or(false) { + return s3_error_response(S3Error::from_code(S3ErrorCode::NoSuchBucket)); + } + + let boundary = match multer::parse_boundary(content_type) { + Ok(b) => b, + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing multipart boundary", + )); + } + }; + + let stream = http_body_util::BodyStream::new(body) + .map_ok(|frame| frame.into_data().unwrap_or_default()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)); + let mut multipart = multer::Multipart::new(stream, boundary); + + let mut fields: HashMap = HashMap::new(); + let mut file_bytes: Option = None; + let mut file_name: Option = None; + + while let Some(field) = match multipart.next_field().await { + Ok(f) => f, + Err(e) => { + return s3_error_response(S3Error::new( + S3ErrorCode::MalformedXML, + format!("Malformed multipart: {}", e), + )); + } + } { + 
let name = field.name().map(|s| s.to_string()).unwrap_or_default(); + if name.eq_ignore_ascii_case("file") { + file_name = field.file_name().map(|s| s.to_string()); + match field.bytes().await { + Ok(b) => file_bytes = Some(b), + Err(e) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InternalError, + format!("Failed to read file: {}", e), + )); + } + } + } else if !name.is_empty() { + match field.text().await { + Ok(t) => { + fields.insert(name, t); + } + Err(_) => {} + } + } + } + + let key_template = match fields.get("key").cloned() { + Some(k) => k, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing key field", + )) + } + }; + let policy_b64 = match fields.get("policy").cloned() { + Some(v) => v, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing policy field", + )) + } + }; + let signature = match fields + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("x-amz-signature")) + .map(|(_, v)| v.clone()) + { + Some(v) => v, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing signature", + )) + } + }; + let credential = match fields + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("x-amz-credential")) + .map(|(_, v)| v.clone()) + { + Some(v) => v, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing credential", + )) + } + }; + let algorithm = match fields + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("x-amz-algorithm")) + .map(|(_, v)| v.clone()) + { + Some(v) => v, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing algorithm", + )) + } + }; + if algorithm != "AWS4-HMAC-SHA256" { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Unsupported signing algorithm", + )); + } + + let policy_bytes = match B64.decode(policy_b64.as_bytes()) { + Ok(b) => b, + Err(e) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPolicyDocument, + format!("Invalid policy base64: {}", e), + )); + } + }; + let policy_value: serde_json::Value = match serde_json::from_slice(&policy_bytes) { + Ok(v) => v, + Err(e) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPolicyDocument, + format!("Invalid policy JSON: {}", e), + )); + } + }; + + if let Some(exp) = policy_value.get("expiration").and_then(|v| v.as_str()) { + let normalized = exp.replace('Z', "+00:00"); + match chrono::DateTime::parse_from_rfc3339(&normalized) { + Ok(exp_time) => { + if Utc::now() > exp_time.with_timezone(&Utc) { + return s3_error_response(S3Error::new( + S3ErrorCode::AccessDenied, + "Policy expired", + )); + } + } + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidPolicyDocument, + "Invalid expiration format", + )); + } + } + } + + let content_length = file_bytes.as_ref().map(|b| b.len() as u64).unwrap_or(0); + let object_key = if key_template.contains("${filename}") { + let fname = file_name.clone().unwrap_or_else(|| "upload".to_string()); + key_template.replace("${filename}", &fname) + } else { + key_template.clone() + }; + + if let Some(conditions) = policy_value.get("conditions").and_then(|v| v.as_array()) { + if let Err(msg) = validate_post_policy_conditions( + bucket, + &object_key, + conditions, + &fields, + content_length, + ) { + return s3_error_response(S3Error::new(S3ErrorCode::AccessDenied, msg)); + } + } + + let credential_parts: Vec<&str> = credential.split('/').collect(); + if credential_parts.len() != 5 { + 
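        // Editor's note, not part of the original diff: a SigV4 credential scope
        // has the fixed shape <access-key>/<YYYYMMDD>/<region>/<service>/aws4_request,
        // i.e. exactly five '/'-separated parts; the first four are read below and
        // the signing key is derived from the date/region/service chain.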
return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid credential format", + )); + } + let access_key = credential_parts[0]; + let date_stamp = credential_parts[1]; + let region = credential_parts[2]; + let service = credential_parts[3]; + + let secret_key = match state.iam.get_secret_key(access_key) { + Some(s) => s, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::AccessDenied, + "Invalid access key", + )) + } + }; + let signing_key = + myfsio_auth::sigv4::derive_signing_key(&secret_key, date_stamp, region, service); + let expected = myfsio_auth::sigv4::compute_post_policy_signature(&signing_key, &policy_b64); + if !myfsio_auth::sigv4::constant_time_compare(&expected, &signature) { + return s3_error_response(S3Error::new( + S3ErrorCode::SignatureDoesNotMatch, + "Signature verification failed", + )); + } + + let file_data = match file_bytes { + Some(b) => b, + None => { + return s3_error_response(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing file field", + )) + } + }; + + let mut metadata = HashMap::new(); + for (k, v) in &fields { + let lower = k.to_ascii_lowercase(); + if let Some(meta_key) = lower.strip_prefix("x-amz-meta-") { + if !meta_key.is_empty() && !(meta_key.starts_with("__") && meta_key.ends_with("__")) { + metadata.insert(meta_key.to_string(), v.clone()); + } + } + } + let content_type_value = fields + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("content-type")) + .map(|(_, v)| v.clone()); + metadata.insert( + "__content_type__".to_string(), + guessed_content_type(&object_key, content_type_value.as_deref()), + ); + + let cursor = std::io::Cursor::new(file_data.to_vec()); + let boxed: myfsio_storage::traits::AsyncReadStream = Box::pin(cursor); + + let meta = match state + .storage + .put_object(bucket, &object_key, boxed, Some(metadata)) + .await + { + Ok(m) => m, + Err(e) => return storage_err_response(e), + }; + + let etag = meta.etag.as_deref().unwrap_or(""); + let success_status = fields + .get("success_action_status") + .cloned() + .unwrap_or_else(|| "204".to_string()); + let location = format!("/{}/{}", bucket, object_key); + let xml = myfsio_xml::response::post_object_result_xml(&location, bucket, &object_key, etag); + + let status = match success_status.as_str() { + "200" => StatusCode::OK, + "201" => StatusCode::CREATED, + _ => { + let mut hdrs = HeaderMap::new(); + hdrs.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + return (StatusCode::NO_CONTENT, hdrs).into_response(); + } + }; + + let mut hdrs = HeaderMap::new(); + hdrs.insert("content-type", "application/xml".parse().unwrap()); + hdrs.insert("etag", format!("\"{}\"", etag).parse().unwrap()); + (status, hdrs, xml).into_response() +} + +fn validate_post_policy_conditions( + bucket: &str, + object_key: &str, + conditions: &[serde_json::Value], + form: &HashMap, + content_length: u64, +) -> Result<(), String> { + for cond in conditions { + if let Some(obj) = cond.as_object() { + for (k, v) in obj { + let expected = v.as_str().unwrap_or(""); + match k.as_str() { + "bucket" => { + if bucket != expected { + return Err(format!("Bucket must be {}", expected)); + } + } + "key" => { + if object_key != expected { + return Err(format!("Key must be {}", expected)); + } + } + other => { + let actual = form + .iter() + .find(|(fk, _)| fk.eq_ignore_ascii_case(other)) + .map(|(_, fv)| fv.as_str()) + .unwrap_or(""); + if actual != expected { + return Err(format!("Field {} must be {}", other, expected)); + } + } + } + } + } else if let Some(arr) = 
cond.as_array() { + if arr.len() < 2 { + continue; + } + let op = arr[0].as_str().unwrap_or("").to_ascii_lowercase(); + if op == "starts-with" && arr.len() == 3 { + let field = arr[1].as_str().unwrap_or("").trim_start_matches('$'); + let prefix = arr[2].as_str().unwrap_or(""); + if field == "key" { + if !object_key.starts_with(prefix) { + return Err(format!("Key must start with {}", prefix)); + } + } else { + let actual = form + .iter() + .find(|(fk, _)| fk.eq_ignore_ascii_case(field)) + .map(|(_, fv)| fv.as_str()) + .unwrap_or(""); + if !actual.starts_with(prefix) { + return Err(format!("Field {} must start with {}", field, prefix)); + } + } + } else if op == "eq" && arr.len() == 3 { + let field = arr[1].as_str().unwrap_or("").trim_start_matches('$'); + let expected = arr[2].as_str().unwrap_or(""); + if field == "key" { + if object_key != expected { + return Err(format!("Key must equal {}", expected)); + } + } else { + let actual = form + .iter() + .find(|(fk, _)| fk.eq_ignore_ascii_case(field)) + .map(|(_, fv)| fv.as_str()) + .unwrap_or(""); + if actual != expected { + return Err(format!("Field {} must equal {}", field, expected)); + } + } + } else if op == "content-length-range" && arr.len() == 3 { + let min = arr[1].as_i64().unwrap_or(0) as u64; + let max = arr[2].as_i64().unwrap_or(0) as u64; + if content_length < min || content_length > max { + return Err(format!( + "Content length must be between {} and {}", + min, max + )); + } + } + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::ServerConfig; + use crate::services::acl::{acl_to_xml, create_canned_acl}; + use http_body_util::BodyExt; + use serde_json::Value; + use tower::ServiceExt; + + const TEST_ACCESS_KEY: &str = "AKIAIOSFODNN7EXAMPLE"; + const TEST_SECRET_KEY: &str = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"; + + fn test_state() -> (AppState, tempfile::TempDir) { + let tmp = tempfile::tempdir().unwrap(); + let config_dir = tmp.path().join(".myfsio.sys").join("config"); + std::fs::create_dir_all(&config_dir).unwrap(); + std::fs::write( + config_dir.join("iam.json"), + serde_json::json!({ + "version": 2, + "users": [{ + "user_id": "u-test1234", + "display_name": "admin", + "enabled": true, + "access_keys": [{ + "access_key": TEST_ACCESS_KEY, + "secret_key": TEST_SECRET_KEY, + "status": "active" + }], + "policies": [{ + "bucket": "*", + "actions": ["*"], + "prefix": "*" + }] + }] + }) + .to_string(), + ) + .unwrap(); + + let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let config = ServerConfig { + bind_addr: "127.0.0.1:0".parse().unwrap(), + ui_bind_addr: "127.0.0.1:0".parse().unwrap(), + storage_root: tmp.path().to_path_buf(), + region: "us-east-1".to_string(), + iam_config_path: config_dir.join("iam.json"), + sigv4_timestamp_tolerance_secs: 900, + presigned_url_min_expiry: 1, + presigned_url_max_expiry: 604800, + secret_key: None, + encryption_enabled: false, + kms_enabled: false, + gc_enabled: false, + integrity_enabled: false, + metrics_enabled: false, + metrics_history_enabled: false, + metrics_interval_minutes: 5, + metrics_retention_hours: 24, + metrics_history_interval_minutes: 5, + metrics_history_retention_hours: 24, + lifecycle_enabled: false, + website_hosting_enabled: false, + replication_connect_timeout_secs: 1, + replication_read_timeout_secs: 1, + replication_max_retries: 1, + replication_streaming_threshold_bytes: 10_485_760, + replication_max_failures_per_bucket: 50, + site_sync_enabled: false, + site_sync_interval_secs: 60, + site_sync_batch_size: 
100, + site_sync_connect_timeout_secs: 10, + site_sync_read_timeout_secs: 120, + site_sync_max_retries: 2, + site_sync_clock_skew_tolerance: 1.0, + ui_enabled: false, + templates_dir: manifest_dir.join("templates"), + static_dir: manifest_dir.join("static"), + ..ServerConfig::default() + }; + (AppState::new(config), tmp) + } + + fn auth_request( + method: axum::http::Method, + uri: &str, + body: Body, + ) -> axum::http::Request { + axum::http::Request::builder() + .method(method) + .uri(uri) + .header("x-access-key", TEST_ACCESS_KEY) + .header("x-secret-key", TEST_SECRET_KEY) + .body(body) + .unwrap() + } + + #[test] + fn aws_chunked_wire_encoding_is_not_persisted_as_object_encoding() { + let mut headers = HeaderMap::new(); + headers.insert("content-encoding", "aws-chunked".parse().unwrap()); + let mut metadata = HashMap::new(); + insert_standard_object_metadata(&headers, &mut metadata).unwrap(); + assert!(!metadata.contains_key("__content_encoding__")); + + headers.insert("content-encoding", "aws-chunked, gzip".parse().unwrap()); + let mut metadata = HashMap::new(); + insert_standard_object_metadata(&headers, &mut metadata).unwrap(); + assert_eq!(metadata.get("__content_encoding__").unwrap(), "gzip"); + } + + #[tokio::test] + async fn public_bucket_acl_allows_anonymous_reads() { + let (state, _tmp) = test_state(); + state.storage.create_bucket("public").await.unwrap(); + state + .storage + .put_object( + "public", + "hello.txt", + Box::pin(std::io::Cursor::new(b"hello".to_vec())), + None, + ) + .await + .unwrap(); + + let mut config = state.storage.get_bucket_config("public").await.unwrap(); + config.acl = Some(Value::String(acl_to_xml(&create_canned_acl( + "public-read", + "myfsio", + )))); + state + .storage + .set_bucket_config("public", &config) + .await + .unwrap(); + + let app = crate::create_router(state); + let response = app + .oneshot( + axum::http::Request::builder() + .method(axum::http::Method::GET) + .uri("/public/hello.txt") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + } + + #[tokio::test] + async fn object_retention_blocks_delete_without_bypass() { + let (state, _tmp) = test_state(); + state.storage.create_bucket("locked").await.unwrap(); + state + .storage + .put_object( + "locked", + "obj.txt", + Box::pin(std::io::Cursor::new(b"data".to_vec())), + None, + ) + .await + .unwrap(); + let app = crate::create_router(state); + + let retention_xml = r#" + + GOVERNANCE + 2099-01-01T00:00:00Z + "#; + let response = app + .clone() + .oneshot(auth_request( + axum::http::Method::PUT, + "/locked/obj.txt?retention", + Body::from(retention_xml), + )) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + + let response = app + .clone() + .oneshot(auth_request( + axum::http::Method::DELETE, + "/locked/obj.txt", + Body::empty(), + )) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::FORBIDDEN); + + let response = app + .oneshot( + axum::http::Request::builder() + .method(axum::http::Method::DELETE) + .uri("/locked/obj.txt") + .header("x-access-key", TEST_ACCESS_KEY) + .header("x-secret-key", TEST_SECRET_KEY) + .header("x-amz-bypass-governance-retention", "true") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::NO_CONTENT); + } + + #[tokio::test] + async fn object_acl_round_trip_uses_metadata() { + let (state, _tmp) = test_state(); + state.storage.create_bucket("acl").await.unwrap(); + state + .storage + .put_object( + "acl", + 
"photo.jpg", + Box::pin(std::io::Cursor::new(b"image".to_vec())), + None, + ) + .await + .unwrap(); + let app = crate::create_router(state); + + let response = app + .clone() + .oneshot( + axum::http::Request::builder() + .method(axum::http::Method::PUT) + .uri("/acl/photo.jpg?acl") + .header("x-access-key", TEST_ACCESS_KEY) + .header("x-secret-key", TEST_SECRET_KEY) + .header("x-amz-acl", "public-read") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + + let response = app + .oneshot(auth_request( + axum::http::Method::GET, + "/acl/photo.jpg?acl", + Body::empty(), + )) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + let body = String::from_utf8( + response + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .to_vec(), + ) + .unwrap(); + assert!(body.contains("AllUsers")); + assert!(body.contains("READ")); + } +} diff --git a/crates/myfsio-server/src/handlers/select.rs b/crates/myfsio-server/src/handlers/select.rs new file mode 100644 index 0000000..cf77f80 --- /dev/null +++ b/crates/myfsio-server/src/handlers/select.rs @@ -0,0 +1,578 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use axum::body::Body; +use axum::http::{HeaderMap, HeaderName, StatusCode}; +use axum::response::{IntoResponse, Response}; +use base64::Engine; +use bytes::Bytes; +use crc32fast::Hasher; +use duckdb::types::ValueRef; +use duckdb::Connection; +use futures::stream; +use http_body_util::BodyExt; +use myfsio_common::error::{S3Error, S3ErrorCode}; +use myfsio_storage::traits::StorageEngine; + +use crate::state::AppState; + +#[cfg(target_os = "windows")] +#[link(name = "Rstrtmgr")] +extern "system" {} + +const CHUNK_SIZE: usize = 65_536; + +pub async fn post_select_object_content( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, + body: Body, +) -> Response { + if let Some(resp) = require_xml_content_type(headers) { + return resp; + } + + let body_bytes = match body.collect().await { + Ok(collected) => collected.to_bytes(), + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::MalformedXML, + "Unable to parse XML document", + )); + } + }; + + let request = match parse_select_request(&body_bytes) { + Ok(r) => r, + Err(err) => return s3_error_response(err), + }; + + let object_path = match state.storage.get_object_path(bucket, key).await { + Ok(path) => path, + Err(_) => { + return s3_error_response(S3Error::new(S3ErrorCode::NoSuchKey, "Object not found")); + } + }; + + let join_res = + tokio::task::spawn_blocking(move || execute_select_query(object_path, request)).await; + let chunks = match join_res { + Ok(Ok(chunks)) => chunks, + Ok(Err(message)) => { + return s3_error_response(S3Error::new(S3ErrorCode::InvalidRequest, message)); + } + Err(_) => { + return s3_error_response(S3Error::new( + S3ErrorCode::InternalError, + "SelectObjectContent execution failed", + )); + } + }; + + let bytes_returned: usize = chunks.iter().map(|c| c.len()).sum(); + let mut events: Vec = Vec::with_capacity(chunks.len() + 2); + for chunk in chunks { + events.push(Bytes::from(encode_select_event("Records", &chunk))); + } + + let stats_payload = build_stats_xml(0, bytes_returned); + events.push(Bytes::from(encode_select_event( + "Stats", + stats_payload.as_bytes(), + ))); + events.push(Bytes::from(encode_select_event("End", b""))); + + let stream = stream::iter(events.into_iter().map(Ok::)); + let body = Body::from_stream(stream); + + let mut response = (StatusCode::OK, 
body).into_response(); + response.headers_mut().insert( + HeaderName::from_static("content-type"), + "application/octet-stream".parse().unwrap(), + ); + response.headers_mut().insert( + HeaderName::from_static("x-amz-request-charged"), + "requester".parse().unwrap(), + ); + response +} + +#[derive(Clone)] +struct SelectRequest { + expression: String, + input_format: InputFormat, + output_format: OutputFormat, +} + +#[derive(Clone)] +enum InputFormat { + Csv(CsvInputConfig), + Json(JsonInputConfig), + Parquet, +} + +#[derive(Clone)] +struct CsvInputConfig { + file_header_info: String, + field_delimiter: String, + quote_character: String, +} + +#[derive(Clone)] +struct JsonInputConfig { + json_type: String, +} + +#[derive(Clone)] +enum OutputFormat { + Csv(CsvOutputConfig), + Json(JsonOutputConfig), +} + +#[derive(Clone)] +struct CsvOutputConfig { + field_delimiter: String, + record_delimiter: String, + quote_character: String, +} + +#[derive(Clone)] +struct JsonOutputConfig { + record_delimiter: String, +} + +fn parse_select_request(payload: &[u8]) -> Result { + let xml = String::from_utf8_lossy(payload); + let doc = roxmltree::Document::parse(&xml) + .map_err(|_| S3Error::new(S3ErrorCode::MalformedXML, "Unable to parse XML document"))?; + + let root = doc.root_element(); + if root.tag_name().name() != "SelectObjectContentRequest" { + return Err(S3Error::new( + S3ErrorCode::MalformedXML, + "Root element must be SelectObjectContentRequest", + )); + } + + let expression = child_text(&root, "Expression") + .filter(|v| !v.is_empty()) + .ok_or_else(|| S3Error::new(S3ErrorCode::InvalidRequest, "Expression is required"))?; + + let expression_type = child_text(&root, "ExpressionType").unwrap_or_else(|| "SQL".to_string()); + if !expression_type.eq_ignore_ascii_case("SQL") { + return Err(S3Error::new( + S3ErrorCode::InvalidRequest, + "Only SQL expression type is supported", + )); + } + + let input_node = child(&root, "InputSerialization").ok_or_else(|| { + S3Error::new( + S3ErrorCode::InvalidRequest, + "InputSerialization is required", + ) + })?; + let output_node = child(&root, "OutputSerialization").ok_or_else(|| { + S3Error::new( + S3ErrorCode::InvalidRequest, + "OutputSerialization is required", + ) + })?; + + let input_format = parse_input_format(&input_node)?; + let output_format = parse_output_format(&output_node)?; + + Ok(SelectRequest { + expression, + input_format, + output_format, + }) +} + +fn parse_input_format(node: &roxmltree::Node<'_, '_>) -> Result { + if let Some(csv_node) = child(node, "CSV") { + return Ok(InputFormat::Csv(CsvInputConfig { + file_header_info: child_text(&csv_node, "FileHeaderInfo") + .unwrap_or_else(|| "NONE".to_string()) + .to_ascii_uppercase(), + field_delimiter: child_text(&csv_node, "FieldDelimiter") + .unwrap_or_else(|| ",".to_string()), + quote_character: child_text(&csv_node, "QuoteCharacter") + .unwrap_or_else(|| "\"".to_string()), + })); + } + + if let Some(json_node) = child(node, "JSON") { + return Ok(InputFormat::Json(JsonInputConfig { + json_type: child_text(&json_node, "Type") + .unwrap_or_else(|| "DOCUMENT".to_string()) + .to_ascii_uppercase(), + })); + } + + if child(node, "Parquet").is_some() { + return Ok(InputFormat::Parquet); + } + + Err(S3Error::new( + S3ErrorCode::InvalidRequest, + "InputSerialization must specify CSV, JSON, or Parquet", + )) +} + +fn parse_output_format(node: &roxmltree::Node<'_, '_>) -> Result { + if let Some(csv_node) = child(node, "CSV") { + return Ok(OutputFormat::Csv(CsvOutputConfig { + field_delimiter: 
child_text(&csv_node, "FieldDelimiter") + .unwrap_or_else(|| ",".to_string()), + record_delimiter: child_text(&csv_node, "RecordDelimiter") + .unwrap_or_else(|| "\n".to_string()), + quote_character: child_text(&csv_node, "QuoteCharacter") + .unwrap_or_else(|| "\"".to_string()), + })); + } + + if let Some(json_node) = child(node, "JSON") { + return Ok(OutputFormat::Json(JsonOutputConfig { + record_delimiter: child_text(&json_node, "RecordDelimiter") + .unwrap_or_else(|| "\n".to_string()), + })); + } + + Err(S3Error::new( + S3ErrorCode::InvalidRequest, + "OutputSerialization must specify CSV or JSON", + )) +} + +fn child<'a, 'input>( + node: &'a roxmltree::Node<'a, 'input>, + name: &str, +) -> Option> { + node.children() + .find(|n| n.is_element() && n.tag_name().name() == name) +} + +fn child_text(node: &roxmltree::Node<'_, '_>, name: &str) -> Option { + child(node, name) + .and_then(|n| n.text()) + .map(|s| s.to_string()) +} + +fn execute_select_query(path: PathBuf, request: SelectRequest) -> Result>, String> { + let conn = + Connection::open_in_memory().map_err(|e| format!("DuckDB connection error: {}", e))?; + + load_input_table(&conn, &path, &request.input_format)?; + + let expression = request + .expression + .replace("s3object", "data") + .replace("S3Object", "data"); + + let mut stmt = conn + .prepare(&expression) + .map_err(|e| format!("SQL execution error: {}", e))?; + let mut rows = stmt + .query([]) + .map_err(|e| format!("SQL execution error: {}", e))?; + let stmt_ref = rows + .as_ref() + .ok_or_else(|| "SQL execution error: statement metadata unavailable".to_string())?; + let col_count = stmt_ref.column_count(); + let mut columns: Vec = Vec::with_capacity(col_count); + for i in 0..col_count { + let name = stmt_ref + .column_name(i) + .map(|s| s.to_string()) + .unwrap_or_else(|_| format!("_{}", i)); + columns.push(name); + } + + match request.output_format { + OutputFormat::Csv(cfg) => collect_csv_chunks(&mut rows, col_count, cfg), + OutputFormat::Json(cfg) => collect_json_chunks(&mut rows, col_count, &columns, cfg), + } +} + +fn load_input_table(conn: &Connection, path: &Path, input: &InputFormat) -> Result<(), String> { + let path_str = path.to_string_lossy().replace('\\', "/"); + match input { + InputFormat::Csv(cfg) => { + let header = cfg.file_header_info == "USE" || cfg.file_header_info == "IGNORE"; + let delimiter = normalize_single_char(&cfg.field_delimiter, ','); + let quote = normalize_single_char(&cfg.quote_character, '"'); + + let sql = format!( + "CREATE TABLE data AS SELECT * FROM read_csv('{}', header={}, delim='{}', quote='{}')", + sql_escape(&path_str), + if header { "true" } else { "false" }, + sql_escape(&delimiter), + sql_escape("e) + ); + conn.execute_batch(&sql) + .map_err(|e| format!("Failed loading CSV data: {}", e))?; + } + InputFormat::Json(cfg) => { + let format = if cfg.json_type == "LINES" { + "newline_delimited" + } else { + "array" + }; + let sql = format!( + "CREATE TABLE data AS SELECT * FROM read_json_auto('{}', format='{}')", + sql_escape(&path_str), + format + ); + conn.execute_batch(&sql) + .map_err(|e| format!("Failed loading JSON data: {}", e))?; + } + InputFormat::Parquet => { + let sql = format!( + "CREATE TABLE data AS SELECT * FROM read_parquet('{}')", + sql_escape(&path_str) + ); + conn.execute_batch(&sql) + .map_err(|e| format!("Failed loading Parquet data: {}", e))?; + } + } + Ok(()) +} + +fn sql_escape(value: &str) -> String { + value.replace('\'', "''") +} + +fn normalize_single_char(value: &str, default_char: char) -> String { + 
value.chars().next().unwrap_or(default_char).to_string() +} + +fn collect_csv_chunks( + rows: &mut duckdb::Rows<'_>, + col_count: usize, + cfg: CsvOutputConfig, +) -> Result>, String> { + let delimiter = cfg.field_delimiter; + let record_delimiter = cfg.record_delimiter; + let quote = cfg.quote_character; + + let mut chunks: Vec> = Vec::new(); + let mut buffer = String::new(); + + while let Some(row) = rows + .next() + .map_err(|e| format!("SQL execution error: {}", e))? + { + let mut fields: Vec = Vec::with_capacity(col_count); + for i in 0..col_count { + let value = row + .get_ref(i) + .map_err(|e| format!("SQL execution error: {}", e))?; + if matches!(value, ValueRef::Null) { + fields.push(String::new()); + continue; + } + + let mut text = value_ref_to_string(value); + if text.contains(&delimiter) + || text.contains("e) + || text.contains(&record_delimiter) + { + text = text.replace("e, &(quote.clone() + "e)); + text = format!("{}{}{}", quote, text, quote); + } + fields.push(text); + } + buffer.push_str(&fields.join(&delimiter)); + buffer.push_str(&record_delimiter); + + while buffer.len() >= CHUNK_SIZE { + let rest = buffer.split_off(CHUNK_SIZE); + chunks.push(buffer.into_bytes()); + buffer = rest; + } + } + + if !buffer.is_empty() { + chunks.push(buffer.into_bytes()); + } + Ok(chunks) +} + +fn collect_json_chunks( + rows: &mut duckdb::Rows<'_>, + col_count: usize, + columns: &[String], + cfg: JsonOutputConfig, +) -> Result>, String> { + let record_delimiter = cfg.record_delimiter; + let mut chunks: Vec> = Vec::new(); + let mut buffer = String::new(); + + while let Some(row) = rows + .next() + .map_err(|e| format!("SQL execution error: {}", e))? + { + let mut record: HashMap = HashMap::with_capacity(col_count); + for i in 0..col_count { + let value = row + .get_ref(i) + .map_err(|e| format!("SQL execution error: {}", e))?; + let key = columns.get(i).cloned().unwrap_or_else(|| format!("_{}", i)); + record.insert(key, value_ref_to_json(value)); + } + let line = serde_json::to_string(&record) + .map_err(|e| format!("JSON output encoding failed: {}", e))?; + buffer.push_str(&line); + buffer.push_str(&record_delimiter); + + while buffer.len() >= CHUNK_SIZE { + let rest = buffer.split_off(CHUNK_SIZE); + chunks.push(buffer.into_bytes()); + buffer = rest; + } + } + + if !buffer.is_empty() { + chunks.push(buffer.into_bytes()); + } + Ok(chunks) +} + +fn value_ref_to_string(value: ValueRef<'_>) -> String { + match value { + ValueRef::Null => String::new(), + ValueRef::Boolean(v) => v.to_string(), + ValueRef::TinyInt(v) => v.to_string(), + ValueRef::SmallInt(v) => v.to_string(), + ValueRef::Int(v) => v.to_string(), + ValueRef::BigInt(v) => v.to_string(), + ValueRef::UTinyInt(v) => v.to_string(), + ValueRef::USmallInt(v) => v.to_string(), + ValueRef::UInt(v) => v.to_string(), + ValueRef::UBigInt(v) => v.to_string(), + ValueRef::Float(v) => v.to_string(), + ValueRef::Double(v) => v.to_string(), + ValueRef::Decimal(v) => v.to_string(), + ValueRef::Text(v) => String::from_utf8_lossy(v).into_owned(), + ValueRef::Blob(v) => base64::engine::general_purpose::STANDARD.encode(v), + _ => format!("{:?}", value), + } +} + +fn value_ref_to_json(value: ValueRef<'_>) -> serde_json::Value { + match value { + ValueRef::Null => serde_json::Value::Null, + ValueRef::Boolean(v) => serde_json::Value::Bool(v), + ValueRef::TinyInt(v) => serde_json::json!(v), + ValueRef::SmallInt(v) => serde_json::json!(v), + ValueRef::Int(v) => serde_json::json!(v), + ValueRef::BigInt(v) => serde_json::json!(v), + ValueRef::UTinyInt(v) => 
serde_json::json!(v), + ValueRef::USmallInt(v) => serde_json::json!(v), + ValueRef::UInt(v) => serde_json::json!(v), + ValueRef::UBigInt(v) => serde_json::json!(v), + ValueRef::Float(v) => serde_json::json!(v), + ValueRef::Double(v) => serde_json::json!(v), + ValueRef::Decimal(v) => serde_json::Value::String(v.to_string()), + ValueRef::Text(v) => serde_json::Value::String(String::from_utf8_lossy(v).into_owned()), + ValueRef::Blob(v) => { + serde_json::Value::String(base64::engine::general_purpose::STANDARD.encode(v)) + } + _ => serde_json::Value::String(format!("{:?}", value)), + } +} + +fn require_xml_content_type(headers: &HeaderMap) -> Option { + let value = headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .trim(); + if value.is_empty() { + return None; + } + let lowered = value.to_ascii_lowercase(); + if lowered.starts_with("application/xml") || lowered.starts_with("text/xml") { + return None; + } + Some(s3_error_response(S3Error::new( + S3ErrorCode::InvalidRequest, + "Content-Type must be application/xml or text/xml", + ))) +} + +fn s3_error_response(err: S3Error) -> Response { + let status = + StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + let resource = if err.resource.is_empty() { + "/".to_string() + } else { + err.resource.clone() + }; + let body = err + .with_resource(resource) + .with_request_id(uuid::Uuid::new_v4().simple().to_string()) + .to_xml(); + (status, [("content-type", "application/xml")], body).into_response() +} + +fn build_stats_xml(bytes_scanned: usize, bytes_returned: usize) -> String { + format!( + "{}{}{}", + bytes_scanned, + bytes_scanned, + bytes_returned + ) +} + +fn encode_select_event(event_type: &str, payload: &[u8]) -> Vec { + let mut headers = Vec::new(); + headers.extend(encode_select_header(":event-type", event_type)); + if event_type == "Records" { + headers.extend(encode_select_header( + ":content-type", + "application/octet-stream", + )); + } else if event_type == "Stats" { + headers.extend(encode_select_header(":content-type", "text/xml")); + } + headers.extend(encode_select_header(":message-type", "event")); + + let headers_len = headers.len() as u32; + let total_len = 4 + 4 + 4 + headers.len() + payload.len() + 4; + + let mut message = Vec::with_capacity(total_len); + let mut prelude = Vec::with_capacity(8); + prelude.extend((total_len as u32).to_be_bytes()); + prelude.extend(headers_len.to_be_bytes()); + + let prelude_crc = crc32(&prelude); + message.extend(prelude); + message.extend(prelude_crc.to_be_bytes()); + message.extend(headers); + message.extend(payload); + + let msg_crc = crc32(&message); + message.extend(msg_crc.to_be_bytes()); + message +} + +fn encode_select_header(name: &str, value: &str) -> Vec { + let name_bytes = name.as_bytes(); + let value_bytes = value.as_bytes(); + let mut header = Vec::with_capacity(1 + name_bytes.len() + 1 + 2 + value_bytes.len()); + header.push(name_bytes.len() as u8); + header.extend(name_bytes); + header.push(7); + header.extend((value_bytes.len() as u16).to_be_bytes()); + header.extend(value_bytes); + header +} + +fn crc32(data: &[u8]) -> u32 { + let mut hasher = Hasher::new(); + hasher.update(data); + hasher.finalize() +} diff --git a/crates/myfsio-server/src/handlers/ui.rs b/crates/myfsio-server/src/handlers/ui.rs new file mode 100644 index 0000000..e08473d --- /dev/null +++ b/crates/myfsio-server/src/handlers/ui.rs @@ -0,0 +1,226 @@ +use std::collections::HashMap; +use std::error::Error as StdError; + +use 
axum::extract::{Extension, Form, State}; +use axum::http::{header, HeaderMap, StatusCode}; +use axum::response::{IntoResponse, Redirect, Response}; +use tera::Context; + +use crate::middleware::session::SessionHandle; +use crate::session::FlashMessage; +use crate::state::AppState; + +pub async fn login_page( + State(state): State, + Extension(session): Extension, +) -> Response { + if session.read(|s| s.is_authenticated()) { + return Redirect::to("/ui/buckets").into_response(); + } + + let mut ctx = base_context(&session, None); + let flashed = session.write(|s| s.take_flash()); + inject_flash(&mut ctx, flashed); + + render(&state, "login.html", &ctx) +} + +#[derive(serde::Deserialize)] +pub struct LoginForm { + pub access_key: String, + pub secret_key: String, + #[serde(default)] + pub csrf_token: String, + #[serde(default)] + pub next: Option, +} + +pub async fn login_submit( + State(state): State, + Extension(session): Extension, + Form(form): Form, +) -> Response { + let access_key = form.access_key.trim(); + let secret_key = form.secret_key.trim(); + + match state.iam.get_secret_key(access_key) { + Some(expected) if constant_time_eq_str(&expected, secret_key) => { + let display = state + .iam + .get_user(access_key) + .await + .and_then(|v| { + v.get("display_name") + .and_then(|d| d.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_else(|| access_key.to_string()); + + session.write(|s| { + s.user_id = Some(access_key.to_string()); + s.display_name = Some(display); + s.rotate_csrf(); + s.push_flash("success", "Signed in successfully."); + }); + + let next = form + .next + .as_deref() + .filter(|n| is_allowed_redirect(n, &state.config.allowed_redirect_hosts)) + .unwrap_or("/ui/buckets") + .to_string(); + Redirect::to(&next).into_response() + } + _ => { + session.write(|s| { + s.push_flash("danger", "Invalid access key or secret key."); + }); + Redirect::to("/login").into_response() + } + } +} + +fn is_allowed_redirect(target: &str, allowed_hosts: &[String]) -> bool { + if target == "/ui" || target.starts_with("/ui/") { + return true; + } + let Some(rest) = target + .strip_prefix("https://") + .or_else(|| target.strip_prefix("http://")) + else { + return false; + }; + let host = rest + .split('/') + .next() + .unwrap_or_default() + .split('@') + .last() + .unwrap_or_default() + .split(':') + .next() + .unwrap_or_default() + .to_ascii_lowercase(); + allowed_hosts + .iter() + .any(|allowed| allowed.eq_ignore_ascii_case(&host)) +} + +pub async fn logout(Extension(session): Extension) -> Response { + session.write(|s| { + s.user_id = None; + s.display_name = None; + s.flash.clear(); + s.rotate_csrf(); + s.push_flash("info", "Signed out."); + }); + Redirect::to("/login").into_response() +} + +pub async fn root_redirect() -> Response { + Redirect::to("/ui/buckets").into_response() +} + +pub async fn not_found_page( + State(state): State, + Extension(session): Extension, +) -> Response { + let ctx = base_context(&session, None); + let mut resp = render(&state, "404.html", &ctx); + *resp.status_mut() = StatusCode::NOT_FOUND; + resp +} + +pub async fn require_login( + Extension(session): Extension, + req: axum::extract::Request, + next: axum::middleware::Next, +) -> Response { + if session.read(|s| s.is_authenticated()) { + return next.run(req).await; + } + let path = req.uri().path().to_string(); + let query = req + .uri() + .query() + .map(|q| format!("?{}", q)) + .unwrap_or_default(); + let next_url = format!("{}{}", path, query); + let encoded = + 
percent_encoding::utf8_percent_encode(&next_url, percent_encoding::NON_ALPHANUMERIC) + .to_string(); + let target = format!("/login?next={}", encoded); + Redirect::to(&target).into_response() +} + +pub fn render(state: &AppState, template: &str, ctx: &Context) -> Response { + let engine = match &state.templates { + Some(e) => e, + None => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + "Templates not configured", + ) + .into_response(); + } + }; + match engine.render(template, ctx) { + Ok(html) => { + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + "text/html; charset=utf-8".parse().unwrap(), + ); + (StatusCode::OK, headers, html).into_response() + } + Err(e) => { + let mut detail = format!("{}", e); + let mut src = StdError::source(&e); + while let Some(s) = src { + detail.push_str(" | "); + detail.push_str(&s.to_string()); + src = s.source(); + } + tracing::error!("Template render failed ({}): {}", template, detail); + let fallback_ctx = Context::new(); + let body = if template != "500.html" { + engine + .render("500.html", &fallback_ctx) + .unwrap_or_else(|_| "Internal Server Error".to_string()) + } else { + "Internal Server Error".to_string() + }; + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + "text/html; charset=utf-8".parse().unwrap(), + ); + (StatusCode::INTERNAL_SERVER_ERROR, headers, body).into_response() + } + } +} + +pub fn base_context(session: &SessionHandle, endpoint: Option<&str>) -> Context { + let mut ctx = Context::new(); + let snapshot = session.snapshot(); + ctx.insert("csrf_token_value", &snapshot.csrf_token); + ctx.insert("is_authenticated", &snapshot.user_id.is_some()); + ctx.insert("current_user", &snapshot.user_id); + ctx.insert("current_user_display_name", &snapshot.display_name); + ctx.insert("current_endpoint", &endpoint.unwrap_or("")); + ctx.insert("request_args", &HashMap::::new()); + ctx.insert("null", &serde_json::Value::Null); + ctx.insert("none", &serde_json::Value::Null); + ctx +} + +pub fn inject_flash(ctx: &mut Context, flashed: Vec) { + ctx.insert("flashed_messages", &flashed); +} + +fn constant_time_eq_str(a: &str, b: &str) -> bool { + if a.len() != b.len() { + return false; + } + subtle::ConstantTimeEq::ct_eq(a.as_bytes(), b.as_bytes()).into() +} diff --git a/crates/myfsio-server/src/handlers/ui_api.rs b/crates/myfsio-server/src/handlers/ui_api.rs new file mode 100644 index 0000000..3e3b494 --- /dev/null +++ b/crates/myfsio-server/src/handlers/ui_api.rs @@ -0,0 +1,3790 @@ +use std::collections::{BTreeMap, HashMap}; +use std::io::Cursor; +use std::path::{Component, Path as FsPath, PathBuf}; +use std::sync::{Mutex, OnceLock}; + +use axum::body::{to_bytes, Body}; +use axum::extract::{Extension, Path, Query, State}; +use axum::http::{header, HeaderMap, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::Json; +use chrono::{DateTime, Datelike, Timelike, Utc}; +use futures::TryStreamExt; +use http_body_util::BodyStream; +use myfsio_auth::sigv4; +use myfsio_common::constants::{BUCKET_VERSIONS_DIR, SYSTEM_BUCKETS_DIR, SYSTEM_ROOT}; +use myfsio_common::types::{ListParams, PartInfo, Tag}; +use myfsio_crypto::encryption::EncryptionMetadata; +use myfsio_storage::error::StorageError; +use myfsio_storage::traits::StorageEngine; +use percent_encoding::{utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC}; +use roxmltree::Document; +use serde::de::DeserializeOwned; +use serde::Deserialize; +use serde_json::{json, Value}; +use sysinfo::{Disks, System}; +use tokio::io::AsyncReadExt; 
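+
+// JSON API handlers backing the management UI: bucket object listing, NDJSON
+// streaming, and key search; bucket stats and folder listings; remote connection
+// tests and peer replication health, sync, and bidirectional status; plus helpers
+// for building UI object URLs, ZIP export, version manifests, and parsing and
+// serializing bucket ACL, CORS, and lifecycle configuration.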
+ +use crate::handlers::{self, ObjectQuery}; +use crate::middleware::session::SessionHandle; +use crate::state::AppState; +use crate::stores::connections::RemoteConnection; + +const UI_KEY_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC + .remove(b'-') + .remove(b'_') + .remove(b'.') + .remove(b'~') + .remove(b'/'); + +const PATH_SEGMENT_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC + .remove(b'-') + .remove(b'_') + .remove(b'.') + .remove(b'~'); + +const AWS_QUERY_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC + .remove(b'-') + .remove(b'_') + .remove(b'.') + .remove(b'~'); + +const UI_OBJECT_BROWSER_MAX_KEYS: usize = 5000; + +fn url_templates_for(bucket: &str) -> Value { + json!({ + "download": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/download", bucket), + "preview": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/preview", bucket), + "delete": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/delete", bucket), + "presign": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/presign", bucket), + "metadata": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/metadata", bucket), + "versions": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/versions", bucket), + "restore": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/restore/VERSION_ID_PLACEHOLDER", bucket), + "tags": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/tags", bucket), + "copy": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/copy", bucket), + "move": format!("/ui/buckets/{}/objects/KEY_PLACEHOLDER/move", bucket), + }) +} + +fn encode_object_key(key: &str) -> String { + utf8_percent_encode(key, UI_KEY_ENCODE_SET).to_string() +} + +fn encode_path_segment(value: &str) -> String { + utf8_percent_encode(value, PATH_SEGMENT_ENCODE_SET).to_string() +} + +fn build_ui_object_url(bucket: &str, key: &str, action: &str) -> String { + format!( + "/ui/buckets/{}/objects/{}/{}", + bucket, + encode_object_key(key), + action + ) +} + +fn human_size(bytes: u64) -> String { + const UNITS: [&str; 6] = ["B", "KB", "MB", "GB", "TB", "PB"]; + let mut size = bytes as f64; + let mut idx = 0; + while size >= 1024.0 && idx < UNITS.len() - 1 { + size /= 1024.0; + idx += 1; + } + if idx == 0 { + format!("{} {}", bytes, UNITS[idx]) + } else { + format!("{:.1} {}", size, UNITS[idx]) + } +} + +fn json_error(status: StatusCode, message: impl Into) -> Response { + (status, Json(json!({ "error": message.into() }))).into_response() +} + +fn json_ok(value: Value) -> Response { + Json(value).into_response() +} + +fn push_issue(result: &mut Value, issue: Value) { + if let Some(items) = result + .get_mut("issues") + .and_then(|value| value.as_array_mut()) + { + items.push(issue); + } +} + +fn storage_status(err: &StorageError) -> StatusCode { + match err { + StorageError::BucketNotFound(_) + | StorageError::ObjectNotFound { .. } + | StorageError::VersionNotFound { .. } + | StorageError::UploadNotFound(_) => StatusCode::NOT_FOUND, + StorageError::DeleteMarker { .. } => StatusCode::NOT_FOUND, + StorageError::MethodNotAllowed(_) => StatusCode::METHOD_NOT_ALLOWED, + StorageError::InvalidBucketName(_) + | StorageError::InvalidObjectKey(_) + | StorageError::InvalidRange + | StorageError::QuotaExceeded(_) => StatusCode::BAD_REQUEST, + StorageError::BucketAlreadyExists(_) => StatusCode::CONFLICT, + StorageError::BucketNotEmpty(_) => StatusCode::CONFLICT, + StorageError::Io(_) + | StorageError::Json(_) + | StorageError::Internal(_) + | StorageError::ObjectCorrupted { .. 
} => StatusCode::INTERNAL_SERVER_ERROR, + } +} + +fn storage_json_error(err: StorageError) -> Response { + json_error(storage_status(&err), err.to_string()) +} + +fn parse_bool_flag(value: Option<&str>) -> bool { + matches!( + value.map(|v| v.trim().to_ascii_lowercase()), + Some(v) if v == "1" || v == "true" || v == "on" || v == "yes" + ) +} + +fn parse_form_body(bytes: &[u8]) -> HashMap { + String::from_utf8_lossy(bytes) + .split('&') + .filter(|pair| !pair.is_empty()) + .map(|pair| { + let mut parts = pair.splitn(2, '='); + let key = parts.next().unwrap_or_default(); + let value = parts.next().unwrap_or_default(); + (decode_form_value(key), decode_form_value(value)) + }) + .collect() +} + +fn decode_form_value(value: &str) -> String { + percent_encoding::percent_decode_str(&value.replace('+', " ")) + .decode_utf8_lossy() + .into_owned() +} + +fn current_access_key(session: &SessionHandle) -> Option { + session.read(|s| s.user_id.clone()) +} + +fn owner_id_or_default(session: &SessionHandle) -> String { + current_access_key(session).unwrap_or_else(|| "myfsio".to_string()) +} + +fn safe_attachment_filename(key: &str) -> String { + let raw = key.rsplit('/').next().unwrap_or(key); + let sanitized = raw + .replace('"', "'") + .replace('\\', "_") + .chars() + .filter(|c| c.is_ascii_graphic() || *c == ' ') + .collect::(); + if sanitized.trim().is_empty() { + "download".to_string() + } else { + sanitized + } +} + +fn parse_api_base(state: &AppState) -> String { + state.config.api_base_url.trim_end_matches('/').to_string() +} + +fn aws_query_encode(value: &str) -> String { + utf8_percent_encode(value, AWS_QUERY_ENCODE_SET).to_string() +} + +fn xml_escape(value: &str) -> String { + value + .replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +fn key_relative_path(key: &str) -> Result { + let mut out = PathBuf::new(); + for component in FsPath::new(key).components() { + match component { + Component::Normal(part) => out.push(part), + _ => return Err("Invalid object key".to_string()), + } + } + if out.as_os_str().is_empty() { + return Err("Invalid object key".to_string()); + } + Ok(out) +} + +fn object_live_path(state: &AppState, bucket: &str, key: &str) -> Result { + let rel = key_relative_path(key)?; + Ok(state.config.storage_root.join(bucket).join(rel)) +} + +fn version_root_for_bucket(state: &AppState, bucket: &str) -> PathBuf { + state + .config + .storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_VERSIONS_DIR) +} + +fn version_dir_for_object(state: &AppState, bucket: &str, key: &str) -> Result { + let rel = key_relative_path(key)?; + Ok(version_root_for_bucket(state, bucket).join(rel)) +} + +#[derive(Debug, Clone, Default, Deserialize)] +struct VersionManifest { + #[serde(default)] + version_id: String, + #[serde(default)] + key: String, + #[serde(default)] + size: u64, + #[serde(default)] + archived_at: Option, + #[serde(default)] + etag: Option, + #[serde(default)] + metadata: HashMap, + #[serde(default)] + reason: Option, +} + +fn manifest_timestamp(value: &VersionManifest) -> DateTime { + value + .archived_at + .as_deref() + .and_then(|s| DateTime::parse_from_rfc3339(s).ok()) + .map(|dt| dt.with_timezone(&Utc)) + .unwrap_or_else(Utc::now) +} + +fn manifest_to_json(record: &VersionManifest) -> Value { + let ts = manifest_timestamp(record); + json!({ + "version_id": record.version_id, + "key": record.key, + "size": record.size, + "etag": record.etag, + "archived_at": ts.to_rfc3339(), + 
"last_modified": ts.to_rfc3339(), + "metadata": record.metadata, + "reason": record.reason.clone().unwrap_or_else(|| "update".to_string()), + "is_latest": false, + }) +} + +fn read_version_manifests_for_object( + state: &AppState, + bucket: &str, + key: &str, +) -> Result, String> { + let version_dir = version_dir_for_object(state, bucket, key)?; + if !version_dir.exists() { + return Ok(Vec::new()); + } + + let mut entries = Vec::new(); + for entry in std::fs::read_dir(&version_dir).map_err(|e| e.to_string())? { + let entry = entry.map_err(|e| e.to_string())?; + if !entry.file_type().map_err(|e| e.to_string())?.is_file() { + continue; + } + if entry.path().extension().and_then(|ext| ext.to_str()) != Some("json") { + continue; + } + let text = std::fs::read_to_string(entry.path()).map_err(|e| e.to_string())?; + let mut manifest: VersionManifest = + serde_json::from_str(&text).map_err(|e| e.to_string())?; + if manifest.version_id.is_empty() { + manifest.version_id = entry + .path() + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_string(); + } + if manifest.key.is_empty() { + manifest.key = key.to_string(); + } + entries.push(manifest); + } + + entries.sort_by(|a, b| manifest_timestamp(b).cmp(&manifest_timestamp(a))); + Ok(entries) +} + +async fn read_object_bytes_for_zip( + state: &AppState, + bucket: &str, + key: &str, +) -> Result, String> { + let all_meta = state + .storage + .get_object_metadata(bucket, key) + .await + .map_err(|e| e.to_string())?; + + if let Some(enc_meta) = EncryptionMetadata::from_metadata(&all_meta) { + let enc_svc = state + .encryption + .as_ref() + .ok_or_else(|| "Encryption service is not available".to_string())?; + let obj_path = state + .storage + .get_object_path(bucket, key) + .await + .map_err(|e| e.to_string())?; + let tmp_dir = state.config.storage_root.join(SYSTEM_ROOT).join("tmp"); + let _ = tokio::fs::create_dir_all(&tmp_dir).await; + let dec_tmp = tmp_dir.join(format!("zip-dec-{}", uuid::Uuid::new_v4())); + enc_svc + .decrypt_object(&obj_path, &dec_tmp, &enc_meta, None) + .await + .map_err(|e| e.to_string())?; + let bytes = tokio::fs::read(&dec_tmp).await.map_err(|e| e.to_string())?; + let _ = tokio::fs::remove_file(&dec_tmp).await; + return Ok(bytes); + } + + let (_meta, mut reader) = state + .storage + .get_object(bucket, key) + .await + .map_err(|e| e.to_string())?; + let mut bytes = Vec::new(); + reader + .read_to_end(&mut bytes) + .await + .map_err(|e| e.to_string())?; + Ok(bytes) +} + +fn value_to_string_vec(value: Option<&Value>, field_name: &str) -> Vec { + match value { + Some(Value::Array(items)) => items + .iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(), + Some(Value::String(s)) if !s.trim().is_empty() => vec![s.to_string()], + Some(Value::Null) | None => Vec::new(), + Some(_) => vec![field_name.to_string()], + } +} + +fn xml_child<'a>(node: roxmltree::Node<'a, 'a>, name: &str) -> Option> { + node.children() + .find(|child| child.is_element() && child.tag_name().name() == name) +} + +fn xml_child_text(node: roxmltree::Node<'_, '_>, name: &str) -> Option { + xml_child(node, name) + .and_then(|child| child.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) +} + +fn xml_children_texts(node: roxmltree::Node<'_, '_>, name: &str) -> Vec { + node.children() + .filter(|child| child.is_element() && child.tag_name().name() == name) + .filter_map(|child| child.text().map(|text| text.trim().to_string())) + .filter(|text| !text.is_empty()) + .collect() +} + +fn 
parse_acl_value(value: Option<&Value>, owner: &str) -> Value { + let default_grant = json!({ + "grantee": owner, + "permission": "FULL_CONTROL", + "grantee_type": "CanonicalUser", + "display_name": owner, + "grantee_id": owner, + "grantee_uri": Value::Null, + }); + + let Some(value) = value else { + return json!({ + "owner": owner, + "grants": [default_grant], + "canned_acls": ["private", "public-read", "public-read-write", "authenticated-read"], + }); + }; + + match value { + Value::String(xml) => { + let doc = match Document::parse(xml) { + Ok(doc) => doc, + Err(_) => { + return json!({ + "owner": owner, + "grants": [default_grant], + "canned_acls": ["private", "public-read", "public-read-write", "authenticated-read"], + }); + } + }; + let owner_node = doc + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Owner"); + let owner_id = owner_node + .and_then(|node| xml_child_text(node, "ID")) + .unwrap_or_else(|| owner.to_string()); + + let grants = doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "Grant") + .map(|grant| { + let grantee = xml_child(grant, "Grantee"); + let permission = xml_child_text(grant, "Permission").unwrap_or_default(); + let grantee_id = grantee.and_then(|node| xml_child_text(node, "ID")); + let display_name = grantee.and_then(|node| xml_child_text(node, "DisplayName")); + let grantee_uri = grantee.and_then(|node| xml_child_text(node, "URI")); + let grantee_type = grantee + .and_then(|node| { + node.attributes() + .find(|attr| { + attr.name() == "type" || attr.name().ends_with(":type") + }) + .map(|attr| attr.value().to_string()) + }) + .or_else(|| { + if grantee_uri.is_some() { + Some("Group".to_string()) + } else { + Some("CanonicalUser".to_string()) + } + }) + .unwrap_or_else(|| "CanonicalUser".to_string()); + let grantee_label = display_name + .clone() + .or_else(|| grantee_id.clone()) + .or_else(|| grantee_uri.clone()) + .unwrap_or_else(|| "unknown".to_string()); + + json!({ + "grantee": grantee_label, + "permission": permission, + "grantee_type": grantee_type, + "display_name": display_name, + "grantee_id": grantee_id, + "grantee_uri": grantee_uri, + }) + }) + .collect::>(); + + json!({ + "owner": owner_id, + "grants": if grants.is_empty() { vec![default_grant] } else { grants }, + "canned_acls": ["private", "public-read", "public-read-write", "authenticated-read"], + }) + } + Value::Object(map) => { + let grants = map + .get("grants") + .and_then(|value| value.as_array()) + .cloned() + .unwrap_or_else(|| vec![default_grant]); + json!({ + "owner": map.get("owner").and_then(|v| v.as_str()).unwrap_or(owner), + "grants": grants, + "canned_acls": ["private", "public-read", "public-read-write", "authenticated-read"], + }) + } + _ => json!({ + "owner": owner, + "grants": [default_grant], + "canned_acls": ["private", "public-read", "public-read-write", "authenticated-read"], + }), + } +} + +fn parse_cors_value(value: Option<&Value>) -> Value { + let Some(value) = value else { + return json!({ "rules": [] }); + }; + + match value { + Value::String(xml) => { + let doc = match Document::parse(xml) { + Ok(doc) => doc, + Err(_) => return json!({ "rules": [] }), + }; + let rules = doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "CORSRule") + .map(|rule| { + let allowed_origins = xml_children_texts(rule, "AllowedOrigin"); + let allowed_methods = xml_children_texts(rule, "AllowedMethod"); + let allowed_headers = xml_children_texts(rule, "AllowedHeader"); + let expose_headers = 
xml_children_texts(rule, "ExposeHeader"); + let max_age_seconds = + xml_child_text(rule, "MaxAgeSeconds").and_then(|v| v.parse::().ok()); + json!({ + "AllowedOrigins": allowed_origins, + "AllowedMethods": allowed_methods, + "AllowedHeaders": allowed_headers, + "ExposeHeaders": expose_headers, + "MaxAgeSeconds": max_age_seconds, + "allowed_origins": allowed_origins, + "allowed_methods": allowed_methods, + "allowed_headers": allowed_headers, + "expose_headers": expose_headers, + "max_age_seconds": max_age_seconds, + }) + }) + .collect::>(); + json!({ "rules": rules }) + } + Value::Array(rules) => json!({ "rules": rules }), + Value::Object(map) => { + if let Some(rules) = map.get("rules").and_then(|value| value.as_array()) { + json!({ "rules": rules }) + } else { + json!({ "rules": [map] }) + } + } + _ => json!({ "rules": [] }), + } +} + +fn parse_lifecycle_value(value: Option<&Value>) -> Value { + let Some(value) = value else { + return json!({ "rules": [] }); + }; + + match value { + Value::String(xml) => { + let doc = match Document::parse(xml) { + Ok(doc) => doc, + Err(_) => return json!({ "rules": [] }), + }; + let rules = doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "Rule") + .map(|rule| { + let rule_id = xml_child_text(rule, "ID").unwrap_or_default(); + let status = xml_child_text(rule, "Status").unwrap_or_else(|| "Enabled".to_string()); + let prefix = xml_child(rule, "Filter") + .and_then(|filter| xml_child_text(filter, "Prefix")) + .or_else(|| xml_child_text(rule, "Prefix")) + .unwrap_or_default(); + let expiration_days = xml_child(rule, "Expiration") + .and_then(|node| xml_child_text(node, "Days")) + .and_then(|v| v.parse::().ok()); + let noncurrent_days = xml_child(rule, "NoncurrentVersionExpiration") + .and_then(|node| xml_child_text(node, "NoncurrentDays")) + .and_then(|v| v.parse::().ok()); + let abort_days = xml_child(rule, "AbortIncompleteMultipartUpload") + .and_then(|node| xml_child_text(node, "DaysAfterInitiation")) + .and_then(|v| v.parse::().ok()); + + json!({ + "ID": rule_id, + "Status": status, + "Filter": { "Prefix": prefix }, + "Expiration": expiration_days.map(|days| json!({ "Days": days })), + "NoncurrentVersionExpiration": noncurrent_days.map(|days| json!({ "NoncurrentDays": days })), + "AbortIncompleteMultipartUpload": abort_days.map(|days| json!({ "DaysAfterInitiation": days })), + "id": rule_id, + "status": status, + "prefix": prefix, + "expiration_days": expiration_days, + "noncurrent_days": noncurrent_days, + "abort_mpu_days": abort_days, + }) + }) + .collect::>(); + json!({ "rules": rules }) + } + Value::Array(rules) => json!({ "rules": rules }), + Value::Object(map) => { + if let Some(rules) = map.get("rules").and_then(|value| value.as_array()) { + json!({ "rules": rules }) + } else { + json!({ "rules": [map] }) + } + } + _ => json!({ "rules": [] }), + } +} + +fn bucket_acl_xml_for_canned(owner_id: &str, canned_acl: &str) -> Result { + let mut grants = vec![format!( + "{}{}FULL_CONTROL", + xml_escape(owner_id), + xml_escape(owner_id), + )]; + + match canned_acl { + "private" => {} + "public-read" => grants.push( + "http://acs.amazonaws.com/groups/global/AllUsersREAD".to_string() + ), + "public-read-write" => { + grants.push( + "http://acs.amazonaws.com/groups/global/AllUsersREAD".to_string() + ); + grants.push( + "http://acs.amazonaws.com/groups/global/AllUsersWRITE".to_string() + ); + } + "authenticated-read" => grants.push( + "http://acs.amazonaws.com/groups/global/AuthenticatedUsersREAD".to_string() + ), + _ => 
return Err(format!("Invalid canned ACL: {}", canned_acl)), + } + + Ok(format!( + "{}{}{}", + xml_escape(owner_id), + xml_escape(owner_id), + grants.join("") + )) +} + +fn cors_xml_from_rules(rules: &[Value]) -> String { + let mut xml = String::from( + "", + ); + for rule in rules { + xml.push_str(""); + for origin in value_to_string_vec(rule.get("AllowedOrigins"), "AllowedOrigin") { + xml.push_str(&format!( + "{}", + xml_escape(&origin) + )); + } + for method in value_to_string_vec(rule.get("AllowedMethods"), "AllowedMethod") { + xml.push_str(&format!( + "{}", + xml_escape(&method) + )); + } + for header in value_to_string_vec(rule.get("AllowedHeaders"), "AllowedHeader") { + xml.push_str(&format!( + "{}", + xml_escape(&header) + )); + } + for header in value_to_string_vec(rule.get("ExposeHeaders"), "ExposeHeader") { + xml.push_str(&format!( + "{}", + xml_escape(&header) + )); + } + if let Some(max_age) = rule.get("MaxAgeSeconds").and_then(|v| v.as_u64()) { + xml.push_str(&format!("{}", max_age)); + } + xml.push_str(""); + } + xml.push_str(""); + xml +} + +fn lifecycle_xml_from_rules(rules: &[Value]) -> String { + let mut xml = String::from( + "", + ); + for rule in rules { + xml.push_str(""); + + let id = rule.get("ID").and_then(|v| v.as_str()).unwrap_or_default(); + if !id.is_empty() { + xml.push_str(&format!("{}", xml_escape(id))); + } + + let status = rule + .get("Status") + .and_then(|v| v.as_str()) + .unwrap_or("Enabled"); + xml.push_str(&format!("{}", xml_escape(status))); + + let prefix = rule + .get("Filter") + .and_then(|v| v.get("Prefix")) + .and_then(|v| v.as_str()) + .or_else(|| rule.get("Prefix").and_then(|v| v.as_str())) + .unwrap_or_default(); + xml.push_str(""); + xml.push_str(&format!("{}", xml_escape(prefix))); + xml.push_str(""); + + if let Some(days) = rule + .get("Expiration") + .and_then(|v| v.get("Days")) + .and_then(|v| v.as_u64()) + { + xml.push_str(&format!("{}", days)); + } + + if let Some(days) = rule + .get("NoncurrentVersionExpiration") + .and_then(|v| v.get("NoncurrentDays")) + .and_then(|v| v.as_u64()) + { + xml.push_str(&format!( + "{}", + days + )); + } + + if let Some(days) = rule + .get("AbortIncompleteMultipartUpload") + .and_then(|v| v.get("DaysAfterInitiation")) + .and_then(|v| v.as_u64()) + { + xml.push_str(&format!( + "{}", + days + )); + } + + xml.push_str(""); + } + xml.push_str(""); + xml +} + +fn zip_dos_time(dt: DateTime) -> (u16, u16) { + let year = dt.year().clamp(1980, 2107) as u16; + let month = dt.month() as u16; + let day = dt.day() as u16; + let hour = dt.hour() as u16; + let minute = dt.minute() as u16; + let second = (dt.second() / 2) as u16; + let dos_time = (hour << 11) | (minute << 5) | second; + let dos_date = ((year - 1980) << 9) | (month << 5) | day; + (dos_time, dos_date) +} + +fn write_u16(buf: &mut Vec, value: u16) { + buf.extend_from_slice(&value.to_le_bytes()); +} + +fn write_u32(buf: &mut Vec, value: u32) { + buf.extend_from_slice(&value.to_le_bytes()); +} + +fn build_zip_archive(entries: Vec<(String, Vec, DateTime)>) -> Result, String> { + #[derive(Clone)] + struct CentralEntry { + name: Vec, + crc32: u32, + size: u32, + offset: u32, + mod_time: u16, + mod_date: u16, + } + + let mut output = Vec::new(); + let mut central_entries = Vec::new(); + + for (name, data, modified) in entries { + if data.len() > u32::MAX as usize { + return Err(format!("Object '{}' is too large for ZIP export", name)); + } + let offset = output.len(); + if offset > u32::MAX as usize { + return Err("ZIP archive is too large".to_string()); + } + + 
let name_bytes = name.into_bytes(); + let mut hasher = crc32fast::Hasher::new(); + hasher.update(&data); + let crc32 = hasher.finalize(); + let size = data.len() as u32; + let (mod_time, mod_date) = zip_dos_time(modified); + let flags = 0x0800u16; + + write_u32(&mut output, 0x04034b50); + write_u16(&mut output, 20); + write_u16(&mut output, flags); + write_u16(&mut output, 0); + write_u16(&mut output, mod_time); + write_u16(&mut output, mod_date); + write_u32(&mut output, crc32); + write_u32(&mut output, size); + write_u32(&mut output, size); + write_u16(&mut output, name_bytes.len() as u16); + write_u16(&mut output, 0); + output.extend_from_slice(&name_bytes); + output.extend_from_slice(&data); + + central_entries.push(CentralEntry { + name: name_bytes, + crc32, + size, + offset: offset as u32, + mod_time, + mod_date, + }); + } + + let central_start = output.len(); + for entry in ¢ral_entries { + write_u32(&mut output, 0x02014b50); + write_u16(&mut output, 20); + write_u16(&mut output, 20); + write_u16(&mut output, 0x0800); + write_u16(&mut output, 0); + write_u16(&mut output, entry.mod_time); + write_u16(&mut output, entry.mod_date); + write_u32(&mut output, entry.crc32); + write_u32(&mut output, entry.size); + write_u32(&mut output, entry.size); + write_u16(&mut output, entry.name.len() as u16); + write_u16(&mut output, 0); + write_u16(&mut output, 0); + write_u16(&mut output, 0); + write_u16(&mut output, 0); + write_u32(&mut output, 0); + write_u32(&mut output, entry.offset); + output.extend_from_slice(&entry.name); + } + + let central_size = output.len() - central_start; + if central_entries.len() > u16::MAX as usize + || central_start > u32::MAX as usize + || central_size > u32::MAX as usize + { + return Err("ZIP archive exceeds classic ZIP limits".to_string()); + } + + write_u32(&mut output, 0x06054b50); + write_u16(&mut output, 0); + write_u16(&mut output, 0); + write_u16(&mut output, central_entries.len() as u16); + write_u16(&mut output, central_entries.len() as u16); + write_u32(&mut output, central_size as u32); + write_u32(&mut output, central_start as u32); + write_u16(&mut output, 0); + + Ok(output) +} + +fn dangerous_preview_content_type(content_type: Option<&str>, key: &str) -> Option { + let guessed = content_type + .map(|value| value.to_ascii_lowercase()) + .unwrap_or_else(|| { + mime_guess::from_path(key) + .first_raw() + .unwrap_or("application/octet-stream") + .to_ascii_lowercase() + }); + + match guessed.split(';').next().unwrap_or_default().trim() { + "text/html" + | "text/xml" + | "application/xml" + | "application/xhtml+xml" + | "image/svg+xml" => Some("text/plain; charset=utf-8".to_string()), + _ => None, + } +} + +async fn parse_json_body(body: Body) -> Result { + let bytes = to_bytes(body, usize::MAX) + .await + .map_err(|_| json_error(StatusCode::BAD_REQUEST, "Failed to read request body"))?; + serde_json::from_slice::(&bytes) + .map_err(|e| json_error(StatusCode::BAD_REQUEST, format!("Invalid JSON body: {}", e))) +} + +#[derive(Deserialize, Default)] +pub struct ListObjectsQuery { + #[serde(default)] + pub max_keys: Option, + #[serde(default)] + pub continuation_token: Option, + #[serde(default)] + pub prefix: Option, + #[serde(default)] + pub start_after: Option, + #[serde(default)] + pub delimiter: Option, +} + +fn object_json(bucket_name: &str, o: &myfsio_common::types::ObjectMeta) -> Value { + json!({ + "key": o.key, + "size": o.size, + "last_modified": o.last_modified.to_rfc3339(), + "last_modified_iso": o.last_modified.to_rfc3339(), + 
"last_modified_display": o.last_modified.format("%Y-%m-%d %H:%M:%S").to_string(), + "etag": o.etag.clone().unwrap_or_default(), + "storage_class": o.storage_class.clone().unwrap_or_else(|| "STANDARD".to_string()), + "content_type": o.content_type.clone().unwrap_or_default(), + "download_url": build_ui_object_url(bucket_name, &o.key, "download"), + "preview_url": build_ui_object_url(bucket_name, &o.key, "preview"), + "delete_endpoint": build_ui_object_url(bucket_name, &o.key, "delete"), + "presign_endpoint": build_ui_object_url(bucket_name, &o.key, "presign"), + "metadata_url": build_ui_object_url(bucket_name, &o.key, "metadata"), + "versions_endpoint": build_ui_object_url(bucket_name, &o.key, "versions"), + "restore_template": format!( + "/ui/buckets/{}/objects/{}/restore/VERSION_ID_PLACEHOLDER", + bucket_name, + encode_object_key(&o.key) + ), + "tags_url": build_ui_object_url(bucket_name, &o.key, "tags"), + "copy_url": build_ui_object_url(bucket_name, &o.key, "copy"), + "move_url": build_ui_object_url(bucket_name, &o.key, "move"), + }) +} + +pub async fn list_bucket_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + Query(q): Query, +) -> Response { + if !matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + return json_error(StatusCode::NOT_FOUND, "Bucket not found"); + } + + let max_keys = q.max_keys.unwrap_or(1000).min(5000); + let versioning_enabled = state + .storage + .is_versioning_enabled(&bucket_name) + .await + .unwrap_or(false); + let stats = state.storage.bucket_stats(&bucket_name).await.ok(); + let total_count = stats.as_ref().map(|s| s.objects).unwrap_or(0); + + let use_shallow = q.delimiter.as_deref() == Some("/"); + + if use_shallow { + let params = myfsio_common::types::ShallowListParams { + prefix: q.prefix.clone().unwrap_or_default(), + delimiter: "/".to_string(), + max_keys, + continuation_token: q.continuation_token.clone(), + }; + return match state + .storage + .list_objects_shallow(&bucket_name, ¶ms) + .await + { + Ok(res) => { + let objects: Vec = res + .objects + .iter() + .map(|o| object_json(&bucket_name, o)) + .collect(); + Json(json!({ + "versioning_enabled": versioning_enabled, + "total_count": total_count, + "is_truncated": res.is_truncated, + "next_continuation_token": res.next_continuation_token, + "url_templates": url_templates_for(&bucket_name), + "objects": objects, + "common_prefixes": res.common_prefixes, + })) + .into_response() + } + Err(e) => storage_json_error(e), + }; + } + + let params = ListParams { + max_keys, + continuation_token: q.continuation_token.clone(), + prefix: q.prefix.clone(), + start_after: q.start_after.clone(), + }; + + match state.storage.list_objects(&bucket_name, ¶ms).await { + Ok(res) => { + let objects: Vec = res + .objects + .iter() + .map(|o| object_json(&bucket_name, o)) + .collect(); + + Json(json!({ + "versioning_enabled": versioning_enabled, + "total_count": total_count, + "is_truncated": res.is_truncated, + "next_continuation_token": res.next_continuation_token, + "url_templates": url_templates_for(&bucket_name), + "objects": objects, + })) + .into_response() + } + Err(e) => storage_json_error(e), + } +} + +#[derive(Deserialize, Default)] +pub struct StreamObjectsQuery { + #[serde(default)] + pub prefix: Option, + #[serde(default)] + pub delimiter: Option, +} + +pub async fn stream_bucket_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + Query(q): Query, +) -> Response { + if 
!matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + return (StatusCode::NOT_FOUND, "Bucket not found").into_response(); + } + + let versioning_enabled = state + .storage + .is_versioning_enabled(&bucket_name) + .await + .unwrap_or(false); + let stats = state.storage.bucket_stats(&bucket_name).await.ok(); + let total_count = stats.as_ref().map(|s| s.objects).unwrap_or(0); + + let use_delimiter = q.delimiter.as_deref() == Some("/"); + let prefix = q.prefix.clone().unwrap_or_default(); + + let (tx, rx) = tokio::sync::mpsc::channel::>(64); + + let meta_line = json!({ + "type": "meta", + "url_templates": url_templates_for(&bucket_name), + "versioning_enabled": versioning_enabled, + }) + .to_string() + + "\n"; + let count_line = json!({ "type": "count", "total_count": total_count }).to_string() + "\n"; + + let storage = state.storage.clone(); + let bucket = bucket_name.clone(); + + tokio::spawn(async move { + if tx + .send(Ok(bytes::Bytes::from(meta_line.into_bytes()))) + .await + .is_err() + { + return; + } + if tx + .send(Ok(bytes::Bytes::from(count_line.into_bytes()))) + .await + .is_err() + { + return; + } + + if use_delimiter { + let mut token: Option = None; + loop { + let params = myfsio_common::types::ShallowListParams { + prefix: prefix.clone(), + delimiter: "/".to_string(), + max_keys: UI_OBJECT_BROWSER_MAX_KEYS, + continuation_token: token.clone(), + }; + match storage.list_objects_shallow(&bucket, ¶ms).await { + Ok(res) => { + for p in &res.common_prefixes { + let line = json!({ "type": "folder", "prefix": p }).to_string() + "\n"; + if tx + .send(Ok(bytes::Bytes::from(line.into_bytes()))) + .await + .is_err() + { + return; + } + } + for o in &res.objects { + let line = json!({ + "type": "object", + "key": o.key, + "size": o.size, + "last_modified": o.last_modified.to_rfc3339(), + "last_modified_iso": o.last_modified.to_rfc3339(), + "last_modified_display": o.last_modified.format("%Y-%m-%d %H:%M:%S").to_string(), + "etag": o.etag.clone().unwrap_or_default(), + "storage_class": o.storage_class.clone().unwrap_or_else(|| "STANDARD".to_string()), + }) + .to_string() + + "\n"; + if tx + .send(Ok(bytes::Bytes::from(line.into_bytes()))) + .await + .is_err() + { + return; + } + } + if !res.is_truncated || res.next_continuation_token.is_none() { + break; + } + token = res.next_continuation_token; + } + Err(e) => { + let line = + json!({ "type": "error", "error": e.to_string() }).to_string() + "\n"; + let _ = tx.send(Ok(bytes::Bytes::from(line.into_bytes()))).await; + return; + } + } + } + } else { + let mut token: Option = None; + loop { + let params = ListParams { + max_keys: 1000, + continuation_token: token.clone(), + prefix: if prefix.is_empty() { + None + } else { + Some(prefix.clone()) + }, + start_after: None, + }; + match storage.list_objects(&bucket, ¶ms).await { + Ok(res) => { + for o in &res.objects { + let line = json!({ + "type": "object", + "key": o.key, + "size": o.size, + "last_modified": o.last_modified.to_rfc3339(), + "last_modified_iso": o.last_modified.to_rfc3339(), + "last_modified_display": o.last_modified.format("%Y-%m-%d %H:%M:%S").to_string(), + "etag": o.etag.clone().unwrap_or_default(), + "storage_class": o.storage_class.clone().unwrap_or_else(|| "STANDARD".to_string()), + }) + .to_string() + + "\n"; + if tx + .send(Ok(bytes::Bytes::from(line.into_bytes()))) + .await + .is_err() + { + return; + } + } + if !res.is_truncated || res.next_continuation_token.is_none() { + break; + } + token = res.next_continuation_token; + } + Err(e) => { + let line = 
+ json!({ "type": "error", "error": e.to_string() }).to_string() + "\n"; + let _ = tx.send(Ok(bytes::Bytes::from(line.into_bytes()))).await; + return; + } + } + } + } + + let done_line = json!({ "type": "done" }).to_string() + "\n"; + let _ = tx + .send(Ok(bytes::Bytes::from(done_line.into_bytes()))) + .await; + }); + + let stream = tokio_stream::wrappers::ReceiverStream::new(rx); + let body = Body::from_stream(stream); + + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + "application/x-ndjson; charset=utf-8".parse().unwrap(), + ); + headers.insert(header::CACHE_CONTROL, "no-cache".parse().unwrap()); + headers.insert("x-accel-buffering", "no".parse().unwrap()); + + (StatusCode::OK, headers, body).into_response() +} + +#[derive(Deserialize, Default)] +pub struct SearchObjectsQuery { + #[serde(default)] + pub q: Option, + #[serde(default)] + pub prefix: Option, + #[serde(default)] + pub limit: Option, + #[serde(default)] + pub start_after: Option, +} + +pub async fn search_bucket_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + Query(q): Query, +) -> Response { + if !matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + return json_error(StatusCode::NOT_FOUND, "Bucket not found"); + } + + let term = q.q.unwrap_or_default().to_lowercase(); + let limit = q.limit.unwrap_or(500).clamp(1, 1000); + let prefix = q.prefix.clone().unwrap_or_default(); + let start_after = q.start_after.clone().filter(|s| !s.is_empty()); + + if term.is_empty() { + return Json(json!({ "results": [], "truncated": false, "next_token": Value::Null })) + .into_response(); + } + + let mut results: Vec = Vec::new(); + let mut truncated = false; + let mut last_match_key: Option = None; + let mut token: Option = None; + let mut start_after_arg = start_after; + loop { + let params = ListParams { + max_keys: 1000, + continuation_token: token.clone(), + prefix: if prefix.is_empty() { + None + } else { + Some(prefix.clone()) + }, + start_after: start_after_arg.take(), + }; + match state.storage.list_objects(&bucket_name, ¶ms).await { + Ok(res) => { + for o in &res.objects { + if o.key.to_lowercase().contains(&term) { + if results.len() >= limit { + truncated = true; + break; + } + last_match_key = Some(o.key.clone()); + results.push(object_json(&bucket_name, o)); + } + } + if truncated || !res.is_truncated || res.next_continuation_token.is_none() { + if res.is_truncated && results.len() >= limit { + truncated = true; + } + break; + } + token = res.next_continuation_token; + } + Err(e) => return storage_json_error(e), + } + } + + let next_token = if truncated { last_match_key } else { None }; + Json(json!({ + "results": results, + "truncated": truncated, + "next_token": next_token, + })) + .into_response() +} + +pub async fn bucket_stats_json( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, +) -> Response { + if !matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + return json_error(StatusCode::NOT_FOUND, "Bucket not found"); + } + match state.storage.bucket_stats(&bucket_name).await { + Ok(stats) => Json(json!({ + "objects": stats.objects, + "bytes": stats.bytes, + "version_count": stats.version_count, + "version_bytes": stats.version_bytes, + "total_objects": stats.objects + stats.version_count, + "total_bytes": stats.bytes + stats.version_bytes, + })) + .into_response(), + Err(e) => storage_json_error(e), + } +} + +pub async fn list_bucket_folders( + State(state): State, + 
Extension(_session): Extension, + Path(bucket_name): Path, + Query(q): Query, +) -> Response { + if !matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + return json_error(StatusCode::NOT_FOUND, "Bucket not found"); + } + + let prefix = q.prefix.clone().unwrap_or_default(); + let params = myfsio_common::types::ShallowListParams { + prefix: prefix.clone(), + delimiter: "/".to_string(), + max_keys: UI_OBJECT_BROWSER_MAX_KEYS, + continuation_token: None, + }; + match state + .storage + .list_objects_shallow(&bucket_name, ¶ms) + .await + { + Ok(res) => Json(json!({ + "prefixes": res.common_prefixes, + "current_prefix": prefix, + })) + .into_response(), + Err(e) => storage_json_error(e), + } +} + +pub async fn list_copy_targets( + State(state): State, + Extension(_session): Extension, + Path(_bucket_name): Path, +) -> Response { + let buckets: Vec = state + .storage + .list_buckets() + .await + .map(|list| list.into_iter().map(|b| b.name).collect()) + .unwrap_or_default(); + Json(json!({ "buckets": buckets })).into_response() +} + +#[derive(Deserialize)] +pub struct ConnectionTestPayload { + pub endpoint_url: String, + pub access_key: String, + pub secret_key: String, + #[serde(default = "default_region")] + pub region: String, +} + +fn default_region() -> String { + "us-east-1".to_string() +} + +pub async fn test_connection( + State(state): State, + Extension(_session): Extension, + body: Body, +) -> Response { + let payload: ConnectionTestPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(_) => { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ + "status": "error", + "message": "Invalid JSON payload", + })), + ) + .into_response() + } + }; + + if payload.endpoint_url.trim().is_empty() + || payload.access_key.trim().is_empty() + || payload.secret_key.trim().is_empty() + { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ + "status": "error", + "message": "Missing credentials", + })), + ) + .into_response(); + } + + let connection = RemoteConnection { + id: "test".to_string(), + name: "Test".to_string(), + endpoint_url: payload.endpoint_url.trim().to_string(), + access_key: payload.access_key.trim().to_string(), + secret_key: payload.secret_key.trim().to_string(), + region: payload.region.trim().to_string(), + }; + + if state.replication.check_endpoint(&connection).await { + Json(json!({ + "status": "ok", + "message": "Connection successful", + })) + .into_response() + } else { + ( + StatusCode::BAD_REQUEST, + Json(json!({ + "status": "error", + "message": format!("Connection failed or endpoint is unreachable: {}", connection.endpoint_url), + })), + ) + .into_response() + } +} + +pub async fn connection_health( + State(state): State, + Extension(_session): Extension, + Path(connection_id): Path, +) -> Response { + let Some(connection) = state.connections.get(&connection_id) else { + return ( + StatusCode::NOT_FOUND, + Json(json!({ + "healthy": false, + "error": "Connection not found", + })), + ) + .into_response(); + }; + + let healthy = state.replication.check_endpoint(&connection).await; + Json(json!({ + "healthy": healthy, + "error": if healthy { + Value::Null + } else { + Value::String(format!("Cannot reach endpoint: {}", connection.endpoint_url)) + } + })) + .into_response() +} + +async fn peer_health_payload(state: &AppState, site_id: &str) -> Result { + let Some(registry) = &state.site_registry else { + return Err(json_error( + StatusCode::NOT_FOUND, + "Site registry not available", + )); + }; + let Some(peer) = 
registry.get_peer(site_id) else { + return Err(json_error(StatusCode::NOT_FOUND, "Peer not found")); + }; + + let checked_at = chrono::Utc::now().timestamp_millis() as f64 / 1000.0; + let mut healthy = false; + let mut error: Option = None; + + if let Some(connection_id) = peer.connection_id.as_deref() { + if let Some(connection) = state.connections.get(connection_id) { + healthy = state.replication.check_endpoint(&connection).await; + if !healthy { + error = Some(format!( + "Cannot reach endpoint: {}", + connection.endpoint_url + )); + } + } else { + error = Some(format!("Connection '{}' not found", connection_id)); + } + } else { + error = Some("No connection configured for this peer".to_string()); + } + + registry.update_health(site_id, healthy); + Ok(json!({ + "site_id": site_id, + "is_healthy": healthy, + "checked_at": checked_at, + "error": error, + })) +} + +pub async fn peer_health( + State(state): State, + Extension(_session): Extension, + Path(site_id): Path, +) -> Response { + match peer_health_payload(&state, &site_id).await { + Ok(payload) => Json(payload).into_response(), + Err(response) => response, + } +} + +pub async fn peer_sync_stats( + State(state): State, + Extension(_session): Extension, + Path(site_id): Path, +) -> Response { + let Some(registry) = &state.site_registry else { + return json_error(StatusCode::NOT_FOUND, "Site registry not available"); + }; + let Some(peer) = registry.get_peer(&site_id) else { + return json_error(StatusCode::NOT_FOUND, "Peer not found"); + }; + let Some(connection_id) = peer.connection_id.as_deref() else { + return json_error(StatusCode::BAD_REQUEST, "No connection configured"); + }; + + let rules = state.replication.list_rules(); + let mut buckets: Vec = Vec::new(); + let mut buckets_syncing = 0u64; + let mut objects_synced = 0u64; + let mut objects_pending = 0u64; + let mut objects_failed = 0u64; + let mut bytes_synced = 0u64; + let mut last_sync_at: Option = None; + + for rule in rules + .into_iter() + .filter(|rule| rule.target_connection_id == connection_id) + { + buckets_syncing += 1; + objects_synced += rule.stats.objects_synced; + objects_pending += rule.stats.objects_pending; + bytes_synced += rule.stats.bytes_synced; + if let Some(sync_at) = rule.stats.last_sync_at { + if last_sync_at + .map(|current| sync_at > current) + .unwrap_or(true) + { + last_sync_at = Some(sync_at); + } + } + + let failures = state.replication.get_failure_count(&rule.bucket_name) as u64; + objects_failed += failures; + buckets.push(json!({ + "bucket_name": rule.bucket_name, + "target_bucket": rule.target_bucket, + "mode": rule.mode, + "enabled": rule.enabled, + "last_sync_at": rule.stats.last_sync_at, + "objects_synced": rule.stats.objects_synced, + "objects_pending": rule.stats.objects_pending, + "failures": failures, + })); + } + + Json(json!({ + "buckets_syncing": buckets_syncing, + "objects_synced": objects_synced, + "objects_pending": objects_pending, + "objects_failed": objects_failed, + "bytes_synced": bytes_synced, + "last_sync_at": last_sync_at, + "buckets": buckets, + })) + .into_response() +} + +pub async fn peer_bidirectional_status( + State(state): State, + Extension(_session): Extension, + Path(site_id): Path, +) -> Response { + let Some(registry) = &state.site_registry else { + return json_error(StatusCode::NOT_FOUND, "Site registry not available"); + }; + let Some(peer) = registry.get_peer(&site_id) else { + return json_error(StatusCode::NOT_FOUND, "Peer not found"); + }; + + let local_site = registry.get_local_site(); + let 
local_bidirectional_rules: Vec = state + .replication + .list_rules() + .into_iter() + .filter(|rule| { + peer.connection_id + .as_deref() + .map(|connection_id| rule.target_connection_id == connection_id) + .unwrap_or(false) + && rule.mode == crate::services::replication::MODE_BIDIRECTIONAL + }) + .map(|rule| { + json!({ + "bucket_name": rule.bucket_name, + "target_bucket": rule.target_bucket, + "enabled": rule.enabled, + }) + }) + .collect(); + + let mut result = json!({ + "site_id": site_id, + "local_site_id": local_site.as_ref().map(|site| site.site_id.clone()), + "local_endpoint": local_site.as_ref().map(|site| site.endpoint.clone()), + "local_bidirectional_rules": local_bidirectional_rules, + "local_site_sync_enabled": state.config.site_sync_enabled, + "remote_status": Value::Null, + "issues": Vec::::new(), + "is_fully_configured": false, + }); + + if local_site + .as_ref() + .map(|site| site.site_id.trim().is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + json!({ + "code": "NO_LOCAL_SITE_ID", + "message": "Local site identity not configured", + "severity": "error", + }), + ); + } + if local_site + .as_ref() + .map(|site| site.endpoint.trim().is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + json!({ + "code": "NO_LOCAL_ENDPOINT", + "message": "Local site endpoint not configured (remote site cannot reach back)", + "severity": "error", + }), + ); + } + + let Some(connection_id) = peer.connection_id.as_deref() else { + push_issue( + &mut result, + json!({ + "code": "NO_CONNECTION", + "message": "No connection configured for this peer", + "severity": "error", + }), + ); + return Json(result).into_response(); + }; + + let Some(connection) = state.connections.get(connection_id) else { + push_issue( + &mut result, + json!({ + "code": "CONNECTION_NOT_FOUND", + "message": format!("Connection '{}' not found", connection_id), + "severity": "error", + }), + ); + return Json(result).into_response(); + }; + + if result["local_bidirectional_rules"] + .as_array() + .map(|rules| rules.is_empty()) + .unwrap_or(true) + { + push_issue( + &mut result, + json!({ + "code": "NO_LOCAL_BIDIRECTIONAL_RULES", + "message": "No bidirectional replication rules configured on this site", + "severity": "warning", + }), + ); + } + if !state.config.site_sync_enabled { + push_issue( + &mut result, + json!({ + "code": "SITE_SYNC_DISABLED", + "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). 
Pull operations will not work.", + "severity": "warning", + }), + ); + } + if !state.replication.check_endpoint(&connection).await { + push_issue( + &mut result, + json!({ + "code": "REMOTE_UNREACHABLE", + "message": "Remote endpoint is not reachable", + "severity": "error", + }), + ); + return Json(result).into_response(); + } + + let admin_url = format!( + "{}/admin/sites", + connection.endpoint_url.trim_end_matches('/') + ); + match reqwest::Client::new() + .get(&admin_url) + .header("accept", "application/json") + .header("x-access-key", &connection.access_key) + .header("x-secret-key", &connection.secret_key) + .timeout(std::time::Duration::from_secs(10)) + .send() + .await + { + Ok(resp) if resp.status().is_success() => match resp.json::().await { + Ok(remote_data) => { + let remote_local = remote_data.get("local").cloned().unwrap_or(Value::Null); + let remote_peers = remote_data + .get("peers") + .and_then(|value| value.as_array()) + .cloned() + .unwrap_or_default(); + let mut has_peer_for_us = false; + let mut peer_connection_configured = false; + + for remote_peer in &remote_peers { + let matches_site = local_site + .as_ref() + .map(|site| { + remote_peer.get("site_id").and_then(|v| v.as_str()) + == Some(site.site_id.as_str()) + || remote_peer.get("endpoint").and_then(|v| v.as_str()) + == Some(site.endpoint.as_str()) + }) + .unwrap_or(false); + if matches_site { + has_peer_for_us = true; + peer_connection_configured = remote_peer + .get("connection_id") + .and_then(|v| v.as_str()) + .map(|v| !v.trim().is_empty()) + .unwrap_or(false); + break; + } + } + + result["remote_status"] = json!({ + "reachable": true, + "local_site": remote_local, + "site_sync_enabled": Value::Null, + "has_peer_for_us": has_peer_for_us, + "peer_connection_configured": peer_connection_configured, + "has_bidirectional_rules_for_us": Value::Null, + }); + + if !has_peer_for_us { + push_issue( + &mut result, + json!({ + "code": "REMOTE_NO_PEER_FOR_US", + "message": "Remote site does not have this site registered as a peer", + "severity": "error", + }), + ); + } else if !peer_connection_configured { + push_issue( + &mut result, + json!({ + "code": "REMOTE_NO_CONNECTION_FOR_US", + "message": "Remote site has us as peer but no connection configured (cannot push back)", + "severity": "error", + }), + ); + } + } + Err(_) => { + result["remote_status"] = json!({ + "reachable": true, + "invalid_response": true, + }); + push_issue( + &mut result, + json!({ + "code": "REMOTE_INVALID_RESPONSE", + "message": "Remote admin API returned invalid JSON", + "severity": "warning", + }), + ); + } + }, + Ok(resp) + if resp.status() == StatusCode::UNAUTHORIZED + || resp.status() == StatusCode::FORBIDDEN => + { + result["remote_status"] = json!({ + "reachable": true, + "admin_access_denied": true, + }); + push_issue( + &mut result, + json!({ + "code": "REMOTE_ADMIN_ACCESS_DENIED", + "message": "Cannot verify remote configuration (admin access denied)", + "severity": "warning", + }), + ); + } + Ok(resp) => { + result["remote_status"] = json!({ + "reachable": true, + "admin_api_error": resp.status().as_u16(), + }); + push_issue( + &mut result, + json!({ + "code": "REMOTE_ADMIN_API_ERROR", + "message": format!("Remote admin API returned status {}", resp.status().as_u16()), + "severity": "warning", + }), + ); + } + Err(_) => { + result["remote_status"] = json!({ + "reachable": false, + "error": "Connection failed", + }); + push_issue( + &mut result, + json!({ + "code": "REMOTE_ADMIN_UNREACHABLE", + "message": "Could not reach remote 
admin API", + "severity": "warning", + }), + ); + } + } + + let has_errors = result["issues"] + .as_array() + .map(|items| { + items.iter().any(|issue| { + issue.get("severity").and_then(|value| value.as_str()) == Some("error") + }) + }) + .unwrap_or(true); + result["is_fully_configured"] = json!( + !has_errors + && result["local_bidirectional_rules"] + .as_array() + .map(|rules| !rules.is_empty()) + .unwrap_or(false) + ); + + Json(result).into_response() +} + +#[derive(Clone, Copy)] +struct MetricsSettingsSnapshot { + enabled: bool, + retention_hours: u64, + interval_minutes: u64, +} + +static METRICS_SETTINGS: OnceLock> = OnceLock::new(); + +fn metrics_settings_snapshot(state: &AppState) -> MetricsSettingsSnapshot { + *METRICS_SETTINGS + .get_or_init(|| { + Mutex::new(MetricsSettingsSnapshot { + enabled: state.config.metrics_history_enabled, + retention_hours: state.config.metrics_history_retention_hours, + interval_minutes: state.config.metrics_history_interval_minutes, + }) + }) + .lock() + .unwrap() +} + +pub async fn metrics_settings(State(state): State) -> Response { + let settings = metrics_settings_snapshot(&state); + Json(json!({ + "enabled": settings.enabled, + "retention_hours": settings.retention_hours, + "interval_minutes": settings.interval_minutes, + })) + .into_response() +} + +pub async fn update_metrics_settings(State(state): State, body: Body) -> Response { + let payload: Value = parse_json_body(body).await.unwrap_or_else(|_| json!({})); + let mut settings = METRICS_SETTINGS + .get_or_init(|| { + Mutex::new(MetricsSettingsSnapshot { + enabled: state.config.metrics_history_enabled, + retention_hours: state.config.metrics_history_retention_hours, + interval_minutes: state.config.metrics_history_interval_minutes, + }) + }) + .lock() + .unwrap(); + let enabled = payload + .get("enabled") + .and_then(|value| value.as_bool()) + .unwrap_or(settings.enabled); + let retention_hours = payload + .get("retention_hours") + .and_then(|value| value.as_u64()) + .unwrap_or(settings.retention_hours) + .max(1); + let interval_minutes = payload + .get("interval_minutes") + .and_then(|value| value.as_u64()) + .unwrap_or(settings.interval_minutes) + .max(1); + *settings = MetricsSettingsSnapshot { + enabled, + retention_hours, + interval_minutes, + }; + + Json(json!({ + "enabled": enabled, + "retention_hours": retention_hours, + "interval_minutes": interval_minutes, + })) + .into_response() +} + +#[derive(Deserialize, Default)] +struct MultipartInitPayload { + #[serde(default)] + object_key: String, + #[serde(default)] + metadata: Option>, +} + +pub async fn upload_object( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + headers: HeaderMap, + body: Body, +) -> Response { + let content_type = match headers + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + { + Some(value) + if value + .to_ascii_lowercase() + .starts_with("multipart/form-data") => + { + value.to_string() + } + _ => return json_error(StatusCode::BAD_REQUEST, "Expected multipart form upload"), + }; + + let boundary = match multer::parse_boundary(&content_type) { + Ok(value) => value, + Err(_) => return json_error(StatusCode::BAD_REQUEST, "Missing multipart boundary"), + }; + + let stream = BodyStream::new(body) + .map_ok(|frame| frame.into_data().unwrap_or_default()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)); + let mut multipart = multer::Multipart::new(stream, boundary); + + let mut object_key: Option = None; + let mut metadata_raw: Option = None; + let 
mut file_name: Option = None; + let mut file_content_type: Option = None; + let mut file_bytes: Option> = None; + + while let Some(field) = match multipart.next_field().await { + Ok(field) => field, + Err(e) => { + return json_error( + StatusCode::BAD_REQUEST, + format!("Malformed multipart body: {}", e), + ) + } + } { + let name = field.name().unwrap_or_default().to_string(); + match name.as_str() { + "object_key" => match field.text().await { + Ok(value) if !value.trim().is_empty() => { + object_key = Some(value.trim().to_string()) + } + _ => {} + }, + "metadata" => match field.text().await { + Ok(value) if !value.trim().is_empty() => metadata_raw = Some(value), + _ => {} + }, + "object" => { + file_name = field.file_name().map(|s| s.to_string()); + file_content_type = field.content_type().map(|mime| mime.to_string()); + match field.bytes().await { + Ok(bytes) => file_bytes = Some(bytes.to_vec()), + Err(e) => { + return json_error( + StatusCode::BAD_REQUEST, + format!("Failed to read upload: {}", e), + ) + } + } + } + _ => { + let _ = field.bytes().await; + } + } + } + + let bytes = match file_bytes { + Some(bytes) if !bytes.is_empty() => bytes, + _ => return json_error(StatusCode::BAD_REQUEST, "Choose a file to upload"), + }; + + let key = object_key + .or(file_name.clone()) + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) + .ok_or_else(|| json_error(StatusCode::BAD_REQUEST, "Object key is required")); + let key = match key { + Ok(key) => key, + Err(response) => return response, + }; + + let metadata = if let Some(raw) = metadata_raw { + match serde_json::from_str::>(&raw) { + Ok(map) => Some( + map.into_iter() + .map(|(k, v)| (k, v.as_str().unwrap_or(&v.to_string()).to_string())) + .collect::>(), + ), + Err(_) => return json_error(StatusCode::BAD_REQUEST, "Metadata must be a JSON object"), + } + } else { + None + }; + + let mut upload_headers = HeaderMap::new(); + if let Some(content_type) = file_content_type.as_deref() { + if let Ok(value) = content_type.parse() { + upload_headers.insert(header::CONTENT_TYPE, value); + } + } + if let Some(metadata) = &metadata { + for (key, value) in metadata { + let header_name = format!("x-amz-meta-{}", key); + if let Ok(name) = header_name.parse::() { + if let Ok(value) = value.parse() { + upload_headers.insert(name, value); + } + } + } + } + + let response = handlers::put_object( + State(state), + Path((bucket_name.clone(), key.clone())), + Query(ObjectQuery::default()), + upload_headers, + Body::from(bytes), + ) + .await; + + if !response.status().is_success() { + return response; + } + + let mut message = format!("Uploaded '{}'", key); + if metadata.is_some() { + message.push_str(" with metadata"); + } + json_ok(json!({ + "status": "ok", + "message": message, + "key": key, + })) +} + +pub async fn initiate_multipart_upload( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: MultipartInitPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + let object_key = payload.object_key.trim(); + if object_key.is_empty() { + return json_error(StatusCode::BAD_REQUEST, "object_key is required"); + } + + match state + .storage + .initiate_multipart(&bucket_name, object_key, payload.metadata) + .await + { + Ok(upload_id) => json_ok(json!({ "upload_id": upload_id })), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +pub struct MultipartPartQuery { + 
#[serde(rename = "partNumber")] + part_number: Option, +} + +pub async fn upload_multipart_part( + State(state): State, + Extension(_session): Extension, + Path((bucket_name, upload_id)): Path<(String, String)>, + Query(query): Query, + body: Body, +) -> Response { + let Some(part_number) = query.part_number else { + return json_error(StatusCode::BAD_REQUEST, "partNumber is required"); + }; + if !(1..=10_000).contains(&part_number) { + return json_error( + StatusCode::BAD_REQUEST, + "partNumber must be between 1 and 10000", + ); + } + + let bytes = match to_bytes(body, usize::MAX).await { + Ok(bytes) if !bytes.is_empty() => bytes, + Ok(_) => return json_error(StatusCode::BAD_REQUEST, "Empty request body"), + Err(_) => return json_error(StatusCode::BAD_REQUEST, "Failed to read request body"), + }; + let reader: myfsio_storage::traits::AsyncReadStream = Box::pin(Cursor::new(bytes.to_vec())); + match state + .storage + .upload_part(&bucket_name, &upload_id, part_number, reader) + .await + { + Ok(etag) => json_ok(json!({ "etag": etag, "part_number": part_number })), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct CompleteMultipartPayload { + #[serde(default)] + parts: Vec, +} + +#[derive(Deserialize, Default)] +struct CompleteMultipartPartPayload { + #[serde(default, alias = "PartNumber")] + part_number: u32, + #[serde(default, alias = "ETag")] + etag: String, +} + +pub async fn complete_multipart_upload( + State(state): State, + Extension(_session): Extension, + Path((bucket_name, upload_id)): Path<(String, String)>, + body: Body, +) -> Response { + let payload: CompleteMultipartPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + if payload.parts.is_empty() { + return json_error(StatusCode::BAD_REQUEST, "parts array required"); + } + + let parts = payload + .parts + .iter() + .map(|part| PartInfo { + part_number: part.part_number, + etag: part.etag.trim_matches('"').to_string(), + }) + .collect::>(); + + match state + .storage + .complete_multipart(&bucket_name, &upload_id, &parts) + .await + { + Ok(meta) => { + super::trigger_replication(&state, &bucket_name, &meta.key, "write"); + json_ok(json!({ + "key": meta.key, + "size": meta.size, + "etag": meta.etag.unwrap_or_default(), + "last_modified": meta.last_modified.to_rfc3339(), + })) + } + Err(err) => storage_json_error(err), + } +} + +pub async fn abort_multipart_upload( + State(state): State, + Extension(_session): Extension, + Path((bucket_name, upload_id)): Path<(String, String)>, +) -> Response { + match state + .storage + .abort_multipart(&bucket_name, &upload_id) + .await + { + Ok(()) => json_ok(json!({ "status": "aborted" })), + Err(err) => storage_json_error(err), + } +} + +async fn get_bucket_config_json( + state: &AppState, + bucket: &str, +) -> Result { + state.storage.get_bucket_config(bucket).await +} + +pub async fn bucket_acl( + State(state): State, + Extension(session): Extension, + Path(bucket_name): Path, +) -> Response { + match get_bucket_config_json(&state, &bucket_name).await { + Ok(config) => Json(parse_acl_value( + config.acl.as_ref(), + &owner_id_or_default(&session), + )) + .into_response(), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct BucketAclPayload { + #[serde(default)] + canned_acl: String, +} + +pub async fn update_bucket_acl( + State(state): State, + Extension(session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: 
BucketAclPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + if payload.canned_acl.trim().is_empty() { + return json_error(StatusCode::BAD_REQUEST, "canned_acl is required"); + } + + let acl_xml = match bucket_acl_xml_for_canned( + &owner_id_or_default(&session), + payload.canned_acl.trim(), + ) { + Ok(xml) => xml, + Err(message) => return json_error(StatusCode::BAD_REQUEST, message), + }; + + match state.storage.get_bucket_config(&bucket_name).await { + Ok(mut config) => { + config.acl = Some(Value::String(acl_xml)); + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => json_ok(json!({ + "status": "ok", + "message": format!("ACL set to {}", payload.canned_acl.trim()), + })), + Err(err) => storage_json_error(err), + } + } + Err(err) => storage_json_error(err), + } +} + +pub async fn bucket_cors( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, +) -> Response { + match get_bucket_config_json(&state, &bucket_name).await { + Ok(config) => Json(parse_cors_value(config.cors.as_ref())).into_response(), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct BucketCorsPayload { + #[serde(default)] + rules: Vec, +} + +pub async fn update_bucket_cors( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: BucketCorsPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + match state.storage.get_bucket_config(&bucket_name).await { + Ok(mut config) => { + config.cors = if payload.rules.is_empty() { + None + } else { + Some(Value::String(cors_xml_from_rules(&payload.rules))) + }; + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => json_ok(json!({ + "status": "ok", + "message": "CORS configuration saved", + "rules": payload.rules, + })), + Err(err) => storage_json_error(err), + } + } + Err(err) => storage_json_error(err), + } +} + +pub async fn bucket_lifecycle( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, +) -> Response { + match get_bucket_config_json(&state, &bucket_name).await { + Ok(config) => Json(parse_lifecycle_value(config.lifecycle.as_ref())).into_response(), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct BucketLifecyclePayload { + #[serde(default)] + rules: Vec, +} + +pub async fn update_bucket_lifecycle( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: BucketLifecyclePayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + match state.storage.get_bucket_config(&bucket_name).await { + Ok(mut config) => { + config.lifecycle = if payload.rules.is_empty() { + None + } else { + Some(Value::String(lifecycle_xml_from_rules(&payload.rules))) + }; + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => json_ok(json!({ + "status": "ok", + "message": "Lifecycle rules saved", + "rules": payload.rules, + })), + Err(err) => storage_json_error(err), + } + } + Err(err) => storage_json_error(err), + } +} + +async fn serve_object_download_or_preview( + state: AppState, + bucket: String, + key: String, + headers: HeaderMap, + is_download: bool, +) -> Response { + let content_type = state + .storage + .head_object(&bucket, &key) + .await + .ok() 
+ .and_then(|meta| meta.content_type); + + let mut query = ObjectQuery::default(); + if is_download { + query.response_content_disposition = Some(format!( + "attachment; filename=\"{}\"", + safe_attachment_filename(&key) + )); + } else if let Some(forced) = dangerous_preview_content_type(content_type.as_deref(), &key) { + query.response_content_type = Some(forced); + } + + let mut response = + handlers::get_object(State(state), Path((bucket, key)), Query(query), headers).await; + response + .headers_mut() + .insert("x-content-type-options", "nosniff".parse().unwrap()); + response +} + +async fn object_metadata_json(state: &AppState, bucket: &str, key: &str) -> Response { + let head = match state.storage.head_object(bucket, key).await { + Ok(meta) => meta, + Err(err) => return storage_json_error(err), + }; + let metadata = state + .storage + .get_object_metadata(bucket, key) + .await + .unwrap_or_default(); + + let mut out: std::collections::HashMap = metadata + .iter() + .filter(|(k, _)| !(k.starts_with("__") && k.ends_with("__"))) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + if let Some(content_type) = head.content_type { + out.insert("Content-Type".to_string(), content_type); + } + let display_length = metadata + .get("__size__") + .cloned() + .unwrap_or_else(|| head.size.to_string()); + out.insert("Content-Length".to_string(), display_length); + if let Some(algorithm) = metadata.get("x-amz-server-side-encryption") { + out.insert( + "x-amz-server-side-encryption".to_string(), + algorithm.to_string(), + ); + } + Json(json!({ "metadata": out })).into_response() +} + +async fn object_versions_json(state: &AppState, bucket: &str, key: &str) -> Response { + match read_version_manifests_for_object(state, bucket, key) { + Ok(entries) => Json(json!({ + "versions": entries.into_iter().map(|entry| manifest_to_json(&entry)).collect::>(), + })) + .into_response(), + Err(err) => json_error(StatusCode::BAD_REQUEST, err), + } +} + +async fn object_tags_json(state: &AppState, bucket: &str, key: &str) -> Response { + match state.storage.get_object_tags(bucket, key).await { + Ok(tags) => Json(json!({ + "tags": tags.into_iter().map(|tag| json!({ "Key": tag.key, "Value": tag.value })).collect::>(), + })) + .into_response(), + Err(err) => storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct PresignPayload { + #[serde(default = "default_presign_method")] + method: String, + #[serde(default)] + expires_in: Option, +} + +fn default_presign_method() -> String { + "GET".to_string() +} + +async fn object_presign_json( + state: &AppState, + session: &SessionHandle, + bucket: &str, + key: &str, + body: Body, +) -> Response { + let payload: PresignPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + let method = payload.method.trim().to_ascii_uppercase(); + if !matches!(method.as_str(), "GET" | "PUT" | "DELETE") { + return json_error( + StatusCode::BAD_REQUEST, + "Method must be GET, PUT, or DELETE", + ); + } + + let access_key = match current_access_key(session) { + Some(key) => key, + None => return json_error(StatusCode::FORBIDDEN, "Missing authenticated session"), + }; + let secret_key = match state.iam.get_secret_key(&access_key) { + Some(secret) => secret, + None => { + return json_error( + StatusCode::FORBIDDEN, + "Session credentials are no longer valid", + ) + } + }; + + let min_expiry = state.config.presigned_url_min_expiry; + let max_expiry = state.config.presigned_url_max_expiry; + let expires = payload + 
.expires_in + .unwrap_or(900) + .clamp(min_expiry, max_expiry); + + let api_base = parse_api_base(state); + let parsed = match reqwest::Url::parse(&api_base) { + Ok(url) => url, + Err(err) => { + return json_error( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Invalid API_BASE_URL: {}", err), + ) + } + }; + let host = match parsed.host_str() { + Some(host) => { + if let Some(port) = parsed.port() { + format!("{}:{}", host, port) + } else { + host.to_string() + } + } + None => { + return json_error( + StatusCode::INTERNAL_SERVER_ERROR, + "Unable to determine API host", + ) + } + }; + + let now = Utc::now(); + let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); + let date_stamp = now.format("%Y%m%d").to_string(); + let region = state.config.region.as_str(); + let credential = format!("{}/{}/{}/s3/aws4_request", access_key, date_stamp, region); + + let canonical_uri = format!("/{}/{}", bucket, encode_object_key(key)); + let mut query_params = vec![ + ( + "X-Amz-Algorithm".to_string(), + "AWS4-HMAC-SHA256".to_string(), + ), + ("X-Amz-Credential".to_string(), credential.clone()), + ("X-Amz-Date".to_string(), amz_date.clone()), + ("X-Amz-Expires".to_string(), expires.to_string()), + ("X-Amz-SignedHeaders".to_string(), "host".to_string()), + ]; + query_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1))); + + let canonical_query = query_params + .iter() + .map(|(name, value)| format!("{}={}", aws_query_encode(name), aws_query_encode(value))) + .collect::>() + .join("&"); + let canonical_request = format!( + "{}\n{}\n{}\nhost:{}\n\nhost\nUNSIGNED-PAYLOAD", + method, canonical_uri, canonical_query, host + ); + let scope = format!("{}/{}/s3/aws4_request", date_stamp, region); + let string_to_sign = sigv4::build_string_to_sign(&amz_date, &scope, &canonical_request); + let signing_key = sigv4::derive_signing_key(&secret_key, &date_stamp, region, "s3"); + let signature = sigv4::compute_signature(&signing_key, &string_to_sign); + + let final_query = format!("{}&X-Amz-Signature={}", canonical_query, signature); + let final_url = format!("{}{}?{}", api_base, canonical_uri, final_query); + + Json(json!({ + "url": final_url, + "method": method, + "expires_in": expires, + })) + .into_response() +} + +#[derive(Deserialize, Default)] +struct ObjectTagsPayload { + #[serde(default)] + tags: Vec, +} + +#[derive(Deserialize, Default)] +struct ObjectTagPayload { + #[serde(default, alias = "Key", alias = "key")] + key: String, + #[serde(default, alias = "Value", alias = "value")] + value: String, +} + +async fn update_object_tags(state: &AppState, bucket: &str, key: &str, body: Body) -> Response { + let payload: ObjectTagsPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + if payload.tags.len() > state.config.object_tag_limit { + return json_error( + StatusCode::BAD_REQUEST, + format!("Maximum {} tags allowed", state.config.object_tag_limit), + ); + } + + let tags = payload + .tags + .iter() + .filter(|tag| !tag.key.trim().is_empty()) + .map(|tag| Tag { + key: tag.key.trim().to_string(), + value: tag.value.to_string(), + }) + .collect::>(); + + let result = if tags.is_empty() { + state.storage.delete_object_tags(bucket, key).await + } else { + state.storage.set_object_tags(bucket, key, &tags).await + }; + + match result { + Ok(()) => Json(json!({ + "status": "ok", + "message": "Tags saved", + "tags": tags.into_iter().map(|tag| json!({ "Key": tag.key, "Value": tag.value })).collect::>(), + })) + .into_response(), + Err(err) => 
storage_json_error(err), + } +} + +#[derive(Deserialize, Default)] +struct CopyMovePayload { + #[serde(default)] + dest_bucket: String, + #[serde(default)] + dest_key: String, +} + +async fn copy_object_json(state: &AppState, bucket: &str, key: &str, body: Body) -> Response { + let payload: CopyMovePayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + let dest_bucket = payload.dest_bucket.trim(); + let dest_key = payload.dest_key.trim(); + if dest_bucket.is_empty() || dest_key.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "dest_bucket and dest_key are required", + ); + } + + match state + .storage + .copy_object(bucket, key, dest_bucket, dest_key) + .await + { + Ok(_) => { + super::trigger_replication(state, dest_bucket, dest_key, "write"); + Json(json!({ + "status": "ok", + "message": format!("Copied to {}/{}", dest_bucket, dest_key), + "dest_bucket": dest_bucket, + "dest_key": dest_key, + })) + .into_response() + } + Err(err) => storage_json_error(err), + } +} + +async fn move_object_json(state: &AppState, bucket: &str, key: &str, body: Body) -> Response { + let payload: CopyMovePayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + let dest_bucket = payload.dest_bucket.trim(); + let dest_key = payload.dest_key.trim(); + if dest_bucket.is_empty() || dest_key.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "dest_bucket and dest_key are required", + ); + } + if dest_bucket == bucket && dest_key == key { + return json_error( + StatusCode::BAD_REQUEST, + "Cannot move object to the same location", + ); + } + + match state.storage.copy_object(bucket, key, dest_bucket, dest_key).await { + Ok(_) => match state.storage.delete_object(bucket, key).await { + Ok(_) => { + super::trigger_replication(state, dest_bucket, dest_key, "write"); + super::trigger_replication(state, bucket, key, "delete"); + Json(json!({ + "status": "ok", + "message": format!("Moved to {}/{}", dest_bucket, dest_key), + "dest_bucket": dest_bucket, + "dest_key": dest_key, + })) + .into_response() + } + Err(_) => Json(json!({ + "status": "partial", + "message": format!("Copied to {}/{} but failed to delete source", dest_bucket, dest_key), + "dest_bucket": dest_bucket, + "dest_key": dest_key, + })) + .into_response(), + }, + Err(err) => storage_json_error(err), + } +} + +async fn purge_object_versions_for_key( + state: &AppState, + bucket: &str, + key: &str, +) -> Result<(), String> { + if let Ok(version_dir) = version_dir_for_object(state, bucket, key) { + if version_dir.exists() { + std::fs::remove_dir_all(&version_dir).map_err(|e| e.to_string())?; + } + } + Ok(()) +} + +async fn delete_object_json( + state: &AppState, + bucket: &str, + key: &str, + headers: &HeaderMap, + body: Body, +) -> Response { + let body_bytes = match to_bytes(body, usize::MAX).await { + Ok(bytes) => bytes, + Err(_) => return json_error(StatusCode::BAD_REQUEST, "Failed to read request body"), + }; + + let content_type = headers + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default(); + let form = if content_type.starts_with("application/x-www-form-urlencoded") { + parse_form_body(&body_bytes) + } else { + HashMap::new() + }; + let purge_versions = parse_bool_flag(form.get("purge_versions").map(|s| s.as_str())); + + if purge_versions { + if let Err(err) = state.storage.delete_object(bucket, key).await { + return storage_json_error(err); + } + super::trigger_replication(state, 
bucket, key, "delete"); + if let Err(err) = purge_object_versions_for_key(state, bucket, key).await { + return json_error(StatusCode::BAD_REQUEST, err); + } + return Json(json!({ + "status": "ok", + "message": format!("Permanently deleted '{}' and all versions", key), + })) + .into_response(); + } + + match state.storage.delete_object(bucket, key).await { + Ok(_) => { + super::trigger_replication(state, bucket, key, "delete"); + Json(json!({ + "status": "ok", + "message": format!("Deleted '{}'", key), + })) + .into_response() + } + Err(err) => storage_json_error(err), + } +} + +async fn restore_object_version_json( + state: &AppState, + bucket: &str, + key: &str, + version_id: &str, +) -> Response { + let version_dir = match version_dir_for_object(state, bucket, key) { + Ok(path) => path, + Err(err) => return json_error(StatusCode::BAD_REQUEST, err), + }; + let data_path = version_dir.join(format!("{}.bin", version_id)); + let meta_path = version_dir.join(format!("{}.json", version_id)); + if !data_path.exists() || !meta_path.exists() { + return json_error(StatusCode::NOT_FOUND, "Version not found"); + } + + let manifest_text = match std::fs::read_to_string(&meta_path) { + Ok(text) => text, + Err(err) => return json_error(StatusCode::BAD_REQUEST, err.to_string()), + }; + let manifest: VersionManifest = match serde_json::from_str(&manifest_text) { + Ok(manifest) => manifest, + Err(err) => return json_error(StatusCode::BAD_REQUEST, err.to_string()), + }; + + let live_exists = state.storage.head_object(bucket, key).await.is_ok(); + let versioning_enabled = state + .storage + .is_versioning_enabled(bucket) + .await + .unwrap_or(false); + if live_exists { + if let Err(err) = state.storage.delete_object(bucket, key).await { + return storage_json_error(err); + } + } + + let destination = match object_live_path(state, bucket, key) { + Ok(path) => path, + Err(err) => return json_error(StatusCode::BAD_REQUEST, err), + }; + if let Some(parent) = destination.parent() { + if let Err(err) = tokio::fs::create_dir_all(parent).await { + return json_error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string()); + } + } + if let Err(err) = tokio::fs::copy(&data_path, &destination).await { + return json_error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string()); + } + if let Err(err) = state + .storage + .put_object_metadata(bucket, key, &manifest.metadata) + .await + { + return storage_json_error(err); + } + super::trigger_replication(state, bucket, key, "write"); + + let mut message = format!("Restored '{}'", key); + if live_exists && versioning_enabled { + message.push_str(" (previous current version was archived)"); + } + Json(json!({ "status": "ok", "message": message })).into_response() +} + +#[derive(Debug, Clone, Copy)] +enum ObjectGetAction { + Download, + Preview, + Metadata, + Versions, + Tags, +} + +#[derive(Debug, Clone)] +enum ObjectPostAction { + Delete, + Presign, + Tags, + Copy, + Move, + Restore(String), +} + +fn parse_object_get_action(rest: &str) -> Option<(String, ObjectGetAction)> { + for (suffix, action) in [ + ("/download", ObjectGetAction::Download), + ("/preview", ObjectGetAction::Preview), + ("/metadata", ObjectGetAction::Metadata), + ("/versions", ObjectGetAction::Versions), + ("/tags", ObjectGetAction::Tags), + ] { + if let Some(key) = rest.strip_suffix(suffix) { + return Some((key.to_string(), action)); + } + } + None +} + +fn parse_object_post_action(rest: &str) -> Option<(String, ObjectPostAction)> { + if let Some((key, version_id)) = rest.rsplit_once("/restore/") { + return Some(( 
+ key.to_string(), + ObjectPostAction::Restore(version_id.to_string()), + )); + } + if let Some(key_with_version) = rest.strip_suffix("/restore") { + if let Some((key, version_id)) = key_with_version.rsplit_once("/versions/") { + return Some(( + key.to_string(), + ObjectPostAction::Restore(version_id.to_string()), + )); + } + } + for (suffix, action) in [ + ("/delete", ObjectPostAction::Delete), + ("/presign", ObjectPostAction::Presign), + ("/tags", ObjectPostAction::Tags), + ("/copy", ObjectPostAction::Copy), + ("/move", ObjectPostAction::Move), + ] { + if let Some(key) = rest.strip_suffix(suffix) { + return Some((key.to_string(), action)); + } + } + None +} + +pub async fn object_get_dispatch( + State(state): State, + Extension(session): Extension, + Path((bucket_name, rest)): Path<(String, String)>, + headers: HeaderMap, +) -> Response { + let Some((key, action)) = parse_object_get_action(&rest) else { + return json_error(StatusCode::NOT_FOUND, "Unknown object action"); + }; + + match action { + ObjectGetAction::Download => { + serve_object_download_or_preview(state, bucket_name, key, headers, true).await + } + ObjectGetAction::Preview => { + serve_object_download_or_preview(state, bucket_name, key, headers, false).await + } + ObjectGetAction::Metadata => object_metadata_json(&state, &bucket_name, &key).await, + ObjectGetAction::Versions => object_versions_json(&state, &bucket_name, &key).await, + ObjectGetAction::Tags => { + let _ = session; + object_tags_json(&state, &bucket_name, &key).await + } + } +} + +pub async fn object_post_dispatch( + State(state): State, + Extension(session): Extension, + Path((bucket_name, rest)): Path<(String, String)>, + headers: HeaderMap, + body: Body, +) -> Response { + let Some((key, action)) = parse_object_post_action(&rest) else { + return json_error(StatusCode::NOT_FOUND, "Unknown object action"); + }; + + match action { + ObjectPostAction::Delete => { + delete_object_json(&state, &bucket_name, &key, &headers, body).await + } + ObjectPostAction::Presign => { + object_presign_json(&state, &session, &bucket_name, &key, body).await + } + ObjectPostAction::Tags => update_object_tags(&state, &bucket_name, &key, body).await, + ObjectPostAction::Copy => copy_object_json(&state, &bucket_name, &key, body).await, + ObjectPostAction::Move => move_object_json(&state, &bucket_name, &key, body).await, + ObjectPostAction::Restore(version_id) => { + restore_object_version_json(&state, &bucket_name, &key, &version_id).await + } + } +} + +#[derive(Deserialize, Default)] +struct BulkKeysPayload { + #[serde(default)] + keys: Vec, + #[serde(default)] + purge_versions: bool, +} + +async fn expand_bulk_keys( + state: &AppState, + bucket: &str, + keys: &[String], +) -> Result, StorageError> { + let mut expanded = Vec::new(); + for key in keys { + if key.ends_with('/') { + let params = ListParams { + max_keys: 5000, + continuation_token: None, + prefix: Some(key.clone()), + start_after: None, + }; + let objects = state.storage.list_objects(bucket, ¶ms).await?; + for object in objects.objects { + expanded.push(object.key); + } + } else { + expanded.push(key.clone()); + } + } + let mut unique = BTreeMap::new(); + for key in expanded { + unique.entry(key.clone()).or_insert(key); + } + Ok(unique.into_values().collect()) +} + +pub async fn bulk_delete_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: BulkKeysPayload = match parse_json_body(body).await { + Ok(payload) => payload, + 
Err(response) => return response, + }; + + let cleaned = payload + .keys + .into_iter() + .map(|key| key.trim().to_string()) + .filter(|key| !key.is_empty()) + .collect::>(); + if cleaned.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "Select at least one object to delete", + ); + } + + let keys = match expand_bulk_keys(&state, &bucket_name, &cleaned).await { + Ok(keys) => keys, + Err(err) => return storage_json_error(err), + }; + if keys.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "No objects found under the selected folders", + ); + } + if keys.len() > state.config.bulk_delete_max_keys { + return json_error( + StatusCode::BAD_REQUEST, + format!( + "Bulk delete supports at most {} keys", + state.config.bulk_delete_max_keys + ), + ); + } + + let mut deleted = Vec::new(); + let mut errors = Vec::new(); + + for key in keys { + match state.storage.delete_object(&bucket_name, &key).await { + Ok(_) => { + super::trigger_replication(&state, &bucket_name, &key, "delete"); + if payload.purge_versions { + if let Err(err) = + purge_object_versions_for_key(&state, &bucket_name, &key).await + { + errors.push(json!({ "key": key, "error": err })); + continue; + } + } + deleted.push(key); + } + Err(err) => errors.push(json!({ "key": key, "error": err.to_string() })), + } + } + + if deleted.is_empty() && !errors.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ + "status": "error", + "message": "Unable to delete the selected objects", + "deleted": deleted, + "errors": errors, + })), + ) + .into_response(); + } + + let mut message = format!( + "Deleted {} object{}", + deleted.len(), + if deleted.len() == 1 { "" } else { "s" } + ); + if payload.purge_versions && !deleted.is_empty() { + message.push_str(" (including archived versions)"); + } + if !errors.is_empty() { + message.push_str(&format!("; {} failed", errors.len())); + } + + Json(json!({ + "status": if errors.is_empty() { "ok" } else { "partial" }, + "message": message, + "deleted": deleted, + "errors": errors, + })) + .into_response() +} + +pub async fn bulk_download_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, + body: Body, +) -> Response { + let payload: BulkKeysPayload = match parse_json_body(body).await { + Ok(payload) => payload, + Err(response) => return response, + }; + + let cleaned = payload + .keys + .into_iter() + .map(|key| key.trim().to_string()) + .filter(|key| !key.is_empty()) + .collect::>(); + if cleaned.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "Select at least one object to download", + ); + } + + let keys = match expand_bulk_keys(&state, &bucket_name, &cleaned).await { + Ok(keys) => keys, + Err(err) => return storage_json_error(err), + }; + if keys.is_empty() { + return json_error( + StatusCode::BAD_REQUEST, + "No objects found under the selected folders", + ); + } + + let mut total_bytes = 0u64; + let mut archive_entries = Vec::new(); + for key in keys { + match state.storage.head_object(&bucket_name, &key).await { + Ok(meta) => { + total_bytes = total_bytes.saturating_add(meta.size); + match read_object_bytes_for_zip(&state, &bucket_name, &key).await { + Ok(bytes) => archive_entries.push((key, bytes, meta.last_modified)), + Err(err) => return json_error(StatusCode::BAD_REQUEST, err), + } + } + Err(err) => return storage_json_error(err), + } + } + + let max_total_bytes = 256 * 1024 * 1024u64; + if total_bytes > max_total_bytes { + return json_error( + StatusCode::BAD_REQUEST, + "Total download size exceeds 256 
MB limit. Select fewer objects.", + ); + } + + let zip_bytes = match build_zip_archive(archive_entries) { + Ok(bytes) => bytes, + Err(err) => return json_error(StatusCode::BAD_REQUEST, err), + }; + + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, "application/zip".parse().unwrap()); + headers.insert( + header::CONTENT_DISPOSITION, + format!("attachment; filename=\"{}-download.zip\"", bucket_name) + .parse() + .unwrap(), + ); + (StatusCode::OK, headers, zip_bytes).into_response() +} + +pub async fn archived_objects( + State(state): State, + Extension(_session): Extension, + Path(bucket_name): Path, +) -> Response { + let versions_root = version_root_for_bucket(&state, &bucket_name); + if !versions_root.exists() { + return Json(json!({ "objects": [] })).into_response(); + } + + let mut grouped: BTreeMap> = BTreeMap::new(); + let mut stack = vec![versions_root]; + + while let Some(current) = stack.pop() { + let read_dir = match std::fs::read_dir(¤t) { + Ok(entries) => entries, + Err(_) => continue, + }; + for entry in read_dir.flatten() { + let file_type = match entry.file_type() { + Ok(file_type) => file_type, + Err(_) => continue, + }; + if file_type.is_dir() { + stack.push(entry.path()); + continue; + } + if entry.path().extension().and_then(|ext| ext.to_str()) != Some("json") { + continue; + } + let text = match std::fs::read_to_string(entry.path()) { + Ok(text) => text, + Err(_) => continue, + }; + let manifest = match serde_json::from_str::(&text) { + Ok(manifest) => manifest, + Err(_) => continue, + }; + if manifest.key.is_empty() { + continue; + } + grouped + .entry(manifest.key.clone()) + .or_default() + .push(manifest); + } + } + + let mut objects = Vec::new(); + for (key, mut versions) in grouped { + let live_exists = object_live_path(&state, &bucket_name, &key) + .map(|path| path.exists()) + .unwrap_or(false); + if live_exists { + continue; + } + versions.sort_by(|a, b| manifest_timestamp(b).cmp(&manifest_timestamp(a))); + let latest = versions.first().map(|record| manifest_to_json(record)); + objects.push(json!({ + "key": key, + "versions": versions.len(), + "total_size": versions.iter().map(|entry| entry.size).sum::(), + "latest": latest, + "restore_url": versions.first().map(|record| format!( + "/ui/buckets/{}/archived/{}/restore/{}", + bucket_name, + encode_object_key(&record.key), + encode_path_segment(&record.version_id) + )), + "purge_url": format!( + "/ui/buckets/{}/archived/{}/purge", + bucket_name, + encode_object_key(&key) + ), + })); + } + + Json(json!({ "objects": objects })).into_response() +} + +pub async fn archived_post_dispatch( + State(state): State, + Extension(_session): Extension, + Path((bucket_name, rest)): Path<(String, String)>, +) -> Response { + if let Some((key, version_id)) = rest.rsplit_once("/restore/") { + return restore_object_version_json(&state, &bucket_name, key, version_id).await; + } + if let Some(key) = rest.strip_suffix("/purge") { + match purge_object_versions_for_key(&state, &bucket_name, key).await { + Ok(()) => { + let _ = state.storage.delete_object(&bucket_name, key).await; + super::trigger_replication(&state, &bucket_name, key, "delete"); + Json(json!({ + "status": "ok", + "message": format!("Removed archived versions for '{}'", key), + })) + .into_response() + } + Err(err) => json_error(StatusCode::BAD_REQUEST, err), + } + } else { + json_error(StatusCode::NOT_FOUND, "Unknown archived object action") + } +} + +pub async fn gc_status_ui( + State(state): State, + Extension(_session): Extension, +) -> 
Response {
+    match &state.gc {
+        Some(gc) => Json(gc.status().await).into_response(),
+        None => Json(json!({
+            "enabled": false,
+            "message": "GC is not enabled. Set GC_ENABLED=true to enable."
+        }))
+        .into_response(),
+    }
+}
+
+pub async fn gc_run_ui(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    body: Body,
+) -> Response {
+    let Some(gc) = &state.gc else {
+        return json_error(StatusCode::BAD_REQUEST, "GC is not enabled");
+    };
+    let payload: Value = parse_json_body(body).await.unwrap_or_else(|_| json!({}));
+    let dry_run = payload
+        .get("dry_run")
+        .and_then(|value| value.as_bool())
+        .unwrap_or(false);
+    match gc.run_now(dry_run).await {
+        Ok(result) => Json(result).into_response(),
+        Err(err) => json_error(StatusCode::CONFLICT, err),
+    }
+}
+
+pub async fn gc_history_ui(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Query(params): Query<HashMap<String, String>>,
+) -> Response {
+    let limit = params.get("limit").and_then(|v| v.parse::<usize>().ok());
+    match &state.gc {
+        Some(gc) => Json(apply_history_limit(gc.history().await, limit)).into_response(),
+        None => Json(json!({ "executions": [] })).into_response(),
+    }
+}
+
+pub async fn integrity_status_ui(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+) -> Response {
+    match &state.integrity {
+        Some(checker) => Json(checker.status().await).into_response(),
+        None => Json(json!({
+            "enabled": false,
+            "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."
+        }))
+        .into_response(),
+    }
+}
+
+pub async fn integrity_run_ui(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    body: Body,
+) -> Response {
+    let Some(checker) = &state.integrity else {
+        return json_error(StatusCode::BAD_REQUEST, "Integrity checker is not enabled");
+    };
+    let payload: Value = parse_json_body(body).await.unwrap_or_else(|_| json!({}));
+    let dry_run = payload
+        .get("dry_run")
+        .and_then(|value| value.as_bool())
+        .unwrap_or(false);
+    let auto_heal = payload
+        .get("auto_heal")
+        .and_then(|value| value.as_bool())
+        .unwrap_or(false);
+    match checker.run_now(dry_run, auto_heal).await {
+        Ok(result) => Json(result).into_response(),
+        Err(err) => json_error(StatusCode::CONFLICT, err),
+    }
+}
+
+pub async fn integrity_history_ui(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Query(params): Query<HashMap<String, String>>,
+) -> Response {
+    let limit = params.get("limit").and_then(|v| v.parse::<usize>().ok());
+    match &state.integrity {
+        Some(checker) => Json(apply_history_limit(checker.history().await, limit)).into_response(),
+        None => Json(json!({ "executions": [] })).into_response(),
+    }
+}
+
+fn apply_history_limit(mut value: Value, limit: Option<usize>) -> Value {
+    if let Some(limit) = limit {
+        if let Some(arr) = value.get_mut("executions").and_then(|v| v.as_array_mut()) {
+            if arr.len() > limit {
+                arr.truncate(limit);
+            }
+        }
+    }
+    value
+}
+
+pub async fn lifecycle_history(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+    Query(params): Query<HashMap<String, String>>,
+) -> Response {
+    let limit = params
+        .get("limit")
+        .and_then(|value| value.parse::<usize>().ok())
+        .unwrap_or(50);
+    let offset = params
+        .get("offset")
+        .and_then(|value| value.parse::<usize>().ok())
+        .unwrap_or(0);
+    if !state.config.lifecycle_enabled {
+        return Json(json!({
+            "executions": [],
+            "total": 0,
+            "limit": limit,
+            "offset": offset,
+            "enabled": false,
+        }))
+        .into_response();
+    }
+    Json(crate::services::lifecycle::read_history(
+        &state.config.storage_root,
+        &bucket_name,
+        limit,
+        offset,
+    ))
+    .into_response()
+}
+
+#[derive(Deserialize, Default)]
+pub struct ReplicationFailuresQuery {
+    #[serde(default)]
+    pub limit: Option<usize>,
+    #[serde(default)]
+    pub offset: Option<usize>,
+}
+
+#[derive(Deserialize)]
+pub struct ReplicationObjectKeyQuery {
+    pub object_key: String,
+}
+
+pub async fn replication_status(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+) -> Response {
+    let Some(rule) = state.replication.get_rule(&bucket_name) else {
+        return json_error(StatusCode::NOT_FOUND, "No replication rule");
+    };
+
+    let (endpoint_healthy, endpoint_error) = match state.connections.get(&rule.target_connection_id)
+    {
+        Some(conn) => {
+            let healthy = state.replication.check_endpoint(&conn).await;
+            let error = if healthy {
+                None
+            } else {
+                Some(format!("Cannot reach endpoint: {}", conn.endpoint_url))
+            };
+            (healthy, error)
+        }
+        None => (false, Some("Target connection not found".to_string())),
+    };
+
+    json_ok(json!({
+        "enabled": rule.enabled,
+        "target_bucket": rule.target_bucket,
+        "target_connection_id": rule.target_connection_id,
+        "mode": rule.mode,
+        "objects_synced": rule.stats.objects_synced,
+        "objects_pending": rule.stats.objects_pending,
+        "objects_orphaned": rule.stats.objects_orphaned,
+        "bytes_synced": rule.stats.bytes_synced,
+        "last_sync_at": rule.stats.last_sync_at,
+        "last_sync_key": rule.stats.last_sync_key,
+        "endpoint_healthy": endpoint_healthy,
+        "endpoint_error": endpoint_error,
+    }))
+}
+
+pub async fn replication_failures(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+    Query(q): Query<ReplicationFailuresQuery>,
+) -> Response {
+    let limit = q.limit.unwrap_or(50).clamp(1, 500);
+    let offset = q.offset.unwrap_or(0);
+    let failures = state
+        .replication
+        .get_failed_items(&bucket_name, limit, offset);
+    let total = state.replication.get_failure_count(&bucket_name);
+    json_ok(json!({
+        "failures": failures,
+        "total": total,
+        "limit": limit,
+        "offset": offset,
+    }))
+}
+
+pub async fn retry_replication_failure(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+    Query(q): Query<ReplicationObjectKeyQuery>,
+) -> Response {
+    retry_replication_failure_key(&state, &bucket_name, q.object_key.trim()).await
+}
+
+pub async fn retry_replication_failure_path(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path((bucket_name, rest)): Path<(String, String)>,
+) -> Response {
+    let Some(object_key) = rest.strip_suffix("/retry") else {
+        return json_error(StatusCode::NOT_FOUND, "Unknown replication failure action");
+    };
+    retry_replication_failure_key(&state, &bucket_name, object_key.trim()).await
+}
+
+async fn retry_replication_failure_key(
+    state: &AppState,
+    bucket_name: &str,
+    object_key: &str,
+) -> Response {
+    if object_key.is_empty() {
+        return json_error(StatusCode::BAD_REQUEST, "object_key is required");
+    }
+
+    if state
+        .replication
+        .retry_failed(bucket_name, object_key)
+        .await
+    {
+        json_ok(json!({
+            "status": "submitted",
+            "object_key": object_key,
+        }))
+    } else {
+        json_error(StatusCode::BAD_REQUEST, "Failed to submit retry")
+    }
+}
+
+pub async fn retry_all_replication_failures(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+) -> Response {
+    let (submitted, skipped) = state.replication.retry_all(&bucket_name).await;
+    json_ok(json!({
+        "status": "submitted",
+        "submitted": submitted,
+        "skipped": skipped,
+    }))
+}
+
+pub async fn dismiss_replication_failure(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+    Query(q): Query<ReplicationObjectKeyQuery>,
+) -> Response {
+    dismiss_replication_failure_key(&state, &bucket_name, q.object_key.trim())
+}
+
+pub async fn dismiss_replication_failure_path(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path((bucket_name, object_key)): Path<(String, String)>,
+) -> Response {
+    dismiss_replication_failure_key(&state, &bucket_name, object_key.trim())
+}
+
+fn dismiss_replication_failure_key(
+    state: &AppState,
+    bucket_name: &str,
+    object_key: &str,
+) -> Response {
+    if object_key.is_empty() {
+        return json_error(StatusCode::BAD_REQUEST, "object_key is required");
+    }
+
+    if state.replication.dismiss_failure(bucket_name, object_key) {
+        json_ok(json!({
+            "status": "dismissed",
+            "object_key": object_key,
+        }))
+    } else {
+        json_error(StatusCode::NOT_FOUND, "Failure not found")
+    }
+}
+
+pub async fn clear_replication_failures(
+    State(state): State<AppState>,
+    Extension(_session): Extension<SessionHandle>,
+    Path(bucket_name): Path<String>,
+) -> Response {
+    state.replication.clear_failures(&bucket_name);
+    json_ok(json!({ "status": "cleared" }))
+}
+
+static SERVER_START_TIME: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
+static SYSINFO: std::sync::OnceLock<Mutex<System>> = std::sync::OnceLock::new();
+
+async fn sample_system() -> (f64, u64, u64) {
+    let lock = SYSINFO.get_or_init(|| {
+        let mut system = System::new();
+        system.refresh_cpu_usage();
+        system.refresh_memory();
+        Mutex::new(system)
+    });
+    {
+        let mut system = lock.lock().unwrap();
+        system.refresh_cpu_usage();
+    }
+    tokio::time::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL).await;
+    let mut system = lock.lock().unwrap();
+    system.refresh_cpu_usage();
+    system.refresh_memory();
+    let cpu_percent = system.global_cpu_usage() as f64;
+    let mem_total = system.total_memory();
+    let mem_used = system.used_memory();
+    (cpu_percent, mem_used, mem_total)
+}
+
+fn normalize_path_for_mount(path: &FsPath) -> String {
+    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+    let raw = canonical.to_string_lossy().to_string();
+    let stripped = raw.strip_prefix(r"\\?\").unwrap_or(&raw);
+    stripped.to_lowercase()
+}
+
+fn sample_disk(path: &FsPath) -> (u64, u64) {
+    let disks = Disks::new_with_refreshed_list();
+    let path_str = normalize_path_for_mount(path);
+    let mut best: Option<(usize, u64, u64)> = None;
+    for disk in disks.list() {
+        let mount_raw = disk.mount_point().to_string_lossy().to_string();
+        let mount = mount_raw
+            .strip_prefix(r"\\?\")
+            .unwrap_or(&mount_raw)
+            .to_lowercase();
+        let total = disk.total_space();
+        let free = disk.available_space();
+        if path_str.starts_with(&mount) {
+            let len = mount.len();
+            match best {
+                Some((best_len, _, _)) if len <= best_len => {}
+                _ => best = Some((len, total, free)),
+            }
+        }
+    }
+    best.map(|(_, total, free)| (total, free)).unwrap_or((0, 0))
+}
+
+pub async fn collect_metrics(state: &AppState) -> Value {
+    let start_time = *SERVER_START_TIME.get_or_init(std::time::Instant::now);
+    let uptime_days = start_time.elapsed().as_secs_f64() / 86400.0;
+
+    let buckets_list = state.storage.list_buckets().await.unwrap_or_default();
+    let bucket_count = buckets_list.len() as u64;
+
+    let mut total_objects: u64 = 0;
+    let mut total_bytes: u64 = 0;
+    let mut total_versions: u64 = 0;
+    for bucket in &buckets_list {
+        if let Ok(stats) = state.storage.bucket_stats(&bucket.name).await {
+            total_objects += stats.objects;
+            total_bytes += stats.bytes;
+            total_versions += stats.version_count;
+        }
+    }
+
+    let (cpu_percent, mem_used, mem_total) = sample_system().await;
+    let mem_pct = if mem_total > 0 {
+        (mem_used as f64 / mem_total as f64) * 100.0
+    } else {
+        0.0
+    };
+
+    let (disk_total, disk_free) = sample_disk(&state.config.storage_root);
+    let disk_used = disk_total.saturating_sub(disk_free);
+    let disk_pct = if disk_total > 0 {
+        (disk_used as f64 / disk_total as f64) * 100.0
+    } else {
+        0.0
+    };
+
+    json!({
+        "cpu_percent": cpu_percent,
+        "memory": {
+            "percent": mem_pct,
+            "used": human_size(mem_used),
+            "total": human_size(mem_total),
+        },
+        "disk": {
+            "percent": disk_pct,
+            "free": human_size(disk_free),
+            "total": human_size(disk_total),
+        },
+        "app": {
+            "storage_used": human_size(total_bytes),
+            "buckets": bucket_count,
+            "objects": total_objects,
+            "versions": total_versions,
+            "uptime_days": uptime_days.floor() as u64,
+        },
+    })
+}
+
+pub async fn metrics_api(State(state): State<AppState>) -> Response {
+    Json(collect_metrics(&state).await).into_response()
+}
+
+#[derive(Deserialize, Default)]
+pub struct HoursQuery {
+    #[serde(default)]
+    pub hours: Option<u64>,
+}
+
+pub async fn metrics_history(
+    State(state): State<AppState>,
+    Query(q): Query<HoursQuery>,
+) -> Response {
+    let settings = metrics_settings_snapshot(&state);
+    match &state.system_metrics {
+        Some(metrics) => Json(json!({
+            "enabled": settings.enabled,
+            "history": metrics.get_history(q.hours).await,
+            "interval_minutes": settings.interval_minutes,
+            "retention_hours": settings.retention_hours,
+            "hours_requested": q.hours.unwrap_or(settings.retention_hours),
+        }))
+        .into_response(),
+        None => Json(json!({
+            "enabled": settings.enabled,
+            "history": [],
+            "interval_minutes": settings.interval_minutes,
+            "retention_hours": settings.retention_hours,
+            "hours_requested": q.hours.unwrap_or(settings.retention_hours),
+        }))
+        .into_response(),
+    }
+}
+
+pub async fn metrics_operations(State(state): State<AppState>) -> Response {
+    match &state.metrics {
+        Some(metrics) => {
+            let stats = metrics.get_current_stats();
+            Json(json!({
+                "enabled": true,
+                "stats": stats,
+            }))
+            .into_response()
+        }
+        None => Json(json!({
+            "enabled": false,
+            "stats": null,
+        }))
+        .into_response(),
+    }
+}
+
+pub async fn metrics_operations_history(
+    State(state): State<AppState>,
+    Query(q): Query<HoursQuery>,
+) -> Response {
+    match &state.metrics {
+        Some(metrics) => {
+            let history = metrics.get_history(q.hours.or(Some(24)));
+            Json(json!({
+                "enabled": true,
+                "history": history,
+                "interval_minutes": 5,
+            }))
+            .into_response()
+        }
+        None => Json(json!({
+            "enabled": false,
+            "history": [],
+            "interval_minutes": 5,
+        }))
+        .into_response(),
+    }
+}
diff --git a/crates/myfsio-server/src/handlers/ui_pages.rs b/crates/myfsio-server/src/handlers/ui_pages.rs
new file mode 100644
index 0000000..2214609
--- /dev/null
+++ b/crates/myfsio-server/src/handlers/ui_pages.rs
@@ -0,0 +1,3094 @@
+use std::collections::HashMap;
+
+use axum::body::Body;
+use axum::extract::{Extension, Form, Path, Query, State};
+use axum::http::{header, HeaderMap, StatusCode};
+use axum::response::{IntoResponse, Redirect, Response};
+use http_body_util::BodyExt;
+use serde_json::{json, Value};
+use tera::Context;
+
+use crate::handlers::ui::{base_context, inject_flash, render};
+use crate::middleware::session::SessionHandle;
+use crate::state::AppState;
+use crate::templates::TemplateEngine;
+use myfsio_storage::traits::StorageEngine;
+
+pub fn register_ui_endpoints(engine: &TemplateEngine) {
+    engine.register_endpoints(&[
+        ("ui.login", "/login"),
+        ("ui.logout", "/logout"),
+        ("ui.buckets_overview", "/ui/buckets"),
+        ("ui.bucket_detail", "/ui/buckets/{bucket_name}"),
+        ("ui.create_bucket", "/ui/buckets/create"),
+        ("ui.delete_bucket",
"/ui/buckets/{bucket_name}/delete"), + ( + "ui.update_bucket_versioning", + "/ui/buckets/{bucket_name}/versioning", + ), + ("ui.update_bucket_quota", "/ui/buckets/{bucket_name}/quota"), + ( + "ui.update_bucket_encryption", + "/ui/buckets/{bucket_name}/encryption", + ), + ( + "ui.update_bucket_policy", + "/ui/buckets/{bucket_name}/policy", + ), + ( + "ui.update_bucket_replication", + "/ui/buckets/{bucket_name}/replication", + ), + ( + "ui.update_bucket_website", + "/ui/buckets/{bucket_name}/website", + ), + ("ui.upload_object", "/ui/buckets/{bucket_name}/upload"), + ( + "ui.bulk_delete_objects", + "/ui/buckets/{bucket_name}/bulk-delete", + ), + ( + "ui.bulk_download_objects", + "/ui/buckets/{bucket_name}/bulk-download", + ), + ("ui.archived_objects", "/ui/buckets/{bucket_name}/archived"), + ( + "ui.initiate_multipart_upload", + "/ui/buckets/{bucket_name}/multipart/initiate", + ), + ( + "ui.upload_multipart_part", + "/ui/buckets/{bucket_name}/multipart/{upload_id}/part", + ), + ( + "ui.complete_multipart_upload", + "/ui/buckets/{bucket_name}/multipart/{upload_id}/complete", + ), + ( + "ui.abort_multipart_upload", + "/ui/buckets/{bucket_name}/multipart/{upload_id}/abort", + ), + ( + "ui.get_lifecycle_history", + "/ui/buckets/{bucket_name}/lifecycle/history", + ), + ( + "ui.get_replication_status", + "/ui/buckets/{bucket_name}/replication/status", + ), + ( + "ui.get_replication_failures", + "/ui/buckets/{bucket_name}/replication/failures", + ), + ( + "ui.clear_replication_failures", + "/ui/buckets/{bucket_name}/replication/failures/clear", + ), + ( + "ui.retry_all_replication_failures", + "/ui/buckets/{bucket_name}/replication/failures/retry-all", + ), + ( + "ui.retry_replication_failure", + "/ui/buckets/{bucket_name}/replication/failures/retry", + ), + ( + "ui.dismiss_replication_failure", + "/ui/buckets/{bucket_name}/replication/failures/dismiss", + ), + ("ui.replication_wizard", "/ui/replication/new"), + ( + "ui.create_peer_replication_rules", + "/ui/sites/peers/{site_id}/replication-rules", + ), + ("ui.iam_dashboard", "/ui/iam"), + ("ui.create_iam_user", "/ui/iam/users"), + ("ui.update_iam_user", "/ui/iam/users/{user_id}"), + ("ui.delete_iam_user", "/ui/iam/users/{user_id}/delete"), + ("ui.update_iam_policies", "/ui/iam/users/{user_id}/policies"), + ("ui.update_iam_expiry", "/ui/iam/users/{user_id}/expiry"), + ( + "ui.rotate_iam_secret", + "/ui/iam/users/{user_id}/rotate-secret", + ), + ("ui.connections_dashboard", "/ui/connections"), + ("ui.create_connection", "/ui/connections/create"), + ("ui.update_connection", "/ui/connections/{connection_id}"), + ( + "ui.delete_connection", + "/ui/connections/{connection_id}/delete", + ), + ("ui.test_connection", "/ui/connections/test"), + ("ui.sites_dashboard", "/ui/sites"), + ("ui.update_local_site", "/ui/sites/local"), + ("ui.add_peer_site", "/ui/sites/peers"), + ("ui.metrics_dashboard", "/ui/metrics"), + ("ui.system_dashboard", "/ui/system"), + ("ui.system_gc_status", "/ui/system/gc/status"), + ("ui.system_gc_run", "/ui/system/gc/run"), + ("ui.system_gc_history", "/ui/system/gc/history"), + ("ui.system_integrity_status", "/ui/system/integrity/status"), + ("ui.system_integrity_run", "/ui/system/integrity/run"), + ( + "ui.system_integrity_history", + "/ui/system/integrity/history", + ), + ("ui.website_domains_dashboard", "/ui/website-domains"), + ("ui.create_website_domain", "/ui/website-domains/create"), + ("ui.update_website_domain", "/ui/website-domains/{domain}"), + ( + "ui.delete_website_domain", + "/ui/website-domains/{domain}/delete", + ), + 
("ui.docs_page", "/ui/docs"), + ]); +} + +fn page_context(state: &AppState, session: &SessionHandle, endpoint: &str) -> Context { + let mut ctx = base_context(session, Some(endpoint)); + let principal = session.read(|s| { + s.user_id.as_ref().map(|uid| { + json!({ + "access_key": uid, + "user_id": uid, + "display_name": s + .display_name + .clone() + .unwrap_or_else(|| uid.clone()), + "is_admin": true, + }) + }) + }); + match principal { + Some(p) => ctx.insert("principal", &p), + None => ctx.insert("principal", &Value::Null), + } + ctx.insert("can_manage_iam", &true); + ctx.insert("can_manage_replication", &true); + ctx.insert("can_manage_sites", &true); + ctx.insert("can_manage_encryption", &state.config.encryption_enabled); + ctx.insert("website_hosting_nav", &state.config.website_hosting_enabled); + ctx.insert("encryption_enabled", &state.config.encryption_enabled); + ctx.insert("kms_enabled", &state.config.kms_enabled); + + let flashed = session.write(|s| s.take_flash()); + inject_flash(&mut ctx, flashed); + ctx +} + +fn human_size(bytes: u64) -> String { + const UNITS: [&str; 6] = ["B", "KB", "MB", "GB", "TB", "PB"]; + let mut size = bytes as f64; + let mut idx = 0usize; + while size >= 1024.0 && idx < UNITS.len() - 1 { + size /= 1024.0; + idx += 1; + } + if idx == 0 { + format!("{} {}", bytes, UNITS[idx]) + } else { + format!("{:.1} {}", size, UNITS[idx]) + } +} + +fn wants_json(headers: &HeaderMap) -> bool { + headers + .get("x-requested-with") + .and_then(|value| value.to_str().ok()) + .map(|value| value.eq_ignore_ascii_case("xmlhttprequest")) + .unwrap_or(false) + || headers + .get(header::ACCEPT) + .and_then(|value| value.to_str().ok()) + .map(|value| value.contains("application/json")) + .unwrap_or(false) +} + +async fn parse_form_any( + headers: &HeaderMap, + body: Body, +) -> Result, String> { + let content_type = headers + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let is_multipart = content_type + .to_ascii_lowercase() + .starts_with("multipart/form-data"); + + let bytes = body + .collect() + .await + .map_err(|e| format!("Failed to read request body: {}", e))? + .to_bytes(); + + if is_multipart { + let boundary = multer::parse_boundary(&content_type) + .map_err(|_| "Missing multipart boundary".to_string())?; + let stream = futures::stream::once(async move { Ok::<_, std::io::Error>(bytes) }); + let mut multipart = multer::Multipart::new(stream, boundary); + let mut out = HashMap::new(); + while let Some(field) = multipart + .next_field() + .await + .map_err(|e| format!("Malformed multipart body: {}", e))? 
+ { + let name = match field.name() { + Some(name) => name.to_string(), + None => continue, + }; + if field.file_name().is_some() { + continue; + } + let value = field + .text() + .await + .map_err(|e| format!("Invalid multipart field '{}': {}", name, e))?; + out.insert(name, value); + } + Ok(out) + } else { + let parsed: Vec<(String, String)> = serde_urlencoded::from_bytes(&bytes) + .map_err(|e| format!("Invalid form body: {}", e))?; + Ok(parsed.into_iter().collect()) + } +} + +fn bucket_tab_redirect(bucket_name: &str, tab: &str) -> Response { + Redirect::to(&format!("/ui/buckets/{}?tab={}", bucket_name, tab)).into_response() +} + +fn default_public_policy(bucket_name: &str) -> String { + serde_json::to_string_pretty(&json!({ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowList", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:ListBucket"], + "Resource": [format!("arn:aws:s3:::{}", bucket_name)], + }, + { + "Sid": "AllowRead", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:GetObject"], + "Resource": [format!("arn:aws:s3:::{}/*", bucket_name)], + } + ] + })) + .unwrap_or_else(|_| "{}".to_string()) +} + +fn parse_api_base(state: &AppState) -> (String, String) { + let api_base = state.config.api_base_url.trim_end_matches('/').to_string(); + let api_host = api_base + .split("://") + .nth(1) + .unwrap_or(&api_base) + .split('/') + .next() + .unwrap_or("") + .to_string(); + (api_base, api_host) +} + +fn config_encryption_to_ui(value: Option<&Value>) -> Value { + match value { + Some(Value::Object(map)) => Value::Object(map.clone()), + Some(Value::String(s)) => { + serde_json::from_str(s).unwrap_or_else(|_| json!({ "Rules": [] })) + } + _ => json!({ "Rules": [] }), + } +} + +fn config_website_to_ui(value: Option<&Value>) -> Value { + let parsed = match value { + Some(Value::Object(map)) => Value::Object(map.clone()), + Some(Value::String(s)) => serde_json::from_str(s).unwrap_or(Value::Null), + _ => Value::Null, + }; + + let Some(map) = parsed.as_object() else { + return Value::Null; + }; + + json!({ + "index_document": map + .get("index_document") + .and_then(Value::as_str) + .unwrap_or("index.html"), + "error_document": map.get("error_document").and_then(Value::as_str), + }) +} + +fn bucket_access_descriptor( + policy: Option<&Value>, + bucket_name: &str, +) -> (&'static str, &'static str) { + let Some(policy) = policy else { + return ("Private", "bg-secondary-subtle text-secondary-emphasis"); + }; + + let default_policy = default_public_policy(bucket_name); + let default_policy_value: Value = serde_json::from_str(&default_policy).unwrap_or(Value::Null); + if *policy == default_policy_value { + return ("Public Read", "bg-warning-subtle text-warning-emphasis"); + } + + ("Custom policy", "bg-info-subtle text-info-emphasis") +} + +pub async fn buckets_overview( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.buckets_overview"); + + let buckets = match state.storage.list_buckets().await { + Ok(list) => list, + Err(e) => { + tracing::error!("list_buckets failed: {}", e); + Vec::new() + } + }; + + let mut items: Vec = Vec::with_capacity(buckets.len()); + for b in &buckets { + let stats = state.storage.bucket_stats(&b.name).await.ok(); + let total_bytes = stats.as_ref().map(|s| s.total_bytes()).unwrap_or(0); + let total_objects = stats.as_ref().map(|s| s.total_objects()).unwrap_or(0); + let policy = state + .storage + .get_bucket_config(&b.name) + .await + .ok() + .and_then(|cfg| 
cfg.policy); + let (access_label, access_badge) = bucket_access_descriptor(policy.as_ref(), &b.name); + + items.push(json!({ + "meta": { + "name": b.name, + "creation_date": b.creation_date.to_rfc3339(), + }, + "summary": { + "human_size": human_size(total_bytes), + "objects": total_objects, + }, + "detail_url": format!("/ui/buckets/{}", b.name), + "access_badge": access_badge, + "access_label": access_label, + })); + } + + ctx.insert("buckets", &items); + render(&state, "buckets.html", &ctx) +} + +pub async fn bucket_detail( + State(state): State, + Extension(session): Extension, + Path(bucket_name): Path, + Query(request_args): Query>, +) -> Response { + if !matches!(state.storage.bucket_exists(&bucket_name).await, Ok(true)) { + session.write(|s| { + s.push_flash( + "danger", + format!("Bucket '{}' does not exist.", bucket_name), + ) + }); + return Redirect::to("/ui/buckets").into_response(); + } + + let mut ctx = page_context(&state, &session, "ui.bucket_detail"); + ctx.insert("request_args", &request_args); + let bucket_meta = state + .storage + .list_buckets() + .await + .ok() + .and_then(|list| list.into_iter().find(|b| b.name == bucket_name)); + let bucket_config = state + .storage + .get_bucket_config(&bucket_name) + .await + .unwrap_or_default(); + let bucket_stats = state + .storage + .bucket_stats(&bucket_name) + .await + .unwrap_or_default(); + let replication_rule = state.replication.get_rule(&bucket_name); + let target_conn = replication_rule + .as_ref() + .and_then(|rule| state.connections.get(&rule.target_connection_id)); + let versioning_status_enum = state + .storage + .get_versioning_status(&bucket_name) + .await + .unwrap_or(myfsio_common::types::VersioningStatus::Disabled); + let versioning_enabled = + matches!(versioning_status_enum, myfsio_common::types::VersioningStatus::Enabled); + let versioning_suspended = matches!( + versioning_status_enum, + myfsio_common::types::VersioningStatus::Suspended + ); + let encryption_config = config_encryption_to_ui(bucket_config.encryption.as_ref()); + let website_config = config_website_to_ui(bucket_config.website.as_ref()); + let quota = bucket_config.quota.clone(); + let max_bytes = quota.as_ref().and_then(|q| q.max_bytes); + let max_objects = quota.as_ref().and_then(|q| q.max_objects); + let bucket_policy = bucket_config.policy.clone().unwrap_or(Value::Null); + let bucket_policy_text = if bucket_policy.is_null() { + String::new() + } else { + serde_json::to_string_pretty(&bucket_policy).unwrap_or_else(|_| bucket_policy.to_string()) + }; + let default_policy = default_public_policy(&bucket_name); + let default_policy_value: Value = serde_json::from_str(&default_policy).unwrap_or(Value::Null); + let preset_choice = if bucket_policy.is_null() { + "private" + } else if bucket_policy == default_policy_value { + "public" + } else { + "custom" + }; + ctx.insert("bucket_name", &bucket_name); + ctx.insert( + "bucket", + &json!({ + "name": bucket_name, + "creation_date": bucket_meta + .as_ref() + .map(|b| b.creation_date.to_rfc3339()) + .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()), + }), + ); + ctx.insert("objects", &Vec::::new()); + ctx.insert("prefixes", &Vec::::new()); + ctx.insert("total_objects", &bucket_stats.total_objects()); + ctx.insert("total_bytes", &bucket_stats.total_bytes()); + ctx.insert("current_objects", &bucket_stats.objects); + ctx.insert("current_bytes", &bucket_stats.bytes); + ctx.insert("version_count", &bucket_stats.version_count); + ctx.insert("version_bytes", &bucket_stats.version_bytes); + 
ctx.insert("max_objects", &max_objects); + ctx.insert("max_bytes", &max_bytes); + ctx.insert("has_max_objects", &max_objects.is_some()); + ctx.insert("has_max_bytes", &max_bytes.is_some()); + ctx.insert( + "obj_pct", + &max_objects + .map(|m| { + ((bucket_stats.total_objects() as f64 / m.max(1) as f64) * 100.0).round() as u64 + }) + .unwrap_or(0), + ); + ctx.insert( + "bytes_pct", + &max_bytes + .map(|m| ((bucket_stats.total_bytes() as f64 / m.max(1) as f64) * 100.0).round() as u64) + .unwrap_or(0), + ); + ctx.insert("has_quota", "a.is_some()); + ctx.insert("versioning_enabled", &versioning_enabled); + ctx.insert("versioning_suspended", &versioning_suspended); + ctx.insert( + "versioning_status", + &(match versioning_status_enum { + myfsio_common::types::VersioningStatus::Enabled => "Enabled", + myfsio_common::types::VersioningStatus::Suspended => "Suspended", + myfsio_common::types::VersioningStatus::Disabled => "Disabled", + }), + ); + ctx.insert("encryption_config", &encryption_config); + ctx.insert("enc_rules", &Vec::::new()); + ctx.insert("enc_algorithm", &""); + ctx.insert("enc_kms_key", &""); + let replication_rules = replication_rule + .clone() + .and_then(|rule| serde_json::to_value(rule).ok()) + .map(|rule| vec![rule]) + .unwrap_or_default(); + ctx.insert("replication_rules", &replication_rules); + ctx.insert( + "replication_rule", + &replication_rule + .clone() + .and_then(|rule| serde_json::to_value(rule).ok()) + .unwrap_or(Value::Null), + ); + ctx.insert("website_config", &website_config); + ctx.insert("bucket_policy", &bucket_policy); + ctx.insert("bucket_policy_text", &bucket_policy_text); + ctx.insert("preset_choice", &preset_choice); + let conns: Vec = state + .connections + .list() + .into_iter() + .map(|c| { + json!({ + "id": c.id, + "name": c.name, + "endpoint_url": c.endpoint_url, + "region": c.region, + "access_key": c.access_key, + }) + }) + .collect(); + ctx.insert("connections", &conns); + ctx.insert("current_prefix", &""); + ctx.insert("parent_prefix", &""); + ctx.insert("has_more", &false); + ctx.insert("next_token", &""); + ctx.insert( + "active_tab", + &request_args + .get("tab") + .cloned() + .unwrap_or_else(|| "objects".to_string()), + ); + let multipart_uploads: Vec = state + .storage + .list_multipart_uploads(&bucket_name) + .await + .unwrap_or_default() + .into_iter() + .map(|u| { + json!({ + "upload_id": u.upload_id, + "key": u.key, + "initiated": u.initiated.to_rfc3339(), + }) + }) + .collect(); + ctx.insert("multipart_uploads", &multipart_uploads); + ctx.insert( + "target_conn", + &target_conn + .as_ref() + .and_then(|conn| serde_json::to_value(conn).ok()) + .unwrap_or(Value::Null), + ); + ctx.insert( + "target_conn_name", + &target_conn + .as_ref() + .map(|conn| conn.name.clone()) + .unwrap_or_default(), + ); + ctx.insert("default_policy", &default_policy); + ctx.insert("can_manage_cors", &true); + ctx.insert("can_manage_lifecycle", &true); + ctx.insert("can_manage_quota", &true); + ctx.insert("can_manage_versioning", &true); + ctx.insert("can_manage_website", &true); + ctx.insert("can_edit_policy", &true); + ctx.insert("is_replication_admin", &true); + ctx.insert("lifecycle_enabled", &state.config.lifecycle_enabled); + ctx.insert("site_sync_enabled", &state.config.site_sync_enabled); + ctx.insert( + "website_hosting_enabled", + &state.config.website_hosting_enabled, + ); + let website_domains: Vec = state + .website_domains + .as_ref() + .map(|store| { + store + .list_all() + .into_iter() + .filter_map(|entry| { + if entry.get("bucket").and_then(|v| 
v.as_str()) == Some(bucket_name.as_str()) { + entry + .get("domain") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + } else { + None + } + }) + .collect() + }) + .unwrap_or_default(); + ctx.insert("website_domains", &website_domains); + let kms_keys: Vec = if let Some(kms) = &state.kms { + kms.list_keys() + .await + .into_iter() + .map(|key| { + json!({ + "key_id": key.key_id, + "description": key.description, + }) + }) + .collect() + } else { + Vec::new() + }; + ctx.insert("kms_keys", &kms_keys); + ctx.insert( + "bucket_stats", + &json!({ + "bytes": bucket_stats.bytes, + "objects": bucket_stats.objects, + "total_bytes": bucket_stats.total_bytes(), + "total_objects": bucket_stats.total_objects(), + "version_bytes": bucket_stats.version_bytes, + "version_count": bucket_stats.version_count + }), + ); + ctx.insert( + "bucket_quota", + &json!({ "max_bytes": max_bytes, "max_objects": max_objects }), + ); + ctx.insert( + "buckets_for_copy_url", + &format!("/ui/buckets/{}/copy-targets", bucket_name), + ); + ctx.insert("acl_url", &format!("/ui/buckets/{}/acl", bucket_name)); + ctx.insert("cors_url", &format!("/ui/buckets/{}/cors", bucket_name)); + ctx.insert( + "folders_url", + &format!("/ui/buckets/{}/folders", bucket_name), + ); + ctx.insert( + "lifecycle_url", + &format!("/ui/buckets/{}/lifecycle", bucket_name), + ); + ctx.insert( + "objects_api_url", + &format!("/ui/buckets/{}/objects", bucket_name), + ); + ctx.insert( + "objects_stream_url", + &format!("/ui/buckets/{}/objects/stream", bucket_name), + ); + render(&state, "bucket_detail.html", &ctx) +} + +pub async fn iam_dashboard( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.iam_dashboard"); + let now = chrono::Utc::now(); + let soon = now + chrono::Duration::days(7); + let raw_users = state.iam.list_users().await; + let mut users: Vec = Vec::with_capacity(raw_users.len()); + for u in raw_users.iter() { + let user_id = u + .get("user_id") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let display_name = u + .get("display_name") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let enabled = u.get("enabled").and_then(|v| v.as_bool()).unwrap_or(true); + let access_key = u + .get("access_keys") + .and_then(|v| v.as_array()) + .and_then(|arr| { + arr.iter().find_map(|k| { + k.get("access_key") + .and_then(|x| x.as_str()) + .map(|s| s.to_string()) + }) + }) + .unwrap_or_default(); + + let detail = state.iam.get_user(&user_id).await; + let policies = detail + .as_ref() + .and_then(|d| d.get("policies").cloned()) + .unwrap_or(Value::Array(Vec::new())); + let expires_at = detail + .as_ref() + .and_then(|d| d.get("expires_at").cloned()) + .unwrap_or(Value::Null); + let is_admin = policies + .as_array() + .map(|items| { + items.iter().any(|policy| { + policy + .get("actions") + .and_then(|value| value.as_array()) + .map(|actions| { + actions + .iter() + .any(|action| matches!(action.as_str(), Some("*") | Some("iam:*"))) + }) + .unwrap_or(false) + }) + }) + .unwrap_or(false); + let expires_dt = expires_at.as_str().and_then(|value| { + chrono::DateTime::parse_from_rfc3339(value) + .ok() + .map(|dt| dt.with_timezone(&chrono::Utc)) + }); + let is_expired = expires_dt.map(|dt| dt <= now).unwrap_or(false); + let is_expiring_soon = expires_dt.map(|dt| dt > now && dt <= soon).unwrap_or(false); + let access_keys = u + .get("access_keys") + .cloned() + .unwrap_or(Value::Array(Vec::new())); + + users.push(json!({ + "user_id": user_id, + 
"access_key": access_key, + "display_name": display_name, + "enabled": enabled, + "is_enabled": enabled, + "expires_at": expires_at, + "is_admin": is_admin, + "is_expired": is_expired, + "is_expiring_soon": is_expiring_soon, + "access_keys": access_keys, + "policies": policies, + "policy_count": u.get("policy_count").cloned().unwrap_or(Value::from(0)), + })); + } + let all_buckets: Vec = state + .storage + .list_buckets() + .await + .map(|list| list.into_iter().map(|b| b.name).collect()) + .unwrap_or_default(); + ctx.insert("users", &users); + ctx.insert("iam_locked", &false); + ctx.insert("locked_reason", &""); + ctx.insert("iam_disabled", &false); + ctx.insert("all_buckets", &all_buckets); + ctx.insert("disclosed_secret", &Value::Null); + let config_doc = + serde_json::to_string_pretty(&state.iam.export_config(true)).unwrap_or_default(); + ctx.insert("config_document", &config_doc); + ctx.insert("config_summary", &json!({ "user_count": users.len() })); + render(&state, "iam.html", &ctx) +} + +#[derive(serde::Deserialize)] +pub struct CreateIamUserForm { + pub display_name: Option, + pub access_key: Option, + pub secret_key: Option, + pub policies: Option, + pub expires_at: Option, + #[serde(default)] + pub csrf_token: String, +} + +fn parse_policies(raw: &str) -> Result, String> { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return Ok(vec![]); + } + serde_json::from_str::>(trimmed) + .map_err(|e| format!("Invalid policies JSON: {}", e)) +} + +fn normalize_expires_at(raw: Option) -> Result, String> { + let Some(value) = raw else { + return Ok(None); + }; + let trimmed = value.trim(); + if trimmed.is_empty() { + return Ok(None); + } + if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(trimmed) { + return Ok(Some(dt.with_timezone(&chrono::Utc).to_rfc3339())); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M") { + return Ok(Some(naive.and_utc().to_rfc3339())); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M:%S") { + return Ok(Some(naive.and_utc().to_rfc3339())); + } + Err("Invalid expiry date format".to_string()) +} + +pub async fn create_iam_user( + State(state): State, + Extension(session): Extension, + headers: HeaderMap, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let wants_json = wants_json(&headers); + let display_name = form + .display_name + .as_deref() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "Unnamed".to_string()); + + if display_name.len() > 64 { + let message = "Display name must be 64 characters or fewer".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/iam").into_response(); + } + + let policies = match form.policies.as_deref().map(parse_policies) { + Some(Ok(p)) if !p.is_empty() => Some(p), + Some(Ok(_)) | None => None, + Some(Err(e)) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + return Redirect::to("/ui/iam").into_response(); + } + }; + + let expires_at = match normalize_expires_at(form.expires_at) { + Ok(v) => v, + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + return 
Redirect::to("/ui/iam").into_response(); + } + }; + + let custom_access_key = form + .access_key + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); + let custom_secret_key = form + .secret_key + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); + + match state.iam.create_user( + &display_name, + policies.clone(), + custom_access_key, + custom_secret_key, + expires_at, + ) { + Ok(created) => { + let user_id = created + .get("user_id") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let access_key = created + .get("access_key") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let secret_key = created + .get("secret_key") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let message = format!("Created user {}", access_key); + if wants_json { + return axum::Json(json!({ + "success": true, + "message": message, + "user_id": user_id, + "access_key": access_key, + "secret_key": secret_key, + "display_name": display_name, + "expires_at": created.get("expires_at").cloned().unwrap_or(Value::Null), + "policies": policies.unwrap_or_default(), + })) + .into_response(); + } + session + .write(|s| s.push_flash("success", format!("{}. Copy the secret now.", message))); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateIamUserForm { + pub display_name: Option, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_iam_user( + State(state): State, + Extension(session): Extension, + Path(user_id): Path, + headers: HeaderMap, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let wants_json = wants_json(&headers); + let display_name = form + .display_name + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); + + match state.iam.update_user(&user_id, display_name, None) { + Ok(()) => { + if wants_json { + let display_name = state + .iam + .get_user(&user_id) + .await + .and_then(|user| { + user.get("display_name") + .and_then(|value| value.as_str()) + .map(ToString::to_string) + }) + .unwrap_or_default(); + return axum::Json(json!({ + "success": true, + "user_id": user_id, + "display_name": display_name, + })) + .into_response(); + } + session.write(|s| s.push_flash("success", "User updated.")); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +pub async fn delete_iam_user( + State(state): State, + Extension(session): Extension, + Path(user_id): Path, + headers: HeaderMap, +) -> Response { + let wants_json = wants_json(&headers); + match state.iam.delete_user(&user_id) { + Ok(()) => { + if wants_json { + return axum::Json(json!({ "success": true })).into_response(); + } + session.write(|s| s.push_flash("success", "User deleted.")); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateIamPoliciesForm { + pub policies: String, + #[serde(default)] + 
pub csrf_token: String, +} + +pub async fn update_iam_policies( + State(state): State, + Extension(session): Extension, + Path(user_id): Path, + headers: HeaderMap, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let wants_json = wants_json(&headers); + let policies = match parse_policies(&form.policies) { + Ok(p) => p, + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + return Redirect::to("/ui/iam").into_response(); + } + }; + + match state.iam.update_user_policies(&user_id, policies) { + Ok(()) => { + if wants_json { + let policies = state + .iam + .get_user(&user_id) + .await + .and_then(|user| user.get("policies").cloned()) + .unwrap_or_else(|| Value::Array(Vec::new())); + return axum::Json(json!({ + "success": true, + "user_id": user_id, + "policies": policies, + })) + .into_response(); + } + session.write(|s| s.push_flash("success", "Policies updated.")); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateIamExpiryForm { + pub expires_at: Option, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_iam_expiry( + State(state): State, + Extension(session): Extension, + Path(user_id): Path, + headers: HeaderMap, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let wants_json = wants_json(&headers); + let expires_at = match normalize_expires_at(form.expires_at) { + Ok(v) => v, + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + return Redirect::to("/ui/iam").into_response(); + } + }; + + match state.iam.update_user(&user_id, None, Some(expires_at)) { + Ok(()) => { + if wants_json { + return axum::Json(json!({ "success": true })).into_response(); + } + session.write(|s| s.push_flash("success", "Expiry updated.")); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +pub async fn rotate_iam_secret( + State(state): State, + Extension(session): Extension, + Path(user_id): Path, + headers: HeaderMap, +) -> Response { + let wants_json = wants_json(&headers); + match state.iam.rotate_secret(&user_id) { + Ok(result) => { + let access_key = result + .get("access_key") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let secret_key = result + .get("secret_key") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + if wants_json { + return axum::Json(json!({ + "success": true, + "access_key": access_key, + "secret_key": secret_key, + })) + .into_response(); + } + session + .write(|s| s.push_flash("success", format!("Secret rotated for {}.", access_key))); + Redirect::to("/ui/iam").into_response() + } + Err(e) => { + if wants_json { + return (StatusCode::BAD_REQUEST, axum::Json(json!({ "error": e }))) + .into_response(); + } + session.write(|s| s.push_flash("danger", e)); + Redirect::to("/ui/iam").into_response() + } + } +} + +pub async fn sites_dashboard( + State(state): State, + 
Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.sites_dashboard"); + + let local_site = state + .site_registry + .as_ref() + .and_then(|reg| reg.get_local_site()) + .map(|s| { + json!({ + "site_id": s.site_id, + "display_name": s.display_name, + "endpoint": s.endpoint, + "region": s.region, + "priority": s.priority, + }) + }) + .unwrap_or(Value::Null); + + let peers: Vec = state + .site_registry + .as_ref() + .map(|reg| { + reg.list_peers() + .into_iter() + .map(|p| { + json!({ + "site_id": p.site_id, + "display_name": p.display_name, + "endpoint": p.endpoint, + "region": p.region, + "priority": p.priority, + "connection_id": p.connection_id, + "is_healthy": p.is_healthy, + "last_health_check": p.last_health_check, + }) + }) + .collect() + }) + .unwrap_or_default(); + + let peers_with_stats: Vec = peers + .iter() + .cloned() + .map(|peer| { + let has_connection = peer + .get("connection_id") + .and_then(|value| value.as_str()) + .map(|value| !value.is_empty()) + .unwrap_or(false); + json!({ + "peer": peer, + "has_connection": has_connection, + "buckets_syncing": 0, + "has_bidirectional": false, + }) + }) + .collect(); + + let conns: Vec = state + .connections + .list() + .into_iter() + .map(|c| { + json!({ + "id": c.id, + "name": c.name, + "endpoint_url": c.endpoint_url, + "region": c.region, + "access_key": c.access_key, + }) + }) + .collect(); + + ctx.insert("local_site", &local_site); + ctx.insert("peers", &peers); + ctx.insert("peers_with_stats", &peers_with_stats); + ctx.insert("connections", &conns); + ctx.insert( + "config_site_id", + &state.config.site_id.clone().unwrap_or_default(), + ); + ctx.insert( + "config_site_endpoint", + &state.config.site_endpoint.clone().unwrap_or_default(), + ); + ctx.insert("config_site_region", &state.config.site_region); + ctx.insert("topology", &json!({"sites": [], "connections": []})); + render(&state, "sites.html", &ctx) +} + +#[derive(serde::Deserialize)] +pub struct LocalSiteForm { + pub site_id: String, + #[serde(default)] + pub endpoint: String, + #[serde(default = "default_site_region")] + pub region: String, + #[serde(default = "default_site_priority")] + pub priority: i32, + #[serde(default)] + pub display_name: String, + #[serde(default)] + pub csrf_token: String, +} + +#[derive(serde::Deserialize)] +pub struct PeerSiteForm { + pub site_id: String, + pub endpoint: String, + #[serde(default = "default_site_region")] + pub region: String, + #[serde(default = "default_site_priority")] + pub priority: i32, + #[serde(default)] + pub display_name: String, + #[serde(default)] + pub connection_id: String, + #[serde(default)] + pub csrf_token: String, +} + +#[derive(serde::Deserialize, Default)] +pub struct DeletePeerSiteForm { + #[serde(default)] + pub csrf_token: String, +} + +fn default_site_region() -> String { + "us-east-1".to_string() +} + +fn default_site_priority() -> i32 { + 100 +} + +pub async fn update_local_site( + State(state): State, + Extension(session): Extension, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let site_id = form.site_id.trim().to_string(); + if site_id.is_empty() { + let message = "Site ID is required.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + + let Some(registry) = &state.site_registry else { + let 
message = "Site registry is not available.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + }; + + let existing = registry.get_local_site(); + let site = crate::services::site_registry::SiteInfo { + site_id: site_id.clone(), + endpoint: form.endpoint.trim().to_string(), + region: form.region.trim().to_string(), + priority: form.priority, + display_name: { + let display_name = form.display_name.trim(); + if display_name.is_empty() { + site_id.clone() + } else { + display_name.to_string() + } + }, + created_at: existing.and_then(|site| site.created_at), + }; + registry.set_local_site(site); + + let message = "Local site configuration updated".to_string(); + if wants_json { + return axum::Json(json!({ "ok": true, "message": message })).into_response(); + } + session.write(|s| s.push_flash("success", message)); + Redirect::to("/ui/sites").into_response() +} + +pub async fn add_peer_site( + State(state): State, + Extension(session): Extension, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let site_id = form.site_id.trim().to_string(); + let endpoint = form.endpoint.trim().to_string(); + if site_id.is_empty() { + let message = "Site ID is required.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + if endpoint.is_empty() { + let message = "Endpoint is required.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + + let Some(registry) = &state.site_registry else { + let message = "Site registry is not available.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + }; + + if registry.get_peer(&site_id).is_some() { + let message = format!("Peer site '{}' already exists.", site_id); + if wants_json { + return ( + StatusCode::CONFLICT, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + + let connection_id = { + let value = form.connection_id.trim(); + if value.is_empty() { + None + } else { + Some(value.to_string()) + } + }; + if let Some(connection_id) = connection_id.as_deref() { + if state.connections.get(connection_id).is_none() { + let message = format!("Connection '{}' not found.", connection_id); + if wants_json { + return ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + } + + let has_connection = connection_id.is_some(); + let peer = crate::services::site_registry::PeerSite { + site_id: site_id.clone(), + endpoint, + region: form.region.trim().to_string(), + priority: form.priority, + display_name: { + let display_name = form.display_name.trim(); + if 
display_name.is_empty() { + site_id.clone() + } else { + display_name.to_string() + } + }, + connection_id: connection_id.clone(), + created_at: None, + is_healthy: false, + last_health_check: None, + }; + registry.add_peer(peer); + + let message = format!("Peer site '{}' added.", site_id); + if wants_json { + let redirect = if has_connection { + Some(format!("/ui/replication/new?site_id={}", site_id)) + } else { + None + }; + return axum::Json(json!({ + "ok": true, + "message": message, + "redirect": redirect, + })) + .into_response(); + } + session.write(|s| s.push_flash("success", message)); + if has_connection { + return Redirect::to(&format!("/ui/replication/new?site_id={}", site_id)).into_response(); + } + Redirect::to("/ui/sites").into_response() +} + +pub async fn update_peer_site( + State(state): State, + Extension(session): Extension, + Path(site_id): Path, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let Some(registry) = &state.site_registry else { + let message = "Site registry is not available.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + }; + + let Some(existing) = registry.get_peer(&site_id) else { + let message = format!("Peer site '{}' not found.", site_id); + if wants_json { + return ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + }; + + let connection_id = { + let value = form.connection_id.trim(); + if value.is_empty() { + None + } else { + Some(value.to_string()) + } + }; + if let Some(connection_id) = connection_id.as_deref() { + if state.connections.get(connection_id).is_none() { + let message = format!("Connection '{}' not found.", connection_id); + if wants_json { + return ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/sites").into_response(); + } + } + + let peer = crate::services::site_registry::PeerSite { + site_id: site_id.clone(), + endpoint: form.endpoint.trim().to_string(), + region: form.region.trim().to_string(), + priority: form.priority, + display_name: { + let display_name = form.display_name.trim(); + if display_name.is_empty() { + site_id.clone() + } else { + display_name.to_string() + } + }, + connection_id, + created_at: existing.created_at, + is_healthy: existing.is_healthy, + last_health_check: existing.last_health_check, + }; + registry.update_peer(peer); + + let message = format!("Peer site '{}' updated.", site_id); + if wants_json { + return axum::Json(json!({ "ok": true, "message": message })).into_response(); + } + session.write(|s| s.push_flash("success", message)); + Redirect::to("/ui/sites").into_response() +} + +pub async fn delete_peer_site( + State(state): State, + Extension(session): Extension, + Path(site_id): Path, + headers: HeaderMap, + Form(_form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let Some(registry) = &state.site_registry else { + let message = "Site registry is not available.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", 
message)); + return Redirect::to("/ui/sites").into_response(); + }; + + if registry.delete_peer(&site_id) { + let message = format!("Peer site '{}' deleted.", site_id); + if wants_json { + return axum::Json(json!({ "ok": true, "message": message })).into_response(); + } + session.write(|s| s.push_flash("success", message)); + } else { + let message = format!("Peer site '{}' not found.", site_id); + if wants_json { + return ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + } + + Redirect::to("/ui/sites").into_response() +} + +pub async fn connections_dashboard( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.connections_dashboard"); + let conns = state.connections.list(); + let items: Vec = conns + .into_iter() + .map(|c| { + json!({ + "id": c.id, + "name": c.name, + "endpoint_url": c.endpoint_url, + "region": c.region, + "access_key": c.access_key, + }) + }) + .collect(); + ctx.insert("connections", &items); + render(&state, "connections.html", &ctx) +} + +pub async fn metrics_dashboard( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.metrics_dashboard"); + ctx.insert( + "metrics_enabled", + &(state.config.metrics_enabled || state.config.metrics_history_enabled), + ); + ctx.insert( + "metrics_history_enabled", + &state.config.metrics_history_enabled, + ); + ctx.insert("operation_metrics_enabled", &state.config.metrics_enabled); + ctx.insert("history", &Vec::::new()); + ctx.insert("operation_metrics", &Vec::::new()); + + let metrics = crate::handlers::ui_api::collect_metrics(&state).await; + let cpu_percent = metrics + .get("cpu_percent") + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); + let memory = metrics + .get("memory") + .cloned() + .unwrap_or_else(|| json!({ "percent": 0, "total": "0 B", "used": "0 B" })); + let disk = metrics + .get("disk") + .cloned() + .unwrap_or_else(|| json!({ "percent": 0, "free": "0 B", "total": "0 B" })); + let app = metrics.get("app").cloned().unwrap_or_else(|| { + json!({ + "buckets": 0, "objects": 0, "storage_used": "0 B", + "uptime_days": 0, "versions": 0, + }) + }); + let mem_pct = memory + .get("percent") + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); + let disk_pct = disk.get("percent").and_then(|v| v.as_f64()).unwrap_or(0.0); + let has_issues = cpu_percent > 80.0 || mem_pct > 85.0 || disk_pct > 90.0; + + ctx.insert("cpu_percent", &cpu_percent); + ctx.insert("memory", &memory); + ctx.insert("disk", &disk); + ctx.insert("app", &app); + ctx.insert("has_issues", &has_issues); + ctx.insert( + "summary", + &json!({ + "app": app, + "cpu_percent": cpu_percent, + "disk": disk, + "memory": memory, + "has_issues": has_issues, + }), + ); + render(&state, "metrics.html", &ctx) +} + +fn format_history_timestamp(timestamp: Option) -> String { + let Some(timestamp) = timestamp else { + return "-".to_string(); + }; + let millis = (timestamp * 1000.0).round() as i64; + chrono::DateTime::::from_timestamp_millis(millis) + .map(|dt| dt.format("%Y-%m-%d %H:%M:%S UTC").to_string()) + .unwrap_or_else(|| "-".to_string()) +} + +fn format_byte_count(bytes: u64) -> String { + const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"]; + let mut value = bytes as f64; + let mut unit = 0usize; + while value >= 1024.0 && unit < UNITS.len() - 1 { + value /= 1024.0; + unit += 1; + } + if unit == 0 { + format!("{} {}", bytes, UNITS[unit]) 
+ } else { + format!("{value:.1} {}", UNITS[unit]) + } +} + +fn decorate_gc_history(executions: &[Value]) -> Vec { + executions + .iter() + .cloned() + .map(|mut execution| { + let timestamp = execution.get("timestamp").and_then(|value| value.as_f64()); + let bytes_freed = execution + .get("result") + .and_then(|value| value.get("temp_bytes_freed")) + .and_then(|value| value.as_u64()) + .unwrap_or(0); + if let Some(obj) = execution.as_object_mut() { + obj.insert( + "timestamp_display".to_string(), + Value::String(format_history_timestamp(timestamp)), + ); + obj.insert( + "bytes_freed_display".to_string(), + Value::String(format_byte_count(bytes_freed)), + ); + } + execution + }) + .collect() +} + +fn decorate_integrity_history(executions: &[Value]) -> Vec { + executions + .iter() + .cloned() + .map(|mut execution| { + let timestamp = execution.get("timestamp").and_then(|value| value.as_f64()); + if let Some(obj) = execution.as_object_mut() { + obj.insert( + "timestamp_display".to_string(), + Value::String(format_history_timestamp(timestamp)), + ); + } + execution + }) + .collect() +} + +pub async fn system_dashboard( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.system_dashboard"); + + let gc_status = match &state.gc { + Some(gc) => gc.status().await, + None => json!({ + "dry_run": false, + "enabled": false, + "interval_hours": 6, + "lock_file_max_age_hours": 1, + "multipart_max_age_days": 7, + "running": false, + "scanning": false, + "scan_elapsed_seconds": Value::Null, + "temp_file_max_age_hours": 24, + }), + }; + let gc_history = match &state.gc { + Some(gc) => gc + .history() + .await + .get("executions") + .and_then(|value| value.as_array()) + .map(|values| decorate_gc_history(values)) + .unwrap_or_default(), + None => Vec::new(), + }; + + let integrity_status = match &state.integrity { + Some(checker) => checker.status().await, + None => json!({ + "auto_heal": false, + "batch_size": 100, + "dry_run": false, + "enabled": false, + "interval_hours": 24, + "running": false, + "scanning": false, + "scan_elapsed_seconds": Value::Null, + }), + }; + let integrity_history = match &state.integrity { + Some(checker) => checker + .history() + .await + .get("executions") + .and_then(|value| value.as_array()) + .map(|values| decorate_integrity_history(values)) + .unwrap_or_default(), + None => Vec::new(), + }; + + ctx.insert("gc_enabled", &state.config.gc_enabled); + ctx.insert("integrity_enabled", &state.config.integrity_enabled); + ctx.insert("gc_history", &gc_history); + ctx.insert("integrity_history", &integrity_history); + ctx.insert("gc_status", &gc_status); + ctx.insert("integrity_status", &integrity_status); + ctx.insert("app_version", &env!("CARGO_PKG_VERSION")); + ctx.insert("display_timezone", &"UTC"); + ctx.insert("platform", &std::env::consts::OS); + ctx.insert( + "storage_root", + &state.config.storage_root.display().to_string(), + ); + ctx.insert("total_issues", &0); + let features = vec![ + json!({"label": "Encryption (SSE-S3)", "enabled": state.config.encryption_enabled}), + json!({"label": "KMS", "enabled": state.config.kms_enabled}), + json!({"label": "Versioning Lifecycle", "enabled": state.config.lifecycle_enabled}), + json!({"label": "Metrics History", "enabled": state.config.metrics_history_enabled}), + json!({"label": "Operation Metrics", "enabled": state.config.metrics_enabled}), + json!({"label": "Site Sync", "enabled": state.config.site_sync_enabled}), + json!({"label": "Website Hosting", 
"enabled": state.config.website_hosting_enabled}), + json!({"label": "Garbage Collection", "enabled": state.config.gc_enabled}), + json!({"label": "Integrity Scanner", "enabled": state.config.integrity_enabled}), + ]; + ctx.insert("features", &features); + render(&state, "system.html", &ctx) +} + +pub async fn website_domains_dashboard( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.website_domains_dashboard"); + let buckets: Vec = state + .storage + .list_buckets() + .await + .map(|list| list.into_iter().map(|b| b.name).collect()) + .unwrap_or_default(); + let mappings = state + .website_domains + .as_ref() + .map(|store| { + let mut mappings = store.list_all(); + mappings.sort_by(|a, b| { + let a_domain = a + .get("domain") + .and_then(|value| value.as_str()) + .unwrap_or(""); + let b_domain = b + .get("domain") + .and_then(|value| value.as_str()) + .unwrap_or(""); + a_domain.cmp(b_domain) + }); + mappings + }) + .unwrap_or_default(); + ctx.insert("domains", &mappings); + ctx.insert("mappings", &mappings); + ctx.insert("buckets", &buckets); + render(&state, "website_domains.html", &ctx) +} + +pub async fn replication_wizard( + State(state): State, + Extension(session): Extension, + Query(q): Query>, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.replication_wizard"); + + let site_id = q.get("site_id").cloned().unwrap_or_default(); + let peer_record = state + .site_registry + .as_ref() + .and_then(|reg| { + if site_id.is_empty() { + reg.list_peers().into_iter().next() + } else { + reg.get_peer(&site_id) + } + }) + .map(|p| { + json!({ + "site_id": p.site_id, + "display_name": p.display_name, + "endpoint": p.endpoint, + "region": p.region, + "connection_id": p.connection_id, + }) + }) + .unwrap_or_else(|| { + json!({ + "site_id": site_id, + "display_name": "", + "endpoint": "", + "region": "us-east-1", + }) + }); + let peer_connection_id = peer_record + .get("connection_id") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + + let local_site = state + .site_registry + .as_ref() + .and_then(|reg| reg.get_local_site()) + .map(|s| { + json!({ + "site_id": s.site_id, + "display_name": s.display_name, + "endpoint": s.endpoint, + "region": s.region, + }) + }) + .unwrap_or(Value::Null); + + let peers: Vec = state + .site_registry + .as_ref() + .map(|reg| { + reg.list_peers() + .into_iter() + .map(|p| { + json!({ + "site_id": p.site_id, + "display_name": p.display_name, + "endpoint": p.endpoint, + "region": p.region, + "connection_id": p.connection_id, + }) + }) + .collect() + }) + .unwrap_or_default(); + + let all_rules = state.replication.list_rules(); + let bucket_names: Vec = state + .storage + .list_buckets() + .await + .map(|list| list.into_iter().map(|b| b.name).collect()) + .unwrap_or_default(); + let buckets: Vec = bucket_names + .into_iter() + .map(|bucket_name| { + let existing_rule = all_rules + .iter() + .find(|rule| rule.bucket_name == bucket_name); + let has_rule_for_peer = existing_rule + .map(|rule| rule.target_connection_id == peer_connection_id) + .unwrap_or(false); + json!({ + "name": bucket_name, + "has_rule": has_rule_for_peer, + "existing_mode": if has_rule_for_peer { + existing_rule.map(|rule| rule.mode.clone()) + } else { + None:: + }, + "existing_target": if has_rule_for_peer { + existing_rule.map(|rule| rule.target_bucket.clone()) + } else { + None:: + }, + }) + }) + .collect(); + + let conns: Vec = state + .connections + .list() + .into_iter() + .map(|c| { 
+ json!({ + "id": c.id, + "name": c.name, + "endpoint_url": c.endpoint_url, + "region": c.region, + "access_key": c.access_key, + }) + }) + .collect(); + + let connection = conns + .iter() + .find(|conn| { + conn.get("id") + .and_then(|value| value.as_str()) + .map(|id| id == peer_connection_id) + .unwrap_or(false) + }) + .cloned() + .or_else(|| conns.first().cloned()) + .unwrap_or_else( + || json!({ "id": "", "name": "", "endpoint_url": "", "region": "", "access_key": "" }), + ); + + ctx.insert("peer", &peer_record); + ctx.insert("peers", &peers); + ctx.insert("local_site", &local_site); + ctx.insert("connections", &conns); + ctx.insert("connection", &connection); + ctx.insert("buckets", &buckets); + render(&state, "replication_wizard.html", &ctx) +} + +#[derive(serde::Deserialize)] +pub struct CreatePeerReplicationRulesForm { + #[serde(default)] + pub mode: String, + #[serde(default)] + pub buckets: Vec, + #[serde(default)] + pub csrf_token: String, + #[serde(flatten)] + pub extras: HashMap, +} + +pub async fn create_peer_replication_rules( + State(state): State, + Extension(session): Extension, + Path(site_id): Path, + Form(form): Form, +) -> Response { + create_peer_replication_rules_impl(state, session, site_id, form).await +} + +pub async fn create_peer_replication_rules_from_query( + State(state): State, + Extension(session): Extension, + Query(q): Query>, + Form(form): Form, +) -> Response { + let site_id = q.get("site_id").cloned().unwrap_or_default(); + create_peer_replication_rules_impl(state, session, site_id, form).await +} + +async fn create_peer_replication_rules_impl( + state: AppState, + session: SessionHandle, + site_id: String, + form: CreatePeerReplicationRulesForm, +) -> Response { + let Some(registry) = &state.site_registry else { + session.write(|s| s.push_flash("danger", "Site registry is not available.")); + return Redirect::to("/ui/sites").into_response(); + }; + let Some(peer) = registry.get_peer(&site_id) else { + session.write(|s| s.push_flash("danger", format!("Peer site '{}' not found.", site_id))); + return Redirect::to("/ui/sites").into_response(); + }; + let Some(connection_id) = peer.connection_id.clone() else { + session.write(|s| { + s.push_flash( + "danger", + "This peer has no connection configured. 
Add a connection first.", + ) + }); + return Redirect::to("/ui/sites").into_response(); + }; + if state.connections.get(&connection_id).is_none() { + session.write(|s| { + s.push_flash( + "danger", + format!("Connection '{}' was not found.", connection_id), + ) + }); + return Redirect::to("/ui/sites").into_response(); + } + + let mode = match form.mode.trim() { + crate::services::replication::MODE_ALL => crate::services::replication::MODE_ALL, + crate::services::replication::MODE_BIDIRECTIONAL => { + crate::services::replication::MODE_BIDIRECTIONAL + } + _ => crate::services::replication::MODE_NEW_ONLY, + } + .to_string(); + + if form.buckets.is_empty() { + session.write(|s| s.push_flash("warning", "No buckets selected.")); + return Redirect::to("/ui/sites").into_response(); + } + + let mut created = 0usize; + let mut created_existing = Vec::new(); + + for bucket_name in form.buckets { + let target_key = format!("target_{}", bucket_name); + let target_bucket = form + .extras + .get(&target_key) + .map(|value| value.trim()) + .filter(|value| !value.is_empty()) + .unwrap_or(bucket_name.as_str()) + .to_string(); + + let rule = crate::services::replication::ReplicationRule { + bucket_name: bucket_name.clone(), + target_connection_id: connection_id.clone(), + target_bucket, + enabled: true, + mode: mode.clone(), + created_at: Some(chrono::Utc::now().timestamp_millis() as f64 / 1000.0), + stats: Default::default(), + sync_deletions: true, + last_pull_at: None, + filter_prefix: None, + }; + + state.replication.set_rule(rule); + created += 1; + if mode == crate::services::replication::MODE_ALL { + created_existing.push(bucket_name); + } + } + + for bucket_name in created_existing { + state + .replication + .clone() + .schedule_existing_objects_sync(bucket_name); + } + + if created > 0 { + session.write(|s| { + s.push_flash( + "success", + format!( + "Created {} replication rule(s) for {}.", + created, + if peer.display_name.is_empty() { + peer.site_id.as_str() + } else { + peer.display_name.as_str() + } + ), + ) + }); + } + Redirect::to("/ui/sites").into_response() +} + +pub async fn docs_page( + State(state): State, + Extension(session): Extension, +) -> Response { + let mut ctx = page_context(&state, &session, "ui.docs_page"); + let (api_base, api_host) = parse_api_base(&state); + ctx.insert("api_base", &api_base); + ctx.insert("api_host", &api_host); + render(&state, "docs.html", &ctx) +} + +#[derive(serde::Deserialize)] +pub struct CreateBucketForm { + pub bucket_name: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn create_bucket( + State(state): State, + Extension(session): Extension, + headers: HeaderMap, + body: Body, +) -> Response { + let wants_json = wants_json(&headers); + let form = match parse_form_any(&headers, body).await { + Ok(fields) => CreateBucketForm { + bucket_name: fields.get("bucket_name").cloned().unwrap_or_default(), + csrf_token: fields.get("csrf_token").cloned().unwrap_or_default(), + }, + Err(message) => { + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/buckets").into_response(); + } + }; + let bucket_name = form.bucket_name.trim().to_string(); + + if bucket_name.is_empty() { + let message = "Bucket name is required".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| 
s.push_flash("danger", message)); + return Redirect::to("/ui/buckets").into_response(); + } + + match state.storage.create_bucket(&bucket_name).await { + Ok(()) => { + let message = format!("Bucket '{}' created.", bucket_name); + if wants_json { + return axum::Json(json!({ + "success": true, + "message": message, + "bucket_name": bucket_name, + })) + .into_response(); + } + session.write(|s| s.push_flash("success", message)); + } + Err(e) => { + let message = format!("Failed to create bucket: {}", e); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + } + } + Redirect::to("/ui/buckets").into_response() +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketVersioningForm { + pub state: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn delete_bucket( + State(state): State, + Path(bucket_name): Path, +) -> Response { + match state.storage.delete_bucket(&bucket_name).await { + Ok(()) => axum::Json(json!({ + "ok": true, + "message": format!("Bucket '{}' deleted.", bucket_name), + })) + .into_response(), + Err(e) => ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(), + } +} + +pub async fn update_bucket_versioning( + State(state): State, + Path(bucket_name): Path, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let enabled = form.state.eq_ignore_ascii_case("enable"); + match state.storage.set_versioning(&bucket_name, enabled).await { + Ok(()) => axum::Json(json!({ + "ok": true, + "enabled": enabled, + "message": if enabled { "Versioning enabled." } else { "Versioning suspended." }, + })) + .into_response(), + Err(e) => ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(), + } +} + +fn empty_string_as_none<'de, D, T>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, + T: std::str::FromStr, + T::Err: std::fmt::Display, +{ + use serde::Deserialize; + let opt = Option::::deserialize(deserializer)?; + match opt.as_deref() { + None | Some("") => Ok(None), + Some(s) => s.parse::().map(Some).map_err(serde::de::Error::custom), + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketQuotaForm { + pub action: String, + #[serde(default, deserialize_with = "empty_string_as_none")] + pub max_mb: Option, + #[serde(default, deserialize_with = "empty_string_as_none")] + pub max_objects: Option, + #[serde(default)] + pub csrf_token: String, +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketReplicationForm { + pub action: String, + #[serde(default)] + pub target_connection_id: String, + #[serde(default)] + pub target_bucket: String, + #[serde(default)] + pub replication_mode: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_bucket_replication( + State(state): State, + Extension(session): Extension, + Path(bucket_name): Path, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + + let respond = |ok: bool, status: StatusCode, message: String, extra: Value| -> Response { + if wants_json { + let mut payload = json!({ + "ok": ok, + "message": message, + }); + if let Some(obj) = payload.as_object_mut() { + if let Some(extra_obj) = extra.as_object() { + for (key, value) in extra_obj { + obj.insert(key.clone(), value.clone()); + } + } + } + return (status, axum::Json(payload)).into_response(); + } + + session.write(|s| 
s.push_flash(if ok { "success" } else { "danger" }, message)); + bucket_tab_redirect(&bucket_name, "replication") + }; + + match form.action.as_str() { + "delete" => { + state.replication.delete_rule(&bucket_name); + respond( + true, + StatusCode::OK, + "Replication configuration removed.".to_string(), + json!({ "action": "delete", "enabled": false }), + ) + } + "pause" => { + let Some(mut rule) = state.replication.get_rule(&bucket_name) else { + return respond( + true, + StatusCode::OK, + "No replication configuration to pause.".to_string(), + json!({ "action": "pause", "enabled": false, "no_op": true }), + ); + }; + rule.enabled = false; + state.replication.set_rule(rule); + respond( + true, + StatusCode::OK, + "Replication paused.".to_string(), + json!({ "action": "pause", "enabled": false }), + ) + } + "resume" => { + let Some(mut rule) = state.replication.get_rule(&bucket_name) else { + return respond( + true, + StatusCode::OK, + "No replication configuration to resume.".to_string(), + json!({ "action": "resume", "enabled": false, "no_op": true }), + ); + }; + rule.enabled = true; + let mode = rule.mode.clone(); + state.replication.set_rule(rule); + + let message = if mode == crate::services::replication::MODE_ALL { + state + .replication + .clone() + .schedule_existing_objects_sync(bucket_name.clone()); + "Replication resumed. Existing object sync will continue in the background." + .to_string() + } else { + "Replication resumed.".to_string() + }; + + respond( + true, + StatusCode::OK, + message, + json!({ "action": "resume", "enabled": true, "mode": mode }), + ) + } + "create" => { + let target_connection_id = form.target_connection_id.trim(); + let target_bucket = form.target_bucket.trim(); + if target_connection_id.is_empty() || target_bucket.is_empty() { + return respond( + false, + StatusCode::BAD_REQUEST, + "Target connection and bucket are required.".to_string(), + json!({ "error": "Target connection and bucket are required" }), + ); + } + if state.connections.get(target_connection_id).is_none() { + return respond( + false, + StatusCode::BAD_REQUEST, + "Target connection was not found.".to_string(), + json!({ "error": "Target connection was not found" }), + ); + } + + let mode = match form.replication_mode.trim() { + crate::services::replication::MODE_ALL => crate::services::replication::MODE_ALL, + crate::services::replication::MODE_BIDIRECTIONAL => { + crate::services::replication::MODE_BIDIRECTIONAL + } + _ => crate::services::replication::MODE_NEW_ONLY, + }; + + state + .replication + .set_rule(crate::services::replication::ReplicationRule { + bucket_name: bucket_name.clone(), + target_connection_id: target_connection_id.to_string(), + target_bucket: target_bucket.to_string(), + enabled: true, + mode: mode.to_string(), + created_at: Some( + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs_f64()) + .unwrap_or(0.0), + ), + stats: crate::services::replication::ReplicationStats::default(), + sync_deletions: true, + last_pull_at: None, + filter_prefix: None, + }); + + let message = if mode == crate::services::replication::MODE_ALL { + state + .replication + .clone() + .schedule_existing_objects_sync(bucket_name.clone()); + "Replication configured. Existing object sync will continue in the background." + .to_string() + } else { + "Replication configured. 
New uploads will be replicated.".to_string() + }; + + respond( + true, + StatusCode::OK, + message, + json!({ + "action": "create", + "enabled": true, + "mode": mode, + "target_connection_id": target_connection_id, + "target_bucket": target_bucket, + }), + ) + } + _ => respond( + false, + StatusCode::BAD_REQUEST, + "Invalid replication action.".to_string(), + json!({ "error": "Invalid action" }), + ), + } +} + +#[derive(serde::Deserialize)] +pub struct ConnectionForm { + pub name: String, + pub endpoint_url: String, + pub access_key: String, + #[serde(default)] + pub secret_key: String, + #[serde(default = "default_connection_region")] + pub region: String, + #[serde(default)] + pub csrf_token: String, +} + +fn default_connection_region() -> String { + "us-east-1".to_string() +} + +pub async fn create_connection( + State(state): State, + Extension(session): Extension, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let name = form.name.trim(); + let endpoint = form.endpoint_url.trim(); + let access_key = form.access_key.trim(); + let secret_key = form.secret_key.trim(); + let region = form.region.trim(); + + if name.is_empty() || endpoint.is_empty() || access_key.is_empty() || secret_key.is_empty() { + let message = "All connection fields are required.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/connections").into_response(); + } + + let connection = crate::stores::connections::RemoteConnection { + id: uuid::Uuid::new_v4().to_string(), + name: name.to_string(), + endpoint_url: endpoint.to_string(), + access_key: access_key.to_string(), + secret_key: secret_key.to_string(), + region: if region.is_empty() { + default_connection_region() + } else { + region.to_string() + }, + }; + + match state.connections.add(connection.clone()) { + Ok(()) => { + let message = format!("Connection '{}' created.", connection.name); + if wants_json { + axum::Json(json!({ + "ok": true, + "message": message, + "connection": { + "id": connection.id, + "name": connection.name, + "endpoint_url": connection.endpoint_url, + "access_key": connection.access_key, + "region": connection.region, + } + })) + .into_response() + } else { + session.write(|s| s.push_flash("success", message)); + Redirect::to("/ui/connections").into_response() + } + } + Err(err) => { + let message = format!("Failed to create connection: {}", err); + if wants_json { + ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response() + } else { + session.write(|s| s.push_flash("danger", message)); + Redirect::to("/ui/connections").into_response() + } + } + } +} + +pub async fn update_connection( + State(state): State, + Extension(session): Extension, + Path(connection_id): Path, + headers: HeaderMap, + Form(form): Form, +) -> Response { + let wants_json = wants_json(&headers); + let Some(mut connection) = state.connections.get(&connection_id) else { + let message = "Connection not found.".to_string(); + if wants_json { + return ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/connections").into_response(); + }; + + let name = form.name.trim(); + let endpoint = form.endpoint_url.trim(); + let access_key = form.access_key.trim(); + let secret_key = 
form.secret_key.trim(); + let region = form.region.trim(); + + if name.is_empty() || endpoint.is_empty() || access_key.is_empty() { + let message = "Name, endpoint, and access key are required.".to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to("/ui/connections").into_response(); + } + + connection.name = name.to_string(); + connection.endpoint_url = endpoint.to_string(); + connection.access_key = access_key.to_string(); + if !secret_key.is_empty() { + connection.secret_key = secret_key.to_string(); + } + connection.region = if region.is_empty() { + default_connection_region() + } else { + region.to_string() + }; + + match state.connections.add(connection.clone()) { + Ok(()) => { + let message = format!("Connection '{}' updated.", connection.name); + if wants_json { + axum::Json(json!({ + "ok": true, + "message": message, + "connection": { + "id": connection.id, + "name": connection.name, + "endpoint_url": connection.endpoint_url, + "access_key": connection.access_key, + "region": connection.region, + } + })) + .into_response() + } else { + session.write(|s| s.push_flash("success", message)); + Redirect::to("/ui/connections").into_response() + } + } + Err(err) => { + let message = format!("Failed to update connection: {}", err); + if wants_json { + ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response() + } else { + session.write(|s| s.push_flash("danger", message)); + Redirect::to("/ui/connections").into_response() + } + } + } +} + +#[derive(serde::Deserialize, Default)] +pub struct DeleteConnectionForm { + #[serde(default)] + pub csrf_token: String, +} + +pub async fn delete_connection( + State(state): State, + Extension(session): Extension, + Path(connection_id): Path, + headers: HeaderMap, + Form(_form): Form, +) -> Response { + let wants_json = wants_json(&headers); + match state.connections.delete(&connection_id) { + Ok(true) => { + let message = "Connection deleted.".to_string(); + if wants_json { + axum::Json(json!({ "ok": true, "message": message })).into_response() + } else { + session.write(|s| s.push_flash("success", message)); + Redirect::to("/ui/connections").into_response() + } + } + Ok(false) => { + let message = "Connection not found.".to_string(); + if wants_json { + ( + StatusCode::NOT_FOUND, + axum::Json(json!({ "error": message })), + ) + .into_response() + } else { + session.write(|s| s.push_flash("danger", message)); + Redirect::to("/ui/connections").into_response() + } + } + Err(err) => { + let message = format!("Failed to delete connection: {}", err); + if wants_json { + ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response() + } else { + session.write(|s| s.push_flash("danger", message)); + Redirect::to("/ui/connections").into_response() + } + } + } +} + +#[derive(serde::Deserialize)] +pub struct WebsiteDomainForm { + pub bucket: String, + #[serde(default)] + pub domain: String, + #[serde(default)] + pub csrf_token: String, +} + +#[derive(serde::Deserialize, Default)] +pub struct WebsiteDomainDeleteForm { + #[serde(default)] + pub csrf_token: String, +} + +pub async fn create_website_domain( + State(state): State, + Extension(session): Extension, + Form(form): Form, +) -> Response { + let Some(store) = &state.website_domains else { + session.write(|s| s.push_flash("danger", "Website hosting is not enabled.")); + return 
Redirect::to("/ui/website-domains").into_response(); + }; + + let domain = crate::services::website_domains::normalize_domain(&form.domain); + let bucket = form.bucket.trim().to_string(); + if !crate::services::website_domains::is_valid_domain(&domain) { + session.write(|s| s.push_flash("danger", "Enter a valid domain name.")); + return Redirect::to("/ui/website-domains").into_response(); + } + match state.storage.bucket_exists(&bucket).await { + Ok(true) => {} + _ => { + session + .write(|s| s.push_flash("danger", format!("Bucket '{}' does not exist.", bucket))); + return Redirect::to("/ui/website-domains").into_response(); + } + } + store.set_mapping(&domain, &bucket); + session.write(|s| { + s.push_flash( + "success", + format!("Domain '{}' mapped to '{}'.", domain, bucket), + ) + }); + Redirect::to("/ui/website-domains").into_response() +} + +pub async fn update_website_domain( + State(state): State, + Extension(session): Extension, + Path(domain): Path, + Form(form): Form, +) -> Response { + let Some(store) = &state.website_domains else { + session.write(|s| s.push_flash("danger", "Website hosting is not enabled.")); + return Redirect::to("/ui/website-domains").into_response(); + }; + + let domain = crate::services::website_domains::normalize_domain(&domain); + let bucket = form.bucket.trim().to_string(); + match state.storage.bucket_exists(&bucket).await { + Ok(true) => {} + _ => { + session + .write(|s| s.push_flash("danger", format!("Bucket '{}' does not exist.", bucket))); + return Redirect::to("/ui/website-domains").into_response(); + } + } + if store.get_bucket(&domain).is_none() { + session.write(|s| s.push_flash("danger", format!("Domain '{}' was not found.", domain))); + return Redirect::to("/ui/website-domains").into_response(); + } + store.set_mapping(&domain, &bucket); + session.write(|s| s.push_flash("success", format!("Domain '{}' updated.", domain))); + Redirect::to("/ui/website-domains").into_response() +} + +pub async fn delete_website_domain( + State(state): State, + Extension(session): Extension, + Path(domain): Path, + Form(_form): Form, +) -> Response { + let Some(store) = &state.website_domains else { + session.write(|s| s.push_flash("danger", "Website hosting is not enabled.")); + return Redirect::to("/ui/website-domains").into_response(); + }; + + let domain = crate::services::website_domains::normalize_domain(&domain); + if store.delete_mapping(&domain) { + session.write(|s| s.push_flash("success", format!("Domain '{}' removed.", domain))); + } else { + session.write(|s| s.push_flash("danger", format!("Domain '{}' was not found.", domain))); + } + Redirect::to("/ui/website-domains").into_response() +} + +pub async fn update_bucket_quota( + State(state): State, + Path(bucket_name): Path, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let mut config = match state.storage.get_bucket_config(&bucket_name).await { + Ok(cfg) => cfg, + Err(e) => { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(); + } + }; + + if form.action.eq_ignore_ascii_case("remove") { + config.quota = None; + } else { + config.quota = Some(myfsio_common::types::QuotaConfig { + max_bytes: form.max_mb.map(|mb| mb.saturating_mul(1024 * 1024)), + max_objects: form.max_objects, + }); + } + + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => axum::Json(json!({ + "ok": true, + "has_quota": config.quota.is_some(), + "max_bytes": config.quota.as_ref().and_then(|q| q.max_bytes), + 
"max_objects": config.quota.as_ref().and_then(|q| q.max_objects), + "message": if config.quota.is_some() { "Quota settings saved." } else { "Quota removed." }, + })) + .into_response(), + Err(e) => ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(), + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketEncryptionForm { + pub action: String, + #[serde(default)] + pub algorithm: String, + #[serde(default)] + pub kms_key_id: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_bucket_encryption( + State(state): State, + Path(bucket_name): Path, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let mut config = match state.storage.get_bucket_config(&bucket_name).await { + Ok(cfg) => cfg, + Err(e) => { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(); + } + }; + + if form.action.eq_ignore_ascii_case("disable") { + config.encryption = None; + } else { + let mut inner = json!({ + "SSEAlgorithm": if form.algorithm == "aws:kms" { "aws:kms" } else { "AES256" } + }); + if form.algorithm == "aws:kms" && !form.kms_key_id.trim().is_empty() { + inner["KMSMasterKeyID"] = Value::String(form.kms_key_id.trim().to_string()); + } + config.encryption = Some(json!({ + "Rules": [{ + "ApplyServerSideEncryptionByDefault": inner + }] + })); + } + + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => { + let algorithm = config + .encryption + .as_ref() + .and_then(|value| value.get("Rules")) + .and_then(|rules| rules.as_array()) + .and_then(|rules| rules.first()) + .and_then(|rule| rule.get("ApplyServerSideEncryptionByDefault")) + .and_then(|inner| inner.get("SSEAlgorithm")) + .and_then(|v| v.as_str()) + .unwrap_or("AES256"); + axum::Json(json!({ + "ok": true, + "enabled": config.encryption.is_some(), + "algorithm": algorithm, + "message": if config.encryption.is_some() { "Encryption settings saved." } else { "Encryption disabled." 
}, + })) + .into_response() + } + Err(e) => ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(), + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketPolicyForm { + pub mode: String, + #[serde(default)] + pub policy_document: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_bucket_policy( + State(state): State, + Extension(session): Extension, + Path(bucket_name): Path, + headers: HeaderMap, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let wants_json = wants_json(&headers); + let redirect_url = format!("/ui/buckets/{}?tab=permissions", bucket_name); + let mut config = match state.storage.get_bucket_config(&bucket_name).await { + Ok(cfg) => cfg, + Err(e) => { + let message = e.to_string(); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to(&redirect_url).into_response(); + } + }; + + if form.mode.eq_ignore_ascii_case("delete") { + config.policy = None; + } else { + let policy: Value = match serde_json::from_str(&form.policy_document) { + Ok(value) => value, + Err(e) => { + let message = format!("Invalid policy JSON: {}", e); + if wants_json { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response(); + } + session.write(|s| s.push_flash("danger", message)); + return Redirect::to(&redirect_url).into_response(); + } + }; + config.policy = Some(policy); + } + + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => { + let message = if config.policy.is_some() { + "Bucket policy saved." + } else { + "Bucket policy deleted." 
+ }; + if wants_json { + axum::Json(json!({ + "ok": true, + "message": message, + })) + .into_response() + } else { + session.write(|s| s.push_flash("success", message)); + Redirect::to(&redirect_url).into_response() + } + } + Err(e) => { + let message = e.to_string(); + if wants_json { + ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": message })), + ) + .into_response() + } else { + session.write(|s| s.push_flash("danger", message)); + Redirect::to(&redirect_url).into_response() + } + } + } +} + +#[derive(serde::Deserialize)] +pub struct UpdateBucketWebsiteForm { + pub action: String, + #[serde(default)] + pub index_document: String, + #[serde(default)] + pub error_document: String, + #[serde(default)] + pub csrf_token: String, +} + +pub async fn update_bucket_website( + State(state): State, + Path(bucket_name): Path, + axum::extract::Form(form): axum::extract::Form, +) -> Response { + let mut config = match state.storage.get_bucket_config(&bucket_name).await { + Ok(cfg) => cfg, + Err(e) => { + return ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(); + } + }; + + if form.action.eq_ignore_ascii_case("disable") { + config.website = None; + } else { + let index_document = if form.index_document.trim().is_empty() { + "index.html".to_string() + } else { + form.index_document.trim().to_string() + }; + let error_document = form.error_document.trim().to_string(); + config.website = Some(json!({ + "index_document": index_document, + "error_document": if error_document.is_empty() { Value::Null } else { Value::String(error_document) } + })); + } + + match state.storage.set_bucket_config(&bucket_name, &config).await { + Ok(()) => { + let website = config.website.clone().unwrap_or(Value::Null); + axum::Json(json!({ + "ok": true, + "enabled": !website.is_null(), + "index_document": website.get("index_document").and_then(|v| v.as_str()).unwrap_or("index.html"), + "error_document": website.get("error_document").and_then(|v| v.as_str()).unwrap_or(""), + "message": if website.is_null() { "Website hosting disabled." } else { "Website settings saved." 
}, + })) + .into_response() + } + Err(e) => ( + StatusCode::BAD_REQUEST, + axum::Json(json!({ "error": e.to_string() })), + ) + .into_response(), + } +} diff --git a/crates/myfsio-server/src/lib.rs b/crates/myfsio-server/src/lib.rs new file mode 100644 index 0000000..9c74f94 --- /dev/null +++ b/crates/myfsio-server/src/lib.rs @@ -0,0 +1,675 @@ +pub mod config; +pub mod handlers; +pub mod middleware; +pub mod services; +pub mod session; +pub mod state; +pub mod stores; +pub mod templates; + +use axum::Router; + +pub const SERVER_HEADER: &str = concat!("MyFSIO-Rust/", env!("CARGO_PKG_VERSION")); + +pub fn create_ui_router(state: state::AppState) -> Router { + use axum::routing::{delete, get, post, put}; + use handlers::ui; + use handlers::ui_api; + use handlers::ui_pages; + + let protected = Router::new() + .route("/", get(ui::root_redirect)) + .route("/ui", get(ui::root_redirect)) + .route("/ui/", get(ui::root_redirect)) + .route( + "/ui/buckets", + get(ui_pages::buckets_overview).post(ui_pages::create_bucket), + ) + .route("/ui/buckets/create", post(ui_pages::create_bucket)) + .route("/ui/buckets/{bucket_name}", get(ui_pages::bucket_detail)) + .route( + "/ui/buckets/{bucket_name}/delete", + post(ui_pages::delete_bucket), + ) + .route( + "/ui/buckets/{bucket_name}/versioning", + post(ui_pages::update_bucket_versioning), + ) + .route( + "/ui/buckets/{bucket_name}/quota", + post(ui_pages::update_bucket_quota), + ) + .route( + "/ui/buckets/{bucket_name}/encryption", + post(ui_pages::update_bucket_encryption), + ) + .route( + "/ui/buckets/{bucket_name}/policy", + post(ui_pages::update_bucket_policy), + ) + .route( + "/ui/buckets/{bucket_name}/replication", + post(ui_pages::update_bucket_replication), + ) + .route( + "/ui/buckets/{bucket_name}/website", + post(ui_pages::update_bucket_website), + ) + .route( + "/ui/buckets/{bucket_name}/upload", + post(ui_api::upload_object), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/initiate", + post(ui_api::initiate_multipart_upload), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/{upload_id}/part", + put(ui_api::upload_multipart_part), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/{upload_id}/parts", + put(ui_api::upload_multipart_part), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/{upload_id}/complete", + post(ui_api::complete_multipart_upload), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/{upload_id}/abort", + delete(ui_api::abort_multipart_upload), + ) + .route( + "/ui/buckets/{bucket_name}/multipart/{upload_id}", + delete(ui_api::abort_multipart_upload), + ) + .route( + "/ui/buckets/{bucket_name}/objects", + get(ui_api::list_bucket_objects), + ) + .route( + "/ui/buckets/{bucket_name}/objects/stream", + get(ui_api::stream_bucket_objects), + ) + .route( + "/ui/buckets/{bucket_name}/objects/search", + get(ui_api::search_bucket_objects), + ) + .route( + "/ui/buckets/{bucket_name}/stats", + get(ui_api::bucket_stats_json), + ) + .route( + "/ui/buckets/{bucket_name}/folders", + get(ui_api::list_bucket_folders), + ) + .route( + "/ui/buckets/{bucket_name}/copy-targets", + get(ui_api::list_copy_targets), + ) + .route( + "/ui/buckets/{bucket_name}/list-for-copy", + get(ui_api::list_copy_targets), + ) + .route( + "/ui/buckets/{bucket_name}/objects/bulk-delete", + post(ui_api::bulk_delete_objects), + ) + .route( + "/ui/buckets/{bucket_name}/objects/bulk-download", + post(ui_api::bulk_download_objects), + ) + .route( + "/ui/buckets/{bucket_name}/objects/{*rest}", + 
get(ui_api::object_get_dispatch).post(ui_api::object_post_dispatch), + ) + .route( + "/ui/buckets/{bucket_name}/acl", + get(ui_api::bucket_acl).post(ui_api::update_bucket_acl), + ) + .route( + "/ui/buckets/{bucket_name}/cors", + get(ui_api::bucket_cors).post(ui_api::update_bucket_cors), + ) + .route( + "/ui/buckets/{bucket_name}/lifecycle", + get(ui_api::bucket_lifecycle).post(ui_api::update_bucket_lifecycle), + ) + .route( + "/ui/buckets/{bucket_name}/lifecycle/history", + get(ui_api::lifecycle_history), + ) + .route( + "/ui/buckets/{bucket_name}/replication/status", + get(ui_api::replication_status), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures", + get(ui_api::replication_failures).delete(ui_api::clear_replication_failures), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures/retry", + post(ui_api::retry_replication_failure), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures/retry-all", + post(ui_api::retry_all_replication_failures), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures/dismiss", + delete(ui_api::dismiss_replication_failure), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures/clear", + delete(ui_api::clear_replication_failures), + ) + .route( + "/ui/buckets/{bucket_name}/replication/failures/{*rest}", + post(ui_api::retry_replication_failure_path) + .delete(ui_api::dismiss_replication_failure_path), + ) + .route( + "/ui/buckets/{bucket_name}/bulk-delete", + post(ui_api::bulk_delete_objects), + ) + .route( + "/ui/buckets/{bucket_name}/bulk-download", + post(ui_api::bulk_download_objects), + ) + .route( + "/ui/buckets/{bucket_name}/archived", + get(ui_api::archived_objects), + ) + .route( + "/ui/buckets/{bucket_name}/archived/{*rest}", + post(ui_api::archived_post_dispatch), + ) + .route("/ui/iam", get(ui_pages::iam_dashboard)) + .route("/ui/iam/users", post(ui_pages::create_iam_user)) + .route("/ui/iam/users/{user_id}", post(ui_pages::update_iam_user)) + .route( + "/ui/iam/users/{user_id}/delete", + post(ui_pages::delete_iam_user), + ) + .route( + "/ui/iam/users/{user_id}/update", + post(ui_pages::update_iam_user), + ) + .route( + "/ui/iam/users/{user_id}/policies", + post(ui_pages::update_iam_policies), + ) + .route( + "/ui/iam/users/{user_id}/expiry", + post(ui_pages::update_iam_expiry), + ) + .route( + "/ui/iam/users/{user_id}/rotate-secret", + post(ui_pages::rotate_iam_secret), + ) + .route( + "/ui/iam/users/{user_id}/rotate", + post(ui_pages::rotate_iam_secret), + ) + .route("/ui/connections/create", post(ui_pages::create_connection)) + .route("/ui/connections/test", post(ui_api::test_connection)) + .route( + "/ui/connections/{connection_id}", + post(ui_pages::update_connection), + ) + .route( + "/ui/connections/{connection_id}/update", + post(ui_pages::update_connection), + ) + .route( + "/ui/connections/{connection_id}/delete", + post(ui_pages::delete_connection), + ) + .route( + "/ui/connections/{connection_id}/health", + get(ui_api::connection_health), + ) + .route("/ui/sites", get(ui_pages::sites_dashboard)) + .route("/ui/sites/local", post(ui_pages::update_local_site)) + .route("/ui/sites/peers", post(ui_pages::add_peer_site)) + .route( + "/ui/sites/peers/{site_id}/update", + post(ui_pages::update_peer_site), + ) + .route( + "/ui/sites/peers/{site_id}/delete", + post(ui_pages::delete_peer_site), + ) + .route("/ui/sites/peers/{site_id}/health", get(ui_api::peer_health)) + .route( + "/ui/sites/peers/{site_id}/sync-stats", + get(ui_api::peer_sync_stats), + ) + .route( + 
"/ui/sites/peers/{site_id}/bidirectional-status", + get(ui_api::peer_bidirectional_status), + ) + .route( + "/ui/connections", + get(ui_pages::connections_dashboard).post(ui_pages::create_connection), + ) + .route("/ui/metrics", get(ui_pages::metrics_dashboard)) + .route( + "/ui/metrics/settings", + get(ui_api::metrics_settings).put(ui_api::update_metrics_settings), + ) + .route("/ui/metrics/api", get(ui_api::metrics_api)) + .route("/ui/metrics/history", get(ui_api::metrics_history)) + .route("/ui/metrics/operations", get(ui_api::metrics_operations)) + .route( + "/ui/metrics/operations/history", + get(ui_api::metrics_operations_history), + ) + .route("/ui/system", get(ui_pages::system_dashboard)) + .route("/ui/system/gc/status", get(ui_api::gc_status_ui)) + .route("/ui/system/gc/run", post(ui_api::gc_run_ui)) + .route("/ui/system/gc/history", get(ui_api::gc_history_ui)) + .route( + "/ui/system/integrity/status", + get(ui_api::integrity_status_ui), + ) + .route("/ui/system/integrity/run", post(ui_api::integrity_run_ui)) + .route( + "/ui/system/integrity/history", + get(ui_api::integrity_history_ui), + ) + .route( + "/ui/website-domains", + get(ui_pages::website_domains_dashboard), + ) + .route( + "/ui/website-domains/create", + post(ui_pages::create_website_domain), + ) + .route( + "/ui/website-domains/{domain}", + post(ui_pages::update_website_domain), + ) + .route( + "/ui/website-domains/{domain}/update", + post(ui_pages::update_website_domain), + ) + .route( + "/ui/website-domains/{domain}/delete", + post(ui_pages::delete_website_domain), + ) + .route("/ui/replication/new", get(ui_pages::replication_wizard)) + .route( + "/ui/replication/create", + post(ui_pages::create_peer_replication_rules_from_query), + ) + .route( + "/ui/sites/peers/{site_id}/replication-rules", + post(ui_pages::create_peer_replication_rules), + ) + .route("/ui/docs", get(ui_pages::docs_page)) + .layer(axum::middleware::from_fn(ui::require_login)); + + let public = Router::new() + .route("/login", get(ui::login_page).post(ui::login_submit)) + .route("/logout", post(ui::logout).get(ui::logout)); + + let session_state = middleware::SessionLayerState { + store: state.sessions.clone(), + secure: false, + }; + + let static_service = tower::ServiceBuilder::new() + .layer(tower_http::set_header::SetResponseHeaderLayer::overriding( + axum::http::header::CACHE_CONTROL, + axum::http::HeaderValue::from_static("no-cache"), + )) + .service(tower_http::services::ServeDir::new(&state.config.static_dir)); + + protected + .merge(public) + .fallback(ui::not_found_page) + .layer(axum::middleware::from_fn_with_state( + state.clone(), + middleware::csrf_layer, + )) + .layer(axum::middleware::from_fn_with_state( + session_state, + middleware::session_layer, + )) + .layer(axum::middleware::from_fn_with_state( + state.clone(), + middleware::ui_metrics_layer, + )) + .with_state(state) + .nest_service("/static", static_service) + .layer(axum::middleware::from_fn(middleware::server_header)) + .layer(tower_http::compression::CompressionLayer::new()) +} + +pub fn create_router(state: state::AppState) -> Router { + let default_rate_limit = middleware::RateLimitLayerState::with_per_op( + state.config.ratelimit_default, + state.config.ratelimit_list_buckets, + state.config.ratelimit_bucket_ops, + state.config.ratelimit_object_ops, + state.config.ratelimit_head_ops, + state.config.num_trusted_proxies, + ); + let admin_rate_limit = middleware::RateLimitLayerState::new( + state.config.ratelimit_admin, + state.config.num_trusted_proxies, + ); + + let 
mut api_router = Router::new() + .route("/myfsio/health", axum::routing::get(handlers::health_check)) + .route("/", axum::routing::get(handlers::list_buckets)) + .route( + "/{bucket}", + axum::routing::put(handlers::create_bucket) + .get(handlers::get_bucket) + .delete(handlers::delete_bucket) + .head(handlers::head_bucket) + .post(handlers::post_bucket), + ) + .route( + "/{bucket}/", + axum::routing::put(handlers::create_bucket) + .get(handlers::get_bucket) + .delete(handlers::delete_bucket) + .head(handlers::head_bucket) + .post(handlers::post_bucket), + ) + .route( + "/{bucket}/{*key}", + axum::routing::put(handlers::put_object) + .get(handlers::get_object) + .delete(handlers::delete_object) + .head(handlers::head_object) + .post(handlers::post_object), + ); + + if state.config.kms_enabled { + api_router = api_router + .route( + "/kms/keys", + axum::routing::get(handlers::kms::list_keys).post(handlers::kms::create_key), + ) + .route( + "/kms/keys/{key_id}", + axum::routing::get(handlers::kms::get_key).delete(handlers::kms::delete_key), + ) + .route( + "/kms/keys/{key_id}/enable", + axum::routing::post(handlers::kms::enable_key), + ) + .route( + "/kms/keys/{key_id}/disable", + axum::routing::post(handlers::kms::disable_key), + ) + .route("/kms/encrypt", axum::routing::post(handlers::kms::encrypt)) + .route("/kms/decrypt", axum::routing::post(handlers::kms::decrypt)) + .route( + "/kms/generate-data-key", + axum::routing::post(handlers::kms::generate_data_key), + ) + .route( + "/kms/generate-data-key-without-plaintext", + axum::routing::post(handlers::kms::generate_data_key_without_plaintext), + ) + .route( + "/kms/re-encrypt", + axum::routing::post(handlers::kms::re_encrypt), + ) + .route( + "/kms/generate-random", + axum::routing::post(handlers::kms::generate_random), + ) + .route( + "/kms/client/generate-key", + axum::routing::post(handlers::kms::client_generate_key), + ) + .route( + "/kms/client/encrypt", + axum::routing::post(handlers::kms::client_encrypt), + ) + .route( + "/kms/client/decrypt", + axum::routing::post(handlers::kms::client_decrypt), + ) + .route( + "/kms/materials/{key_id}", + axum::routing::post(handlers::kms::materials), + ); + } + + api_router = api_router + .layer(axum::middleware::from_fn_with_state( + state.clone(), + middleware::auth_layer, + )) + .layer(axum::middleware::from_fn_with_state( + default_rate_limit, + middleware::rate_limit_layer, + )); + + let admin_router = Router::new() + .route( + "/admin/site", + axum::routing::get(handlers::admin::get_local_site) + .put(handlers::admin::update_local_site), + ) + .route( + "/admin/sites", + axum::routing::get(handlers::admin::list_all_sites) + .post(handlers::admin::register_peer_site), + ) + .route( + "/admin/sites/{site_id}", + axum::routing::get(handlers::admin::get_peer_site) + .put(handlers::admin::update_peer_site) + .delete(handlers::admin::delete_peer_site), + ) + .route( + "/admin/sites/{site_id}/health", + axum::routing::get(handlers::admin::check_peer_health) + .post(handlers::admin::check_peer_health), + ) + .route( + "/admin/sites/{site_id}/bidirectional-status", + axum::routing::get(handlers::admin::check_bidirectional_status), + ) + .route( + "/admin/topology", + axum::routing::get(handlers::admin::get_topology), + ) + .route( + "/admin/site/local", + axum::routing::get(handlers::admin::get_local_site) + .put(handlers::admin::update_local_site), + ) + .route( + "/admin/site/all", + axum::routing::get(handlers::admin::list_all_sites), + ) + .route( + "/admin/site/peers", + 
axum::routing::post(handlers::admin::register_peer_site), + ) + .route( + "/admin/site/peers/{site_id}", + axum::routing::get(handlers::admin::get_peer_site) + .put(handlers::admin::update_peer_site) + .delete(handlers::admin::delete_peer_site), + ) + .route( + "/admin/site/peers/{site_id}/health", + axum::routing::post(handlers::admin::check_peer_health), + ) + .route( + "/admin/site/topology", + axum::routing::get(handlers::admin::get_topology), + ) + .route( + "/admin/site/peers/{site_id}/bidirectional-status", + axum::routing::get(handlers::admin::check_bidirectional_status), + ) + .route( + "/admin/iam/users", + axum::routing::get(handlers::admin::iam_list_users), + ) + .route( + "/admin/iam/users/{identifier}", + axum::routing::get(handlers::admin::iam_get_user), + ) + .route( + "/admin/iam/users/{identifier}/policies", + axum::routing::get(handlers::admin::iam_get_user_policies), + ) + .route( + "/admin/iam/users/{identifier}/access-keys", + axum::routing::post(handlers::admin::iam_create_access_key), + ) + .route( + "/admin/iam/users/{identifier}/keys", + axum::routing::post(handlers::admin::iam_create_access_key), + ) + .route( + "/admin/iam/users/{identifier}/access-keys/{access_key}", + axum::routing::delete(handlers::admin::iam_delete_access_key), + ) + .route( + "/admin/iam/users/{identifier}/keys/{access_key}", + axum::routing::delete(handlers::admin::iam_delete_access_key), + ) + .route( + "/admin/iam/users/{identifier}/disable", + axum::routing::post(handlers::admin::iam_disable_user), + ) + .route( + "/admin/iam/users/{identifier}/enable", + axum::routing::post(handlers::admin::iam_enable_user), + ) + .route( + "/admin/website-domains", + axum::routing::get(handlers::admin::list_website_domains) + .post(handlers::admin::create_website_domain), + ) + .route( + "/admin/website-domains/{domain}", + axum::routing::get(handlers::admin::get_website_domain) + .put(handlers::admin::update_website_domain) + .delete(handlers::admin::delete_website_domain), + ) + .route( + "/admin/gc/status", + axum::routing::get(handlers::admin::gc_status), + ) + .route( + "/admin/gc/run", + axum::routing::post(handlers::admin::gc_run), + ) + .route( + "/admin/gc/history", + axum::routing::get(handlers::admin::gc_history), + ) + .route( + "/admin/integrity/status", + axum::routing::get(handlers::admin::integrity_status), + ) + .route( + "/admin/integrity/run", + axum::routing::post(handlers::admin::integrity_run), + ) + .route( + "/admin/integrity/history", + axum::routing::get(handlers::admin::integrity_history), + ) + .layer(axum::middleware::from_fn_with_state( + state.clone(), + middleware::auth_layer, + )) + .layer(axum::middleware::from_fn_with_state( + admin_rate_limit, + middleware::rate_limit_layer, + )); + + let request_body_timeout = + std::time::Duration::from_secs(state.config.request_body_timeout_secs); + + api_router + .merge(admin_router) + .layer(axum::middleware::from_fn(middleware::server_header)) + .layer(cors_layer(&state.config)) + .layer(axum::middleware::from_fn_with_state( + state.clone(), + middleware::bucket_cors_layer, + )) + .layer(axum::middleware::from_fn(middleware::request_log_layer)) + .layer(tower_http::compression::CompressionLayer::new()) + .layer(tower_http::timeout::RequestBodyTimeoutLayer::new( + request_body_timeout, + )) + .with_state(state) +} + +fn cors_layer(config: &config::ServerConfig) -> tower_http::cors::CorsLayer { + use axum::http::{HeaderName, HeaderValue, Method}; + use tower_http::cors::{Any, CorsLayer}; + + let mut layer = CorsLayer::new(); + + 
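+ // A literal "*" among the configured origins enables the wildcard; otherwise each entry is parsed into a HeaderValue and unparseable values are skipped.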
if config.cors_origins.iter().any(|origin| origin == "*") { + layer = layer.allow_origin(Any); + } else { + let origins = config + .cors_origins + .iter() + .filter_map(|origin| HeaderValue::from_str(origin).ok()) + .collect::>(); + if !origins.is_empty() { + layer = layer.allow_origin(origins); + } + } + + let methods = config + .cors_methods + .iter() + .filter_map(|method| method.parse::().ok()) + .collect::>(); + if !methods.is_empty() { + layer = layer.allow_methods(methods); + } + + if config.cors_allow_headers.iter().any(|header| header == "*") { + layer = layer.allow_headers(Any); + } else { + let headers = config + .cors_allow_headers + .iter() + .filter_map(|header| header.parse::().ok()) + .collect::>(); + if !headers.is_empty() { + layer = layer.allow_headers(headers); + } + } + + if config + .cors_expose_headers + .iter() + .any(|header| header == "*") + { + layer = layer.expose_headers(Any); + } else { + let headers = config + .cors_expose_headers + .iter() + .filter_map(|header| header.parse::().ok()) + .collect::>(); + if !headers.is_empty() { + layer = layer.expose_headers(headers); + } + } + + layer +} diff --git a/crates/myfsio-server/src/main.rs b/crates/myfsio-server/src/main.rs new file mode 100644 index 0000000..0732f3c --- /dev/null +++ b/crates/myfsio-server/src/main.rs @@ -0,0 +1,557 @@ +use clap::{Parser, Subcommand}; +use myfsio_server::config::ServerConfig; +use myfsio_server::state::AppState; + +#[derive(Parser)] +#[command( + name = "myfsio", + version, + about = "MyFSIO S3-compatible storage engine" +)] +struct Cli { + #[arg(long, help = "Validate configuration and exit")] + check_config: bool, + #[arg(long, help = "Show configuration summary and exit")] + show_config: bool, + #[arg(long, help = "Reset admin credentials and exit")] + reset_cred: bool, + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Command { + Serve, + Version, +} + +#[tokio::main] +async fn main() { + load_env_files(); + init_tracing(); + + let cli = Cli::parse(); + let config = ServerConfig::from_env(); + if !config + .ratelimit_storage_uri + .eq_ignore_ascii_case("memory://") + { + tracing::warn!( + "RATE_LIMIT_STORAGE_URI={} is not supported yet; using in-memory rate limits", + config.ratelimit_storage_uri + ); + } + + if cli.reset_cred { + reset_admin_credentials(&config); + return; + } + if cli.check_config || cli.show_config { + print_config_summary(&config); + if cli.check_config { + let issues = validate_config(&config); + for issue in &issues { + println!("{issue}"); + } + if issues.iter().any(|issue| issue.starts_with("CRITICAL:")) { + std::process::exit(1); + } + } + return; + } + + match cli.command.unwrap_or(Command::Serve) { + Command::Version => { + println!("myfsio {}", env!("CARGO_PKG_VERSION")); + return; + } + Command::Serve => {} + } + + ensure_iam_bootstrap(&config); + let bind_addr = config.bind_addr; + let ui_bind_addr = config.ui_bind_addr; + + tracing::info!("MyFSIO Rust Engine starting — API on {}", bind_addr); + if config.ui_enabled { + tracing::info!("UI will bind on {}", ui_bind_addr); + } + tracing::info!("Storage root: {}", config.storage_root.display()); + tracing::info!("Region: {}", config.region); + tracing::info!( + "Encryption: {}, KMS: {}, GC: {}, Lifecycle: {}, Integrity: {}, Metrics History: {}, Operation Metrics: {}, UI: {}", + config.encryption_enabled, + config.kms_enabled, + config.gc_enabled, + config.lifecycle_enabled, + config.integrity_enabled, + config.metrics_history_enabled, + config.metrics_enabled, + 
config.ui_enabled + ); + + let state = if config.encryption_enabled || config.kms_enabled { + AppState::new_with_encryption(config.clone()).await + } else { + AppState::new(config.clone()) + }; + + let mut bg_handles: Vec> = Vec::new(); + + if let Some(ref gc) = state.gc { + bg_handles.push(gc.clone().start_background()); + tracing::info!("GC background service started"); + } + + if let Some(ref integrity) = state.integrity { + bg_handles.push(integrity.clone().start_background()); + tracing::info!("Integrity checker background service started"); + } + + if let Some(ref metrics) = state.metrics { + bg_handles.push(metrics.clone().start_background()); + tracing::info!("Metrics collector background service started"); + } + + if let Some(ref system_metrics) = state.system_metrics { + bg_handles.push(system_metrics.clone().start_background()); + tracing::info!("System metrics history collector started"); + } + + if config.lifecycle_enabled { + let lifecycle = + std::sync::Arc::new(myfsio_server::services::lifecycle::LifecycleService::new( + state.storage.clone(), + config.storage_root.clone(), + myfsio_server::services::lifecycle::LifecycleConfig { + interval_seconds: 3600, + max_history_per_bucket: config.lifecycle_max_history_per_bucket, + }, + )); + bg_handles.push(lifecycle.start_background()); + tracing::info!("Lifecycle manager background service started"); + } + + if let Some(ref site_sync) = state.site_sync { + let worker = site_sync.clone(); + bg_handles.push(tokio::spawn(async move { + worker.run().await; + })); + tracing::info!("Site sync worker started"); + } + + let ui_enabled = config.ui_enabled; + let api_app = myfsio_server::create_router(state.clone()); + let ui_app = if ui_enabled { + Some(myfsio_server::create_ui_router(state.clone())) + } else { + None + }; + + let api_listener = match tokio::net::TcpListener::bind(bind_addr).await { + Ok(listener) => listener, + Err(err) => { + if err.kind() == std::io::ErrorKind::AddrInUse { + tracing::error!("API port already in use: {}", bind_addr); + } else { + tracing::error!("Failed to bind API {}: {}", bind_addr, err); + } + for handle in bg_handles { + handle.abort(); + } + std::process::exit(1); + } + }; + tracing::info!("API listening on {}", bind_addr); + + let ui_listener = if let Some(ref app) = ui_app { + let _ = app; + match tokio::net::TcpListener::bind(ui_bind_addr).await { + Ok(listener) => { + tracing::info!("UI listening on {}", ui_bind_addr); + Some(listener) + } + Err(err) => { + if err.kind() == std::io::ErrorKind::AddrInUse { + tracing::error!("UI port already in use: {}", ui_bind_addr); + } else { + tracing::error!("Failed to bind UI {}: {}", ui_bind_addr, err); + } + for handle in bg_handles { + handle.abort(); + } + std::process::exit(1); + } + } + } else { + None + }; + + let shutdown = shutdown_signal_shared(); + let api_shutdown = shutdown.clone(); + let api_listener = axum::serve::ListenerExt::tap_io(api_listener, |stream| { + if let Err(err) = stream.set_nodelay(true) { + tracing::trace!("failed to set TCP_NODELAY on api socket: {}", err); + } + }); + let api_task = tokio::spawn(async move { + axum::serve( + api_listener, + api_app.into_make_service_with_connect_info::(), + ) + .with_graceful_shutdown(async move { + api_shutdown.notified().await; + }) + .await + }); + + let ui_task = if let (Some(listener), Some(app)) = (ui_listener, ui_app) { + let ui_shutdown = shutdown.clone(); + let listener = axum::serve::ListenerExt::tap_io(listener, |stream| { + if let Err(err) = stream.set_nodelay(true) { + 
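+ // Failing to set TCP_NODELAY is non-fatal; it is only logged at trace level.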
tracing::trace!("failed to set TCP_NODELAY on ui socket: {}", err); + } + }); + Some(tokio::spawn(async move { + axum::serve(listener, app) + .with_graceful_shutdown(async move { + ui_shutdown.notified().await; + }) + .await + })) + } else { + None + }; + + tokio::signal::ctrl_c() + .await + .expect("Failed to listen for Ctrl+C"); + tracing::info!("Shutdown signal received"); + shutdown.notify_waiters(); + + if let Err(err) = api_task.await.unwrap_or(Ok(())) { + tracing::error!("API server exited with error: {}", err); + } + if let Some(task) = ui_task { + if let Err(err) = task.await.unwrap_or(Ok(())) { + tracing::error!("UI server exited with error: {}", err); + } + } + + for handle in bg_handles { + handle.abort(); + } +} + +fn print_config_summary(config: &ServerConfig) { + println!("MyFSIO Rust Configuration"); + println!("Version: {}", env!("CARGO_PKG_VERSION")); + println!("API bind: {}", config.bind_addr); + println!("UI bind: {}", config.ui_bind_addr); + println!("UI enabled: {}", config.ui_enabled); + println!("Storage root: {}", config.storage_root.display()); + println!("IAM config: {}", config.iam_config_path.display()); + println!("Region: {}", config.region); + println!("Encryption enabled: {}", config.encryption_enabled); + println!( + "Encryption chunk size: {} bytes", + config.encryption_chunk_size_bytes + ); + println!("KMS enabled: {}", config.kms_enabled); + println!( + "KMS data key bounds: {}-{} bytes", + config.kms_generate_data_key_min_bytes, config.kms_generate_data_key_max_bytes + ); + println!("GC enabled: {}", config.gc_enabled); + println!( + "GC interval: {} hours, dry run: {}", + config.gc_interval_hours, config.gc_dry_run + ); + println!("Integrity enabled: {}", config.integrity_enabled); + println!("Lifecycle enabled: {}", config.lifecycle_enabled); + println!( + "Lifecycle history limit: {}", + config.lifecycle_max_history_per_bucket + ); + println!( + "Website hosting enabled: {}", + config.website_hosting_enabled + ); + println!("Site sync enabled: {}", config.site_sync_enabled); + println!("API base URL: {}", config.api_base_url); + println!( + "Object key max: {} bytes, tag limit: {}", + config.object_key_max_length_bytes, config.object_tag_limit + ); + println!( + "Rate limits: default {} per {}s, admin {} per {}s", + config.ratelimit_default.max_requests, + config.ratelimit_default.window_seconds, + config.ratelimit_admin.max_requests, + config.ratelimit_admin.window_seconds + ); + println!( + "Metrics history enabled: {}", + config.metrics_history_enabled + ); + println!("Operation metrics enabled: {}", config.metrics_enabled); +} + +fn validate_config(config: &ServerConfig) -> Vec { + let mut issues = Vec::new(); + + if config.ui_enabled && config.bind_addr == config.ui_bind_addr { + issues.push( + "CRITICAL: API and UI bind addresses cannot be identical when UI is enabled." 
+ .to_string(), + ); + } + if config.presigned_url_min_expiry > config.presigned_url_max_expiry { + issues.push("CRITICAL: PRESIGNED_URL_MIN_EXPIRY_SECONDS cannot exceed PRESIGNED_URL_MAX_EXPIRY_SECONDS.".to_string()); + } + if config.encryption_chunk_size_bytes == 0 { + issues.push("CRITICAL: ENCRYPTION_CHUNK_SIZE_BYTES must be greater than zero.".to_string()); + } + if config.kms_generate_data_key_min_bytes == 0 { + issues.push( + "CRITICAL: KMS_GENERATE_DATA_KEY_MIN_BYTES must be greater than zero.".to_string(), + ); + } + if config.kms_generate_data_key_min_bytes > config.kms_generate_data_key_max_bytes { + issues.push("CRITICAL: KMS_GENERATE_DATA_KEY_MIN_BYTES cannot exceed KMS_GENERATE_DATA_KEY_MAX_BYTES.".to_string()); + } + if config.gc_interval_hours <= 0.0 { + issues.push("CRITICAL: GC_INTERVAL_HOURS must be greater than zero.".to_string()); + } + if config.bucket_config_cache_ttl_seconds < 0.0 { + issues.push("CRITICAL: BUCKET_CONFIG_CACHE_TTL_SECONDS cannot be negative.".to_string()); + } + if !config + .ratelimit_storage_uri + .eq_ignore_ascii_case("memory://") + { + issues.push(format!( + "WARNING: RATE_LIMIT_STORAGE_URI={} is not supported yet; using in-memory limits.", + config.ratelimit_storage_uri + )); + } + if let Err(err) = std::fs::create_dir_all(&config.storage_root) { + issues.push(format!( + "CRITICAL: Cannot create storage root {}: {}", + config.storage_root.display(), + err + )); + } + if let Some(parent) = config.iam_config_path.parent() { + if let Err(err) = std::fs::create_dir_all(parent) { + issues.push(format!( + "CRITICAL: Cannot create IAM config directory {}: {}", + parent.display(), + err + )); + } + } + if config.encryption_enabled && config.secret_key.is_none() { + issues.push( + "WARNING: ENCRYPTION_ENABLED=true but SECRET_KEY is not configured; secure-at-rest config encryption is unavailable.".to_string(), + ); + } + if config.site_sync_enabled && !config.website_hosting_enabled { + issues.push( + "INFO: SITE_SYNC_ENABLED=true without WEBSITE_HOSTING_ENABLED; this is valid but unrelated.".to_string(), + ); + } + + issues +} + +fn init_tracing() { + use tracing_subscriber::EnvFilter; + + let filter = EnvFilter::try_from_env("RUST_LOG") + .or_else(|_| { + EnvFilter::try_new(std::env::var("LOG_LEVEL").unwrap_or_else(|_| "INFO".to_string())) + }) + .unwrap_or_else(|_| EnvFilter::new("INFO")); + tracing_subscriber::fmt().with_env_filter(filter).init(); +} + +fn shutdown_signal_shared() -> std::sync::Arc { + std::sync::Arc::new(tokio::sync::Notify::new()) +} + +fn load_env_files() { + let cwd = std::env::current_dir().ok(); + let mut candidates: Vec = Vec::new(); + candidates.push(std::path::PathBuf::from("/opt/myfsio/myfsio.env")); + if let Some(ref dir) = cwd { + candidates.push(dir.join(".env")); + candidates.push(dir.join("myfsio.env")); + for ancestor in dir.ancestors().skip(1).take(4) { + candidates.push(ancestor.join(".env")); + candidates.push(ancestor.join("myfsio.env")); + } + } + + let mut seen = std::collections::HashSet::new(); + for path in candidates { + if !seen.insert(path.clone()) { + continue; + } + if path.is_file() { + match dotenvy::from_path_override(&path) { + Ok(()) => eprintln!("Loaded env file: {}", path.display()), + Err(e) => eprintln!("Failed to load env file {}: {}", path.display(), e), + } + } + } +} + +fn ensure_iam_bootstrap(config: &ServerConfig) { + let iam_path = &config.iam_config_path; + if iam_path.exists() { + return; + } + + let access_key = std::env::var("ADMIN_ACCESS_KEY") + .ok() + .map(|s| s.trim().to_string()) 
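+ // An unset or blank ADMIN_ACCESS_KEY falls back to a generated "AK..." key.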
+ .filter(|s| !s.is_empty()) + .unwrap_or_else(|| format!("AK{}", uuid::Uuid::new_v4().simple())); + let secret_key = std::env::var("ADMIN_SECRET_KEY") + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| format!("SK{}", uuid::Uuid::new_v4().simple())); + + let user_id = format!("u-{}", &uuid::Uuid::new_v4().simple().to_string()[..16]); + let created_at = chrono::Utc::now().to_rfc3339(); + + let body = serde_json::json!({ + "version": 2, + "users": [{ + "user_id": user_id, + "display_name": "Local Admin", + "enabled": true, + "access_keys": [{ + "access_key": access_key, + "secret_key": secret_key, + "status": "active", + "created_at": created_at, + }], + "policies": [{ + "bucket": "*", + "actions": ["*"], + "prefix": "*", + }] + }] + }); + + let json = match serde_json::to_string_pretty(&body) { + Ok(s) => s, + Err(e) => { + tracing::error!("Failed to serialize IAM bootstrap config: {}", e); + return; + } + }; + + if let Some(parent) = iam_path.parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + tracing::error!( + "Failed to create IAM config dir {}: {}", + parent.display(), + e + ); + return; + } + } + + if let Err(e) = std::fs::write(iam_path, json) { + tracing::error!( + "Failed to write IAM bootstrap config {}: {}", + iam_path.display(), + e + ); + return; + } + + tracing::info!("============================================================"); + tracing::info!("MYFSIO - ADMIN CREDENTIALS INITIALIZED"); + tracing::info!("============================================================"); + tracing::info!("Access Key: {}", access_key); + tracing::info!("Secret Key: {}", secret_key); + tracing::info!("Saved to: {}", iam_path.display()); + tracing::info!("============================================================"); +} + +fn reset_admin_credentials(config: &ServerConfig) { + if let Some(parent) = config.iam_config_path.parent() { + if let Err(err) = std::fs::create_dir_all(parent) { + eprintln!( + "Failed to create IAM config directory {}: {}", + parent.display(), + err + ); + std::process::exit(1); + } + } + + if config.iam_config_path.exists() { + let backup = config + .iam_config_path + .with_extension(format!("bak-{}", chrono::Utc::now().timestamp())); + if let Err(err) = std::fs::rename(&config.iam_config_path, &backup) { + eprintln!( + "Failed to back up existing IAM config {}: {}", + config.iam_config_path.display(), + err + ); + std::process::exit(1); + } + println!("Backed up existing IAM config to {}", backup.display()); + prune_iam_backups(&config.iam_config_path, 5); + } + + ensure_iam_bootstrap(config); + println!("Admin credentials reset."); +} + +fn prune_iam_backups(iam_path: &std::path::Path, keep: usize) { + let parent = match iam_path.parent() { + Some(p) => p, + None => return, + }; + let stem = match iam_path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s, + None => return, + }; + let prefix = format!("{}.bak-", stem); + + let entries = match std::fs::read_dir(parent) { + Ok(entries) => entries, + Err(_) => return, + }; + let mut backups: Vec<(i64, std::path::PathBuf)> = entries + .filter_map(|e| e.ok()) + .filter_map(|e| { + let path = e.path(); + let name = path.file_name()?.to_str()?; + let rest = name.strip_prefix(&prefix)?; + let ts: i64 = rest.parse().ok()?; + Some((ts, path)) + }) + .collect(); + backups.sort_by(|a, b| b.0.cmp(&a.0)); + + for (_, path) in backups.into_iter().skip(keep) { + if let Err(err) = std::fs::remove_file(&path) { + eprintln!( + "Failed to remove old IAM backup {}: {}", + 
path.display(), + err + ); + } else { + println!("Pruned old IAM backup {}", path.display()); + } + } +} diff --git a/crates/myfsio-server/src/middleware/auth.rs b/crates/myfsio-server/src/middleware/auth.rs new file mode 100644 index 0000000..494c4f5 --- /dev/null +++ b/crates/myfsio-server/src/middleware/auth.rs @@ -0,0 +1,1457 @@ +use axum::extract::{Request, State}; +use axum::http::{header, HeaderMap, Method, StatusCode, Uri}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; + +use chrono::{NaiveDateTime, Utc}; +use myfsio_auth::sigv4; +use myfsio_common::error::{S3Error, S3ErrorCode}; +use myfsio_common::types::Principal; +use myfsio_storage::traits::StorageEngine; +use serde_json::Value; +use std::time::Instant; +use tokio::io::AsyncReadExt; + +use crate::middleware::sha_body::{is_hex_sha256, Sha256VerifyBody}; +use crate::services::acl::acl_from_bucket_config; +use crate::state::AppState; + +fn wrap_body_for_sha256_verification(req: &mut Request) { + let declared = match req + .headers() + .get("x-amz-content-sha256") + .and_then(|v| v.to_str().ok()) + { + Some(v) => v.to_string(), + None => return, + }; + if !is_hex_sha256(&declared) { + return; + } + let is_chunked = req + .headers() + .get("content-encoding") + .and_then(|v| v.to_str().ok()) + .map(|v| v.to_ascii_lowercase().contains("aws-chunked")) + .unwrap_or(false); + if is_chunked { + return; + } + let body = std::mem::replace(req.body_mut(), axum::body::Body::empty()); + let wrapped = Sha256VerifyBody::new(body, declared); + *req.body_mut() = axum::body::Body::new(wrapped); +} + +#[derive(Clone, Debug)] +struct OriginalCanonicalPath(String); + +fn website_error_response( + status: StatusCode, + body: Option>, + content_type: &str, + include_body: bool, +) -> Response { + let (body, content_type) = match body { + Some(body) => (body, content_type), + None => ( + default_website_error_body(status).into_bytes(), + "text/html; charset=utf-8", + ), + }; + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, content_type.parse().unwrap()); + headers.insert(header::ACCEPT_RANGES, "bytes".parse().unwrap()); + headers.insert( + header::CONTENT_LENGTH, + body.len().to_string().parse().unwrap(), + ); + if include_body { + (status, headers, body.clone()).into_response() + } else { + (status, headers).into_response() + } +} + +fn default_website_error_body(status: StatusCode) -> String { + let code = status.as_u16(); + if status == StatusCode::NOT_FOUND { + "

<html><body>
404 page not found
</body></html>
".to_string() + } else { + let reason = status.canonical_reason().unwrap_or("Error"); + format!("{code} {reason}") + } +} + +fn parse_range_header(range_header: &str, total_size: u64) -> Option<(u64, u64)> { + let range_spec = range_header.strip_prefix("bytes=")?; + if let Some(suffix) = range_spec.strip_prefix('-') { + let suffix_len: u64 = suffix.parse().ok()?; + if suffix_len == 0 || suffix_len > total_size { + return None; + } + return Some((total_size - suffix_len, total_size - 1)); + } + + let (start_str, end_str) = range_spec.split_once('-')?; + let start: u64 = start_str.parse().ok()?; + let end = if end_str.is_empty() { + total_size.saturating_sub(1) + } else { + end_str + .parse::() + .ok()? + .min(total_size.saturating_sub(1)) + }; + + if start > end || start >= total_size { + return None; + } + Some((start, end)) +} + +fn website_content_type(key: &str, metadata: &std::collections::HashMap) -> String { + metadata + .get("__content_type__") + .filter(|value| !value.trim().is_empty()) + .cloned() + .unwrap_or_else(|| { + mime_guess::from_path(key) + .first_raw() + .unwrap_or("application/octet-stream") + .to_string() + }) +} + +fn parse_website_config(value: &Value) -> Option<(String, Option)> { + match value { + Value::Object(map) => { + let index_document = map + .get("index_document") + .or_else(|| map.get("IndexDocument")) + .and_then(|v| v.as_str()) + .unwrap_or("index.html") + .to_string(); + let error_document = map + .get("error_document") + .or_else(|| map.get("ErrorDocument")) + .and_then(|v| v.as_str()) + .map(|v| v.to_string()); + Some((index_document, error_document)) + } + Value::String(raw) => { + if let Ok(json) = serde_json::from_str::(raw) { + return parse_website_config(&json); + } + let doc = roxmltree::Document::parse(raw).ok()?; + let index_document = doc + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Suffix") + .and_then(|node| node.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) + .unwrap_or_else(|| "index.html".to_string()); + let error_document = doc + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Key") + .and_then(|node| node.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()); + Some((index_document, error_document)) + } + _ => None, + } +} + +async fn serve_website_document( + state: &AppState, + bucket: &str, + key: &str, + method: &axum::http::Method, + range_header: Option<&str>, + status: StatusCode, +) -> Option { + let metadata = state.storage.get_object_metadata(bucket, key).await.ok()?; + let (meta, mut reader) = state.storage.get_object(bucket, key).await.ok()?; + let content_type = website_content_type(key, &metadata); + + if method == axum::http::Method::HEAD { + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, content_type.parse().unwrap()); + headers.insert( + header::CONTENT_LENGTH, + meta.size.to_string().parse().unwrap(), + ); + headers.insert(header::ACCEPT_RANGES, "bytes".parse().unwrap()); + return Some((status, headers).into_response()); + } + + let mut bytes = Vec::new(); + if reader.read_to_end(&mut bytes).await.is_err() { + return None; + } + + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, content_type.parse().unwrap()); + headers.insert(header::ACCEPT_RANGES, "bytes".parse().unwrap()); + + if status == StatusCode::OK { + if let Some(range_header) = range_header { + let Some((start, end)) = parse_range_header(range_header, bytes.len() as u64) else 
{ + let mut range_headers = HeaderMap::new(); + range_headers.insert( + header::CONTENT_RANGE, + format!("bytes */{}", bytes.len()).parse().unwrap(), + ); + return Some((StatusCode::RANGE_NOT_SATISFIABLE, range_headers).into_response()); + }; + let body = bytes[start as usize..=end as usize].to_vec(); + headers.insert( + header::CONTENT_RANGE, + format!("bytes {}-{}/{}", start, end, bytes.len()) + .parse() + .unwrap(), + ); + headers.insert( + header::CONTENT_LENGTH, + body.len().to_string().parse().unwrap(), + ); + return Some((StatusCode::PARTIAL_CONTENT, headers, body).into_response()); + } + } + + headers.insert( + header::CONTENT_LENGTH, + bytes.len().to_string().parse().unwrap(), + ); + Some((status, headers, bytes).into_response()) +} + +async fn maybe_serve_website( + state: &AppState, + method: Method, + host: String, + uri_path: String, + range_header: Option, +) -> Option { + if !state.config.website_hosting_enabled { + return None; + } + if method != axum::http::Method::GET && method != axum::http::Method::HEAD { + return None; + } + let request_path = uri_path.trim_start_matches('/').to_string(); + let include_error_body = method != axum::http::Method::HEAD; + let store = state.website_domains.as_ref()?; + let bucket = store.get_bucket(&host)?; + if !matches!(state.storage.bucket_exists(&bucket).await, Ok(true)) { + return Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )); + } + + let bucket_config = state.storage.get_bucket_config(&bucket).await.ok()?; + let Some(website_config) = bucket_config.website.as_ref() else { + return Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )); + }; + let Some((index_document, error_document)) = parse_website_config(website_config) else { + return Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )); + }; + + let mut object_key = if request_path.is_empty() || uri_path.ends_with('/') { + if request_path.is_empty() { + index_document.clone() + } else { + format!("{}{}", request_path, index_document) + } + } else { + request_path.clone() + }; + + let exists = state + .storage + .head_object(&bucket, &object_key) + .await + .is_ok(); + if !exists && !request_path.is_empty() && !request_path.ends_with('/') { + let alternate = format!("{}/{}", request_path, index_document); + if state.storage.head_object(&bucket, &alternate).await.is_ok() { + object_key = alternate; + } else if let Some(error_key) = error_document.as_deref() { + return serve_website_document( + state, + &bucket, + error_key, + &method, + range_header.as_deref(), + StatusCode::NOT_FOUND, + ) + .await + .or_else(|| { + Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )) + }); + } else { + return Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )); + } + } else if !exists { + if let Some(error_key) = error_document.as_deref() { + return serve_website_document( + state, + &bucket, + error_key, + &method, + range_header.as_deref(), + StatusCode::NOT_FOUND, + ) + .await + .or_else(|| { + Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )) + }); + } + return Some(website_error_response( + StatusCode::NOT_FOUND, + None, + "text/plain; charset=utf-8", + include_error_body, + )); + } + + 
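+ // By this point key resolution has settled on `object_key`; misses have already
+ // returned the configured error document or a plain 404. With an index document
+ // of "index.html", for example:
+ //   GET /        -> "index.html"
+ //   GET /guide/  -> "guide/index.html"
+ //   GET /guide   -> "guide" when that object exists, otherwise "guide/index.html"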
serve_website_document( + state, + &bucket, + &object_key, + &method, + range_header.as_deref(), + StatusCode::OK, + ) + .await +} + +fn virtual_host_candidate(host: &str) -> Option { + let (candidate, _) = host.split_once('.')?; + if candidate.is_empty() || matches!(candidate, "www" | "s3" | "api" | "admin" | "kms") { + return None; + } + if myfsio_storage::validation::validate_bucket_name(candidate).is_some() { + return None; + } + Some(candidate.to_string()) +} + +async fn virtual_host_bucket( + state: &AppState, + host: &str, + path: &str, + method: &Method, +) -> Option { + if path.starts_with("/ui") + || path.starts_with("/admin") + || path.starts_with("/kms") + || path.starts_with("/myfsio") + { + return None; + } + + let bucket = virtual_host_candidate(host)?; + if path == format!("/{}", bucket) || path.starts_with(&format!("/{}/", bucket)) { + return None; + } + + match state.storage.bucket_exists(&bucket).await { + Ok(true) => Some(bucket), + Ok(false) if *method == Method::PUT && path == "/" => Some(bucket), + _ => None, + } +} + +fn rewrite_uri_for_virtual_host(uri: &Uri, bucket: &str) -> Option { + let path = uri.path(); + let rewritten_path = if path == "/" { + format!("/{}/", bucket) + } else { + format!("/{}{}", bucket, path) + }; + let path_and_query = match uri.query() { + Some(query) => format!("{}?{}", rewritten_path, query), + None => rewritten_path, + }; + + let mut parts = uri.clone().into_parts(); + parts.path_and_query = Some(path_and_query.parse().ok()?); + Uri::from_parts(parts).ok() +} + +fn sigv4_canonical_path(req: &Request) -> &str { + req.extensions() + .get::() + .map(|path| path.0.as_str()) + .unwrap_or_else(|| req.uri().path()) +} + +pub async fn auth_layer(State(state): State, mut req: Request, next: Next) -> Response { + let start = Instant::now(); + let uri = req.uri().clone(); + let path = uri.path().to_string(); + let method = req.method().clone(); + let query = uri.query().unwrap_or("").to_string(); + let copy_source = req + .headers() + .get("x-amz-copy-source") + .and_then(|v| v.to_str().ok()) + .map(|value| value.to_string()); + let endpoint_type = classify_endpoint(&path, &query); + let bytes_in = req + .headers() + .get(axum::http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + + let host = req + .headers() + .get(header::HOST) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.split(':').next()) + .map(|value| value.trim().to_ascii_lowercase()); + let range_header = req + .headers() + .get(header::RANGE) + .and_then(|value| value.to_str().ok()) + .map(|value| value.to_string()); + + let response = if path == "/myfsio/health" { + next.run(req).await + } else if let Some(response) = maybe_serve_website( + &state, + method.clone(), + host.clone().unwrap_or_default(), + path.clone(), + range_header, + ) + .await + { + response + } else { + let auth_path = if let Some(bucket) = + virtual_host_bucket(&state, host.as_deref().unwrap_or_default(), &path, &method).await + { + if let Some(rewritten) = rewrite_uri_for_virtual_host(req.uri(), &bucket) { + req.extensions_mut() + .insert(OriginalCanonicalPath(path.clone())); + *req.uri_mut() = rewritten; + req.uri().path().to_string() + } else { + path.clone() + } + } else { + path.clone() + }; + + match try_auth(&state, &req) { + AuthResult::NoAuth => match authorize_request( + &state, + None, + &method, + &auth_path, + &query, + copy_source.as_deref(), + ) + .await + { + Ok(()) => next.run(req).await, + Err(err) => 
error_response(err, &auth_path), + }, + AuthResult::Ok(principal) => { + if let Err(err) = authorize_request( + &state, + Some(&principal), + &method, + &auth_path, + &query, + copy_source.as_deref(), + ) + .await + { + error_response(err, &auth_path) + } else { + req.extensions_mut().insert(principal); + wrap_body_for_sha256_verification(&mut req); + next.run(req).await + } + } + AuthResult::Denied(err) => error_response(err, &auth_path), + } + }; + + if let Some(metrics) = &state.metrics { + let latency_ms = start.elapsed().as_secs_f64() * 1000.0; + let status = response.status().as_u16(); + let bytes_out = response + .headers() + .get(axum::http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + let error_code = if status >= 400 { + Some(s3_code_for_status(status)) + } else { + None + }; + metrics.record_request( + method.as_str(), + endpoint_type, + status, + latency_ms, + bytes_in, + bytes_out, + error_code, + ); + } + + response +} + +fn classify_endpoint(path: &str, query: &str) -> &'static str { + if path == "/" { + return "list_buckets"; + } + let segments: Vec<&str> = path + .trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()) + .collect(); + if segments.is_empty() { + return "other"; + } + if segments.len() == 1 { + if query.contains("uploads") { + return "list_multipart_uploads"; + } + if query.contains("versioning") { + return "bucket_versioning"; + } + if query.contains("lifecycle") { + return "bucket_lifecycle"; + } + if query.contains("policy") { + return "bucket_policy"; + } + if query.contains("website") { + return "bucket_website"; + } + if query.contains("encryption") { + return "bucket_encryption"; + } + if query.contains("replication") { + return "bucket_replication"; + } + return "bucket"; + } + if query.contains("uploadId") { + return "multipart_part"; + } + if query.contains("uploads") { + return "multipart_init"; + } + if query.contains("tagging") { + return "object_tagging"; + } + if query.contains("acl") { + return "object_acl"; + } + "object" +} + +fn s3_code_for_status(status: u16) -> &'static str { + match status { + 400 => "BadRequest", + 401 => "Unauthorized", + 403 => "AccessDenied", + 404 => "NotFound", + 405 => "MethodNotAllowed", + 409 => "Conflict", + 411 => "MissingContentLength", + 412 => "PreconditionFailed", + 413 => "EntityTooLarge", + 416 => "InvalidRange", + 500 => "InternalError", + 501 => "NotImplemented", + 503 => "ServiceUnavailable", + _ => "Other", + } +} + +enum AuthResult { + Ok(Principal), + Denied(S3Error), + NoAuth, +} + +async fn authorize_request( + state: &AppState, + principal: Option<&Principal>, + method: &Method, + path: &str, + query: &str, + copy_source: Option<&str>, +) -> Result<(), S3Error> { + if path == "/myfsio/health" { + return Ok(()); + } + if path == "/" { + if let Some(principal) = principal { + if state.iam.authorize(principal, None, "list", None) { + return Ok(()); + } + return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied")); + } + return Err(S3Error::new( + S3ErrorCode::AccessDenied, + "Missing credentials", + )); + } + + if path.starts_with("/admin/") || path.starts_with("/kms/") { + return if principal.is_some() { + Ok(()) + } else { + Err(S3Error::new( + S3ErrorCode::AccessDenied, + "Missing credentials", + )) + }; + } + + let mut segments = path + .trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()); + let bucket = match segments.next() { + Some(b) => b, + None => { + return 
Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied")); + } + }; + let remaining: Vec<&str> = segments.collect(); + + if remaining.is_empty() { + let action = resolve_bucket_action(method, query); + return authorize_action(state, principal, bucket, action, None).await; + } + + let object_key = remaining.join("/"); + if *method == Method::PUT { + if let Some(copy_source) = copy_source { + let source = copy_source.strip_prefix('/').unwrap_or(copy_source); + if let Some((src_bucket, src_key)) = source.split_once('/') { + let source_allowed = + authorize_action(state, principal, src_bucket, "read", Some(src_key)) + .await + .is_ok(); + let dest_allowed = + authorize_action(state, principal, bucket, "write", Some(&object_key)) + .await + .is_ok(); + if source_allowed && dest_allowed { + return Ok(()); + } + return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied")); + } + } + } + + let action = resolve_object_action(method, query); + authorize_action(state, principal, bucket, action, Some(&object_key)).await +} + +async fn authorize_action( + state: &AppState, + principal: Option<&Principal>, + bucket: &str, + action: &str, + object_key: Option<&str>, +) -> Result<(), S3Error> { + let iam_allowed = principal + .map(|principal| { + state + .iam + .authorize(principal, Some(bucket), action, object_key) + }) + .unwrap_or(false); + let policy_decision = evaluate_bucket_policy( + state, + principal.map(|principal| principal.access_key.as_str()), + bucket, + action, + object_key, + ) + .await; + + if matches!(policy_decision, PolicyDecision::Deny) { + return Err(S3Error::new( + S3ErrorCode::AccessDenied, + "Access denied by bucket policy", + )); + } + if iam_allowed || matches!(policy_decision, PolicyDecision::Allow) { + return Ok(()); + } + if evaluate_bucket_acl( + state, + bucket, + principal.map(|principal| principal.access_key.as_str()), + action, + principal.is_some(), + ) + .await + { + return Ok(()); + } + + if principal.is_some() { + Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied")) + } else { + Err(S3Error::new( + S3ErrorCode::AccessDenied, + "Missing credentials", + )) + } +} + +async fn evaluate_bucket_acl( + state: &AppState, + bucket: &str, + principal_id: Option<&str>, + action: &str, + is_authenticated: bool, +) -> bool { + let config = match state.storage.get_bucket_config(bucket).await { + Ok(config) => config, + Err(_) => return false, + }; + let Some(value) = config.acl.as_ref() else { + return false; + }; + let Some(acl) = acl_from_bucket_config(value) else { + return false; + }; + acl.allowed_actions(principal_id, is_authenticated) + .contains(action) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum PolicyDecision { + Allow, + Deny, + Neutral, +} + +async fn evaluate_bucket_policy( + state: &AppState, + access_key: Option<&str>, + bucket: &str, + action: &str, + object_key: Option<&str>, +) -> PolicyDecision { + let config = match state.storage.get_bucket_config(bucket).await { + Ok(config) => config, + Err(_) => return PolicyDecision::Neutral, + }; + let policy: &Value = match config.policy.as_ref() { + Some(policy) => policy, + None => return PolicyDecision::Neutral, + }; + let mut decision = PolicyDecision::Neutral; + + match policy.get("Statement") { + Some(Value::Array(items)) => { + for statement in items.iter() { + match evaluate_policy_statement(statement, access_key, bucket, action, object_key) { + PolicyDecision::Deny => return PolicyDecision::Deny, + PolicyDecision::Allow => decision = PolicyDecision::Allow, + 
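+ // An explicit Deny wins immediately, while an Allow is only remembered so a later
+ // Deny statement in the same policy can still override it. For example, a policy
+ // that allows s3:GetObject on arn:aws:s3:::docs/* but denies it on
+ // arn:aws:s3:::docs/private/* resolves to Deny for keys under "private/".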
PolicyDecision::Neutral => {} + } + } + } + Some(statement) => { + return evaluate_policy_statement(statement, access_key, bucket, action, object_key); + } + None => return PolicyDecision::Neutral, + } + + decision +} + +fn evaluate_policy_statement( + statement: &Value, + access_key: Option<&str>, + bucket: &str, + action: &str, + object_key: Option<&str>, +) -> PolicyDecision { + if !statement_matches_principal(statement, access_key) + || !statement_matches_action(statement, action) + || !statement_matches_resource(statement, bucket, object_key) + { + return PolicyDecision::Neutral; + } + + match statement + .get("Effect") + .and_then(|value| value.as_str()) + .map(|value| value.to_ascii_lowercase()) + .as_deref() + { + Some("deny") => PolicyDecision::Deny, + Some("allow") => PolicyDecision::Allow, + _ => PolicyDecision::Neutral, + } +} + +fn statement_matches_principal(statement: &Value, access_key: Option<&str>) -> bool { + match statement.get("Principal") { + Some(principal) => principal_value_matches(principal, access_key), + None => false, + } +} + +fn principal_value_matches(value: &Value, access_key: Option<&str>) -> bool { + match value { + Value::String(token) => token == "*" || access_key == Some(token.as_str()), + Value::Array(items) => items + .iter() + .any(|item| principal_value_matches(item, access_key)), + Value::Object(map) => map + .values() + .any(|item| principal_value_matches(item, access_key)), + _ => false, + } +} + +fn statement_matches_action(statement: &Value, action: &str) -> bool { + match statement.get("Action") { + Some(Value::String(value)) => policy_action_matches(value, action), + Some(Value::Array(items)) => items.iter().any(|item| { + item.as_str() + .map(|value| policy_action_matches(value, action)) + .unwrap_or(false) + }), + _ => false, + } +} + +fn policy_action_matches(policy_action: &str, requested_action: &str) -> bool { + let normalized_policy_action = normalize_policy_action(policy_action); + normalized_policy_action == "*" || normalized_policy_action == requested_action +} + +fn normalize_policy_action(action: &str) -> String { + let normalized = action.trim().to_ascii_lowercase(); + if normalized == "*" { + return normalized; + } + match normalized.as_str() { + "s3:listbucket" + | "s3:listallmybuckets" + | "s3:listbucketversions" + | "s3:listmultipartuploads" + | "s3:listparts" => "list".to_string(), + "s3:getobject" + | "s3:getobjectversion" + | "s3:getobjecttagging" + | "s3:getobjectversiontagging" + | "s3:getobjectacl" + | "s3:getbucketversioning" + | "s3:headobject" + | "s3:headbucket" => "read".to_string(), + "s3:putobject" + | "s3:createbucket" + | "s3:putobjecttagging" + | "s3:putbucketversioning" + | "s3:createmultipartupload" + | "s3:uploadpart" + | "s3:completemultipartupload" + | "s3:abortmultipartupload" + | "s3:copyobject" => "write".to_string(), + "s3:deleteobject" + | "s3:deleteobjectversion" + | "s3:deletebucket" + | "s3:deleteobjecttagging" => "delete".to_string(), + "s3:putobjectacl" | "s3:putbucketacl" | "s3:getbucketacl" => "share".to_string(), + "s3:putbucketpolicy" | "s3:getbucketpolicy" | "s3:deletebucketpolicy" => { + "policy".to_string() + } + "s3:getreplicationconfiguration" + | "s3:putreplicationconfiguration" + | "s3:deletereplicationconfiguration" + | "s3:replicateobject" + | "s3:replicatetags" + | "s3:replicatedelete" => "replication".to_string(), + "s3:getlifecycleconfiguration" + | "s3:putlifecycleconfiguration" + | "s3:deletelifecycleconfiguration" + | "s3:getbucketlifecycle" + | "s3:putbucketlifecycle" => 
"lifecycle".to_string(), + "s3:getbucketcors" | "s3:putbucketcors" | "s3:deletebucketcors" => "cors".to_string(), + other => other.to_string(), + } +} + +fn statement_matches_resource(statement: &Value, bucket: &str, object_key: Option<&str>) -> bool { + match statement.get("Resource") { + Some(Value::String(resource)) => resource_matches(resource, bucket, object_key), + Some(Value::Array(items)) => items.iter().any(|item| { + item.as_str() + .map(|resource| resource_matches(resource, bucket, object_key)) + .unwrap_or(false) + }), + _ => false, + } +} + +fn resource_matches(resource: &str, bucket: &str, object_key: Option<&str>) -> bool { + let remainder = match resource.strip_prefix("arn:aws:s3:::") { + Some(value) => value, + None => return false, + }; + + match remainder.split_once('/') { + Some((resource_bucket, resource_key)) => object_key + .map(|key| wildcard_match(bucket, resource_bucket) && wildcard_match(key, resource_key)) + .unwrap_or(false), + None => object_key.is_none() && wildcard_match(bucket, remainder), + } +} + +fn wildcard_match(value: &str, pattern: &str) -> bool { + let value = value.as_bytes(); + let pattern = pattern.as_bytes(); + let mut value_idx = 0usize; + let mut pattern_idx = 0usize; + let mut star_idx: Option = None; + let mut match_idx = 0usize; + + while value_idx < value.len() { + if pattern_idx < pattern.len() + && (pattern[pattern_idx] == b'?' + || pattern[pattern_idx].eq_ignore_ascii_case(&value[value_idx])) + { + value_idx += 1; + pattern_idx += 1; + } else if pattern_idx < pattern.len() && pattern[pattern_idx] == b'*' { + star_idx = Some(pattern_idx); + pattern_idx += 1; + match_idx = value_idx; + } else if let Some(star) = star_idx { + pattern_idx = star + 1; + match_idx += 1; + value_idx = match_idx; + } else { + return false; + } + } + + while pattern_idx < pattern.len() && pattern[pattern_idx] == b'*' { + pattern_idx += 1; + } + + pattern_idx == pattern.len() +} + +fn resolve_bucket_action(method: &Method, query: &str) -> &'static str { + if has_query_key(query, "versioning") { + return "versioning"; + } + if has_query_key(query, "tagging") { + return "tagging"; + } + if has_query_key(query, "cors") { + return "cors"; + } + if has_query_key(query, "location") { + return "list"; + } + if has_query_key(query, "encryption") { + return "encryption"; + } + if has_query_key(query, "lifecycle") { + return "lifecycle"; + } + if has_query_key(query, "acl") { + return "share"; + } + if has_query_key(query, "policy") || has_query_key(query, "policyStatus") { + return "policy"; + } + if has_query_key(query, "replication") { + return "replication"; + } + if has_query_key(query, "quota") { + return "quota"; + } + if has_query_key(query, "website") { + return "website"; + } + if has_query_key(query, "object-lock") { + return "object_lock"; + } + if has_query_key(query, "notification") { + return "notification"; + } + if has_query_key(query, "logging") { + return "logging"; + } + if has_query_key(query, "versions") || has_query_key(query, "uploads") { + return "list"; + } + if has_query_key(query, "delete") { + return "delete"; + } + + match *method { + Method::GET => "list", + Method::HEAD => "read", + Method::PUT => "create_bucket", + Method::DELETE => "delete_bucket", + Method::POST => "write", + _ => "list", + } +} + +fn resolve_object_action(method: &Method, query: &str) -> &'static str { + if has_query_key(query, "tagging") { + return if *method == Method::GET { + "read" + } else { + "write" + }; + } + if has_query_key(query, "acl") { + return if *method 
== Method::GET { + "read" + } else { + "write" + }; + } + if has_query_key(query, "retention") || has_query_key(query, "legal-hold") { + return "object_lock"; + } + if has_query_key(query, "attributes") { + return "read"; + } + if has_query_key(query, "uploads") || has_query_key(query, "uploadId") { + return match *method { + Method::GET => "read", + _ => "write", + }; + } + if has_query_key(query, "select") { + return "read"; + } + + match *method { + Method::GET | Method::HEAD => "read", + Method::PUT => "write", + Method::DELETE => "delete", + Method::POST => "write", + _ => "read", + } +} + +fn has_query_key(query: &str, key: &str) -> bool { + if query.is_empty() { + return false; + } + query + .split('&') + .filter(|part| !part.is_empty()) + .any(|part| part == key || part.starts_with(&format!("{}=", key))) +} + +fn try_auth(state: &AppState, req: &Request) -> AuthResult { + if let Some(auth_header) = req.headers().get("authorization") { + if let Ok(auth_str) = auth_header.to_str() { + if auth_str.starts_with("AWS4-HMAC-SHA256 ") { + return verify_sigv4_header(state, req, auth_str); + } + } + } + + let query = req.uri().query().unwrap_or(""); + if query.contains("X-Amz-Algorithm=AWS4-HMAC-SHA256") { + return verify_sigv4_query(state, req); + } + + if let (Some(ak), Some(sk)) = ( + req.headers() + .get("x-access-key") + .and_then(|v| v.to_str().ok()), + req.headers() + .get("x-secret-key") + .and_then(|v| v.to_str().ok()), + ) { + return match state.iam.authenticate(ak, sk) { + Some(principal) => AuthResult::Ok(principal), + None => AuthResult::Denied(S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch)), + }; + } + + AuthResult::NoAuth +} + +fn verify_sigv4_header(state: &AppState, req: &Request, auth_str: &str) -> AuthResult { + let parts: Vec<&str> = auth_str + .strip_prefix("AWS4-HMAC-SHA256 ") + .unwrap() + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect(); + + if parts.len() != 3 { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Malformed Authorization header", + )); + } + + let mut credential: &str = ""; + let mut signed_headers_str: &str = ""; + let mut provided_signature: &str = ""; + for part in &parts { + if let Some(v) = part.strip_prefix("Credential=") { + credential = v; + } else if let Some(v) = part.strip_prefix("SignedHeaders=") { + signed_headers_str = v; + } else if let Some(v) = part.strip_prefix("Signature=") { + provided_signature = v; + } + } + if credential.is_empty() || signed_headers_str.is_empty() || provided_signature.is_empty() { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Malformed Authorization header", + )); + } + + let cred_parts: Vec<&str> = credential.split('/').collect(); + if cred_parts.len() != 5 { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Malformed credential", + )); + } + + let access_key = cred_parts[0]; + let date_stamp = cred_parts[1]; + let region = cred_parts[2]; + let service = cred_parts[3]; + + let amz_date = req + .headers() + .get("x-amz-date") + .or_else(|| req.headers().get("date")) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + if amz_date.is_empty() { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::AccessDenied, + "Missing Date header", + )); + } + + if let Some(err) = + check_timestamp_freshness(amz_date, state.config.sigv4_timestamp_tolerance_secs) + { + return AuthResult::Denied(err); + } + + let secret_key = match state.iam.get_secret_key(access_key) { + Some(sk) => sk, + None => { + return 
AuthResult::Denied(S3Error::from_code(S3ErrorCode::InvalidAccessKeyId)); + } + }; + + let method = req.method().as_str(); + let canonical_uri = sigv4_canonical_path(req); + + let query_params = parse_query_params(req.uri().query().unwrap_or("")); + + let payload_hash = req + .headers() + .get("x-amz-content-sha256") + .and_then(|v| v.to_str().ok()) + .unwrap_or("UNSIGNED-PAYLOAD"); + + let signed_headers: Vec<&str> = signed_headers_str.split(';').collect(); + let header_values: Vec<(String, String)> = signed_headers + .iter() + .map(|&name| { + let value = req + .headers() + .get(name) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + (name.to_string(), value.to_string()) + }) + .collect(); + + let verified = sigv4::verify_sigv4_signature( + method, + canonical_uri, + &query_params, + signed_headers_str, + &header_values, + payload_hash, + amz_date, + date_stamp, + region, + service, + &secret_key, + provided_signature, + ); + + if !verified { + return AuthResult::Denied(S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch)); + } + + match state.iam.get_principal(access_key) { + Some(p) => AuthResult::Ok(p), + None => AuthResult::Denied(S3Error::from_code(S3ErrorCode::InvalidAccessKeyId)), + } +} + +fn verify_sigv4_query(state: &AppState, req: &Request) -> AuthResult { + let query = req.uri().query().unwrap_or(""); + let params = parse_query_params(query); + let param_map: std::collections::HashMap<&str, &str> = params + .iter() + .map(|(k, v)| (k.as_str(), v.as_str())) + .collect(); + + let credential = match param_map.get("X-Amz-Credential") { + Some(c) => *c, + None => { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing X-Amz-Credential", + )); + } + }; + + let signed_headers_str = param_map + .get("X-Amz-SignedHeaders") + .copied() + .unwrap_or("host"); + let provided_signature = match param_map.get("X-Amz-Signature") { + Some(s) => *s, + None => { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing X-Amz-Signature", + )); + } + }; + let amz_date = match param_map.get("X-Amz-Date") { + Some(d) => *d, + None => { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing X-Amz-Date", + )); + } + }; + let expires_str = match param_map.get("X-Amz-Expires") { + Some(e) => *e, + None => { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Missing X-Amz-Expires", + )); + } + }; + + let cred_parts: Vec<&str> = credential.split('/').collect(); + if cred_parts.len() != 5 { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Malformed credential", + )); + } + + let access_key = cred_parts[0]; + let date_stamp = cred_parts[1]; + let region = cred_parts[2]; + let service = cred_parts[3]; + + let expires: u64 = match expires_str.parse() { + Ok(e) => e, + Err(_) => { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "Invalid X-Amz-Expires", + )); + } + }; + + if expires < state.config.presigned_url_min_expiry + || expires > state.config.presigned_url_max_expiry + { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::InvalidArgument, + "X-Amz-Expires out of range", + )); + } + + if let Ok(request_time) = NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ") { + let request_utc = request_time.and_utc(); + let now = Utc::now(); + let elapsed = (now - request_utc).num_seconds(); + if elapsed > expires as i64 { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::AccessDenied, + "Request has expired", + )); + } + if 
elapsed < -(state.config.sigv4_timestamp_tolerance_secs as i64) { + return AuthResult::Denied(S3Error::new( + S3ErrorCode::RequestTimeTooSkewed, + "Request is too far in the future", + )); + } + } + + let secret_key = match state.iam.get_secret_key(access_key) { + Some(sk) => sk, + None => { + return AuthResult::Denied(S3Error::from_code(S3ErrorCode::InvalidAccessKeyId)); + } + }; + + let method = req.method().as_str(); + let canonical_uri = sigv4_canonical_path(req); + + let query_params_no_sig: Vec<(String, String)> = params + .iter() + .filter(|(k, _)| k != "X-Amz-Signature") + .cloned() + .collect(); + + let payload_hash = "UNSIGNED-PAYLOAD"; + + let signed_headers: Vec<&str> = signed_headers_str.split(';').collect(); + let header_values: Vec<(String, String)> = signed_headers + .iter() + .map(|&name| { + let value = req + .headers() + .get(name) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + (name.to_string(), value.to_string()) + }) + .collect(); + + let verified = sigv4::verify_sigv4_signature( + method, + canonical_uri, + &query_params_no_sig, + signed_headers_str, + &header_values, + payload_hash, + amz_date, + date_stamp, + region, + service, + &secret_key, + provided_signature, + ); + + if !verified { + return AuthResult::Denied(S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch)); + } + + match state.iam.get_principal(access_key) { + Some(p) => AuthResult::Ok(p), + None => AuthResult::Denied(S3Error::from_code(S3ErrorCode::InvalidAccessKeyId)), + } +} + +fn check_timestamp_freshness(amz_date: &str, tolerance_secs: u64) -> Option { + let request_time = NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ").ok()?; + let request_utc = request_time.and_utc(); + let now = Utc::now(); + let diff = (now - request_utc).num_seconds().unsigned_abs(); + + if diff > tolerance_secs { + return Some(S3Error::new( + S3ErrorCode::RequestTimeTooSkewed, + format!( + "The difference between the request time and the server's time is too large ({}s, tolerance {}s)", + diff, tolerance_secs + ), + )); + } + None +} + +fn parse_query_params(query: &str) -> Vec<(String, String)> { + if query.is_empty() { + return Vec::new(); + } + query + .split('&') + .filter_map(|pair| { + let mut parts = pair.splitn(2, '='); + let key = parts.next()?; + let value = parts.next().unwrap_or(""); + Some((urlencoding_decode(key), urlencoding_decode(value))) + }) + .collect() +} + +fn urlencoding_decode(s: &str) -> String { + percent_encoding::percent_decode_str(s) + .decode_utf8_lossy() + .into_owned() +} + +fn error_response(err: S3Error, resource: &str) -> Response { + let status = + StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + let request_id = uuid::Uuid::new_v4().simple().to_string(); + let body = err + .with_resource(resource.to_string()) + .with_request_id(request_id) + .to_xml(); + (status, [("content-type", "application/xml")], body).into_response() +} diff --git a/crates/myfsio-server/src/middleware/bucket_cors.rs b/crates/myfsio-server/src/middleware/bucket_cors.rs new file mode 100644 index 0000000..0da5545 --- /dev/null +++ b/crates/myfsio-server/src/middleware/bucket_cors.rs @@ -0,0 +1,281 @@ +use axum::extract::{Request, State}; +use axum::http::{HeaderMap, HeaderValue, Method, StatusCode}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use myfsio_storage::traits::StorageEngine; + +use crate::state::AppState; + +#[derive(Debug, Default, Clone)] +struct CorsRule { + allowed_origins: Vec, + allowed_methods: Vec, + 
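+ // The remaining fields mirror the optional AllowedHeader, ExposeHeader and
+ // MaxAgeSeconds elements of a CORSRule document, as read by parse_cors_config below.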
allowed_headers: Vec, + expose_headers: Vec, + max_age_seconds: Option, +} + +fn parse_cors_config(xml: &str) -> Vec { + let doc = match roxmltree::Document::parse(xml) { + Ok(d) => d, + Err(_) => return Vec::new(), + }; + let mut rules = Vec::new(); + for rule_node in doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "CORSRule") + { + let mut rule = CorsRule::default(); + for child in rule_node.children().filter(|n| n.is_element()) { + let text = child.text().unwrap_or("").trim().to_string(); + match child.tag_name().name() { + "AllowedOrigin" => rule.allowed_origins.push(text), + "AllowedMethod" => rule.allowed_methods.push(text.to_ascii_uppercase()), + "AllowedHeader" => rule.allowed_headers.push(text), + "ExposeHeader" => rule.expose_headers.push(text), + "MaxAgeSeconds" => { + if let Ok(v) = text.parse::() { + rule.max_age_seconds = Some(v); + } + } + _ => {} + } + } + rules.push(rule); + } + rules +} + +fn match_origin(pattern: &str, origin: &str) -> bool { + if pattern == "*" { + return true; + } + if pattern == origin { + return true; + } + if let Some(suffix) = pattern.strip_prefix('*') { + return origin.ends_with(suffix); + } + if let Some(prefix) = pattern.strip_suffix('*') { + return origin.starts_with(prefix); + } + false +} + +fn match_header(pattern: &str, header: &str) -> bool { + if pattern == "*" { + return true; + } + pattern.eq_ignore_ascii_case(header) +} + +fn find_matching_rule<'a>( + rules: &'a [CorsRule], + origin: &str, + method: &str, + request_headers: &[&str], +) -> Option<&'a CorsRule> { + rules.iter().find(|rule| { + let origin_match = rule + .allowed_origins + .iter() + .any(|p| match_origin(p, origin)); + if !origin_match { + return false; + } + let method_match = rule + .allowed_methods + .iter() + .any(|m| m.eq_ignore_ascii_case(method)); + if !method_match { + return false; + } + request_headers.iter().all(|h| { + rule.allowed_headers + .iter() + .any(|pattern| match_header(pattern, h)) + }) + }) +} + +fn find_matching_rule_for_actual<'a>( + rules: &'a [CorsRule], + origin: &str, + method: &str, +) -> Option<&'a CorsRule> { + rules.iter().find(|rule| { + rule.allowed_origins + .iter() + .any(|p| match_origin(p, origin)) + && rule + .allowed_methods + .iter() + .any(|m| m.eq_ignore_ascii_case(method)) + }) +} + +fn bucket_from_path(path: &str) -> Option<&str> { + let trimmed = path.trim_start_matches('/'); + if trimmed.is_empty() { + return None; + } + if trimmed.starts_with("admin/") + || trimmed.starts_with("myfsio/") + || trimmed.starts_with("kms/") + { + return None; + } + let first = trimmed.split('/').next().unwrap_or(""); + if myfsio_storage::validation::validate_bucket_name(first).is_some() { + return None; + } + Some(first) +} + +async fn bucket_from_host(state: &AppState, headers: &HeaderMap) -> Option { + let host = headers + .get("host") + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.split(':').next())? 
+ .trim() + .to_ascii_lowercase(); + let (candidate, _) = host.split_once('.')?; + if myfsio_storage::validation::validate_bucket_name(candidate).is_some() { + return None; + } + match state.storage.bucket_exists(candidate).await { + Ok(true) => Some(candidate.to_string()), + _ => None, + } +} + +async fn resolve_bucket(state: &AppState, headers: &HeaderMap, path: &str) -> Option { + if let Some(name) = bucket_from_host(state, headers).await { + return Some(name); + } + bucket_from_path(path).map(str::to_string) +} + +fn apply_rule_headers(headers: &mut axum::http::HeaderMap, rule: &CorsRule, origin: &str) { + headers.remove("access-control-allow-origin"); + headers.remove("vary"); + if let Ok(val) = HeaderValue::from_str(origin) { + headers.insert("access-control-allow-origin", val); + } + headers.insert("vary", HeaderValue::from_static("Origin")); + if !rule.expose_headers.is_empty() { + let value = rule.expose_headers.join(", "); + if let Ok(val) = HeaderValue::from_str(&value) { + headers.remove("access-control-expose-headers"); + headers.insert("access-control-expose-headers", val); + } + } +} + +fn strip_cors_response_headers(headers: &mut HeaderMap) { + headers.remove("access-control-allow-origin"); + headers.remove("access-control-allow-credentials"); + headers.remove("access-control-expose-headers"); + headers.remove("access-control-allow-methods"); + headers.remove("access-control-allow-headers"); + headers.remove("access-control-max-age"); +} + +pub async fn bucket_cors_layer( + State(state): State, + req: Request, + next: Next, +) -> Response { + let path = req.uri().path().to_string(); + let bucket = match resolve_bucket(&state, req.headers(), &path).await { + Some(name) => name, + None => return next.run(req).await, + }; + + let origin = req + .headers() + .get("origin") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + let bucket_rules = if origin.is_some() { + match state.storage.get_bucket_config(&bucket).await { + Ok(cfg) => cfg + .cors + .as_ref() + .map(|v| match v { + serde_json::Value::String(s) => s.clone(), + other => other.to_string(), + }) + .map(|xml| parse_cors_config(&xml)) + .filter(|rules| !rules.is_empty()), + Err(_) => None, + } + } else { + None + }; + + let is_preflight = req.method() == Method::OPTIONS + && req.headers().contains_key("access-control-request-method"); + + if is_preflight { + if let (Some(origin), Some(rules)) = (origin.as_deref(), bucket_rules.as_ref()) { + let req_method = req + .headers() + .get("access-control-request-method") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let req_headers_raw = req + .headers() + .get("access-control-request-headers") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let req_headers: Vec<&str> = req_headers_raw + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect(); + + if let Some(rule) = find_matching_rule(rules, origin, req_method, &req_headers) { + let mut resp = StatusCode::NO_CONTENT.into_response(); + apply_rule_headers(resp.headers_mut(), rule, origin); + let methods_value = rule.allowed_methods.join(", "); + if let Ok(val) = HeaderValue::from_str(&methods_value) { + resp.headers_mut() + .insert("access-control-allow-methods", val); + } + let headers_value = if rule.allowed_headers.iter().any(|h| h == "*") { + req_headers_raw.to_string() + } else { + rule.allowed_headers.join(", ") + }; + if !headers_value.is_empty() { + if let Ok(val) = HeaderValue::from_str(&headers_value) { + resp.headers_mut() + .insert("access-control-allow-headers", val); + } + } 
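+ // A hypothetical preflight -- OPTIONS /my-bucket/photo.png with
+ // Origin: https://app.example.com and Access-Control-Request-Method: PUT --
+ // matched against a rule permitting that origin and method ends here as a 204
+ // carrying allow-origin/-methods/-headers, plus the max-age header added just
+ // below when the rule sets MaxAgeSeconds.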
+ if let Some(max_age) = rule.max_age_seconds { + if let Ok(val) = HeaderValue::from_str(&max_age.to_string()) { + resp.headers_mut().insert("access-control-max-age", val); + } + } + return resp; + } + return (StatusCode::FORBIDDEN, "CORSResponse: CORS is not enabled").into_response(); + } + } + + let method = req.method().clone(); + let mut resp = next.run(req).await; + + if let (Some(origin), Some(rules)) = (origin.as_deref(), bucket_rules.as_ref()) { + if let Some(rule) = find_matching_rule_for_actual(rules, origin, method.as_str()) { + apply_rule_headers(resp.headers_mut(), rule, origin); + } else { + strip_cors_response_headers(resp.headers_mut()); + } + } + + resp +} diff --git a/crates/myfsio-server/src/middleware/mod.rs b/crates/myfsio-server/src/middleware/mod.rs new file mode 100644 index 0000000..af9adeb --- /dev/null +++ b/crates/myfsio-server/src/middleware/mod.rs @@ -0,0 +1,126 @@ +mod auth; +mod bucket_cors; +pub mod ratelimit; +pub mod session; +pub(crate) mod sha_body; + +pub use auth::auth_layer; +pub use bucket_cors::bucket_cors_layer; +pub use ratelimit::{rate_limit_layer, RateLimitLayerState}; +pub use session::{csrf_layer, session_layer, SessionHandle, SessionLayerState}; + +use axum::extract::{Request, State}; +use axum::middleware::Next; +use axum::response::Response; +use std::time::Instant; + +use crate::state::AppState; + +pub async fn server_header(req: Request, next: Next) -> Response { + let mut resp = next.run(req).await; + resp.headers_mut() + .insert("server", crate::SERVER_HEADER.parse().unwrap()); + resp +} + +pub async fn request_log_layer(req: Request, next: Next) -> Response { + let start = Instant::now(); + let method = req.method().clone(); + let uri = req.uri().clone(); + let version = req.version(); + let remote = req + .extensions() + .get::>() + .map(|ci| ci.0.ip().to_string()) + .unwrap_or_else(|| "-".to_string()); + + let response = next.run(req).await; + + let status = response.status().as_u16(); + let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0; + let bytes_out = response + .headers() + .get(axum::http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + + tracing::info!( + target: "myfsio::access", + remote = %remote, + method = %method, + uri = %uri, + version = ?version, + status, + bytes_out = bytes_out.unwrap_or(0), + elapsed_ms = format!("{:.3}", elapsed_ms), + "request" + ); + + response +} + +pub async fn ui_metrics_layer(State(state): State, req: Request, next: Next) -> Response { + let metrics = match state.metrics.clone() { + Some(m) => m, + None => return next.run(req).await, + }; + let start = Instant::now(); + let method = req.method().clone(); + let path = req.uri().path().to_string(); + let endpoint_type = classify_ui_endpoint(&path); + let bytes_in = req + .headers() + .get(axum::http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + + let response = next.run(req).await; + + let latency_ms = start.elapsed().as_secs_f64() * 1000.0; + let status = response.status().as_u16(); + let bytes_out = response + .headers() + .get(axum::http::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + let error_code = if status >= 400 { Some("UIError") } else { None }; + metrics.record_request( + method.as_str(), + endpoint_type, + status, + latency_ms, + bytes_in, + bytes_out, + error_code, + ); + + response +} + +fn classify_ui_endpoint(path: &str) -> &'static str { + if 
path.contains("/upload") { + "ui_upload" + } else if path.starts_with("/ui/buckets/") { + "ui_bucket" + } else if path.starts_with("/ui/iam") { + "ui_iam" + } else if path.starts_with("/ui/sites") { + "ui_sites" + } else if path.starts_with("/ui/connections") { + "ui_connections" + } else if path.starts_with("/ui/metrics") { + "ui_metrics" + } else if path.starts_with("/ui/system") { + "ui_system" + } else if path.starts_with("/ui/website-domains") { + "ui_website_domains" + } else if path.starts_with("/ui/replication") { + "ui_replication" + } else if path.starts_with("/login") || path.starts_with("/logout") { + "ui_auth" + } else { + "ui_other" + } +} diff --git a/crates/myfsio-server/src/middleware/ratelimit.rs b/crates/myfsio-server/src/middleware/ratelimit.rs new file mode 100644 index 0000000..e1c624d --- /dev/null +++ b/crates/myfsio-server/src/middleware/ratelimit.rs @@ -0,0 +1,313 @@ +use std::collections::HashMap; +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use axum::extract::{ConnectInfo, Request, State}; +use axum::http::{header, Method, StatusCode}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use parking_lot::Mutex; + +use crate::config::RateLimitSetting; + +#[derive(Clone)] +pub struct RateLimitLayerState { + default_limiter: Arc, + list_buckets_limiter: Option>, + bucket_ops_limiter: Option>, + object_ops_limiter: Option>, + head_ops_limiter: Option>, + num_trusted_proxies: usize, +} + +impl RateLimitLayerState { + pub fn new(setting: RateLimitSetting, num_trusted_proxies: usize) -> Self { + Self { + default_limiter: Arc::new(FixedWindowLimiter::new(setting)), + list_buckets_limiter: None, + bucket_ops_limiter: None, + object_ops_limiter: None, + head_ops_limiter: None, + num_trusted_proxies, + } + } + + pub fn with_per_op( + default: RateLimitSetting, + list_buckets: RateLimitSetting, + bucket_ops: RateLimitSetting, + object_ops: RateLimitSetting, + head_ops: RateLimitSetting, + num_trusted_proxies: usize, + ) -> Self { + Self { + default_limiter: Arc::new(FixedWindowLimiter::new(default)), + list_buckets_limiter: (list_buckets != default) + .then(|| Arc::new(FixedWindowLimiter::new(list_buckets))), + bucket_ops_limiter: (bucket_ops != default) + .then(|| Arc::new(FixedWindowLimiter::new(bucket_ops))), + object_ops_limiter: (object_ops != default) + .then(|| Arc::new(FixedWindowLimiter::new(object_ops))), + head_ops_limiter: (head_ops != default) + .then(|| Arc::new(FixedWindowLimiter::new(head_ops))), + num_trusted_proxies, + } + } + + fn select_limiter(&self, req: &Request) -> &Arc { + let path = req.uri().path(); + let method = req.method(); + if path == "/" && *method == Method::GET { + if let Some(ref limiter) = self.list_buckets_limiter { + return limiter; + } + } + let segments: Vec<&str> = path + .trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()) + .collect(); + if *method == Method::HEAD { + if let Some(ref limiter) = self.head_ops_limiter { + return limiter; + } + } + if segments.len() == 1 { + if let Some(ref limiter) = self.bucket_ops_limiter { + return limiter; + } + } else if segments.len() >= 2 { + if let Some(ref limiter) = self.object_ops_limiter { + return limiter; + } + } + &self.default_limiter + } +} + +#[derive(Debug)] +struct FixedWindowLimiter { + setting: RateLimitSetting, + state: Mutex, +} + +#[derive(Debug)] +struct LimiterState { + entries: HashMap, + last_sweep: Instant, +} + +#[derive(Debug, Clone, Copy)] +struct LimitEntry { + window_started: 
Instant, + count: u32, +} + +const SWEEP_MIN_INTERVAL: Duration = Duration::from_secs(60); +const SWEEP_ENTRY_THRESHOLD: usize = 1024; + +impl FixedWindowLimiter { + fn new(setting: RateLimitSetting) -> Self { + Self { + setting, + state: Mutex::new(LimiterState { + entries: HashMap::new(), + last_sweep: Instant::now(), + }), + } + } + + fn check(&self, key: &str) -> Result<(), u64> { + let now = Instant::now(); + let window = Duration::from_secs(self.setting.window_seconds.max(1)); + let mut state = self.state.lock(); + + if state.entries.len() >= SWEEP_ENTRY_THRESHOLD + && now.duration_since(state.last_sweep) >= SWEEP_MIN_INTERVAL + { + state + .entries + .retain(|_, entry| now.duration_since(entry.window_started) < window); + state.last_sweep = now; + } + + let entry = state.entries.entry(key.to_string()).or_insert(LimitEntry { + window_started: now, + count: 0, + }); + + if now.duration_since(entry.window_started) >= window { + entry.window_started = now; + entry.count = 0; + } + + if entry.count >= self.setting.max_requests { + let elapsed = now.duration_since(entry.window_started); + let retry_after = window.saturating_sub(elapsed).as_secs().max(1); + return Err(retry_after); + } + + entry.count += 1; + Ok(()) + } +} + +pub async fn rate_limit_layer( + State(state): State, + req: Request, + next: Next, +) -> Response { + let key = rate_limit_key(&req, state.num_trusted_proxies); + let limiter = state.select_limiter(&req); + match limiter.check(&key) { + Ok(()) => next.run(req).await, + Err(retry_after) => { + let resource = req.uri().path().to_string(); + too_many_requests(retry_after, &resource) + } + } +} + +fn too_many_requests(retry_after: u64, resource: &str) -> Response { + let request_id = uuid::Uuid::new_v4().simple().to_string(); + let body = myfsio_xml::response::rate_limit_exceeded_xml(resource, &request_id); + let mut response = ( + StatusCode::SERVICE_UNAVAILABLE, + [ + (header::CONTENT_TYPE, "application/xml".to_string()), + (header::RETRY_AFTER, retry_after.to_string()), + ], + body, + ) + .into_response(); + if let Ok(value) = request_id.parse() { + response + .headers_mut() + .insert("x-amz-request-id", value); + } + response +} + +fn rate_limit_key(req: &Request, num_trusted_proxies: usize) -> String { + format!("ip:{}", client_ip(req, num_trusted_proxies)) +} + +fn client_ip(req: &Request, num_trusted_proxies: usize) -> String { + if num_trusted_proxies > 0 { + if let Some(value) = req + .headers() + .get("x-forwarded-for") + .and_then(|v| v.to_str().ok()) + { + let parts = value + .split(',') + .map(|part| part.trim()) + .filter(|part| !part.is_empty()) + .collect::>(); + if parts.len() > num_trusted_proxies { + let index = parts.len() - num_trusted_proxies - 1; + return parts[index].to_string(); + } + } + + if let Some(value) = req.headers().get("x-real-ip").and_then(|v| v.to_str().ok()) { + if !value.trim().is_empty() { + return value.trim().to_string(); + } + } + } + + req.extensions() + .get::>() + .map(|ConnectInfo(addr)| addr.ip().to_string()) + .unwrap_or_else(|| "unknown".to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::body::Body; + + #[test] + fn honors_trusted_proxy_count_for_forwarded_for() { + let req = Request::builder() + .header("x-forwarded-for", "198.51.100.1, 10.0.0.1, 10.0.0.2") + .body(Body::empty()) + .unwrap(); + assert_eq!(rate_limit_key(&req, 2), "ip:198.51.100.1"); + assert_eq!(rate_limit_key(&req, 1), "ip:10.0.0.1"); + } + + #[test] + fn falls_back_to_connect_info_when_forwarded_for_has_too_few_hops() { + let mut 
req = Request::builder() + .header("x-forwarded-for", "198.51.100.1") + .body(Body::empty()) + .unwrap(); + req.extensions_mut() + .insert(ConnectInfo(SocketAddr::from(([203, 0, 113, 9], 443)))); + + assert_eq!(rate_limit_key(&req, 2), "ip:203.0.113.9"); + } + + #[test] + fn ignores_forwarded_headers_when_no_proxies_are_trusted() { + let mut req = Request::builder() + .header("x-forwarded-for", "198.51.100.1") + .header("x-real-ip", "198.51.100.2") + .body(Body::empty()) + .unwrap(); + req.extensions_mut() + .insert(ConnectInfo(SocketAddr::from(([203, 0, 113, 9], 443)))); + + assert_eq!(rate_limit_key(&req, 0), "ip:203.0.113.9"); + } + + #[test] + fn uses_connect_info_for_direct_clients() { + let mut req = Request::builder().body(Body::empty()).unwrap(); + req.extensions_mut() + .insert(ConnectInfo(SocketAddr::from(([203, 0, 113, 10], 443)))); + + assert_eq!(rate_limit_key(&req, 0), "ip:203.0.113.10"); + } + + #[test] + fn fixed_window_rejects_after_quota() { + let limiter = FixedWindowLimiter::new(RateLimitSetting::new(2, 60)); + assert!(limiter.check("k").is_ok()); + assert!(limiter.check("k").is_ok()); + assert!(limiter.check("k").is_err()); + } + + #[test] + fn sweep_removes_expired_entries() { + let limiter = FixedWindowLimiter::new(RateLimitSetting::new(10, 1)); + let far_past = Instant::now() - (SWEEP_MIN_INTERVAL + Duration::from_secs(5)); + { + let mut state = limiter.state.lock(); + for i in 0..(SWEEP_ENTRY_THRESHOLD + 1024) { + state.entries.insert( + format!("stale-{}", i), + LimitEntry { + window_started: far_past, + count: 5, + }, + ); + } + state.last_sweep = far_past; + } + let seeded = limiter.state.lock().entries.len(); + assert_eq!(seeded, SWEEP_ENTRY_THRESHOLD + 1024); + + assert!(limiter.check("fresh").is_ok()); + + let remaining = limiter.state.lock().entries.len(); + assert_eq!( + remaining, 1, + "expected sweep to leave only the fresh entry, got {}", + remaining + ); + } +} diff --git a/crates/myfsio-server/src/middleware/session.rs b/crates/myfsio-server/src/middleware/session.rs new file mode 100644 index 0000000..3b36417 --- /dev/null +++ b/crates/myfsio-server/src/middleware/session.rs @@ -0,0 +1,267 @@ +use std::sync::Arc; + +use axum::extract::{Request, State}; +use axum::http::{header, HeaderValue, StatusCode}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use cookie::{Cookie, SameSite}; +use parking_lot::Mutex; + +use crate::session::{ + csrf_tokens_match, SessionData, SessionStore, CSRF_FIELD_NAME, CSRF_HEADER_NAME, + SESSION_COOKIE_NAME, +}; + +#[derive(Clone)] +pub struct SessionLayerState { + pub store: Arc, + pub secure: bool, +} + +#[derive(Clone)] +pub struct SessionHandle { + pub id: String, + inner: Arc>, + dirty: Arc>, +} + +impl SessionHandle { + pub fn new(id: String, data: SessionData) -> Self { + Self { + id, + inner: Arc::new(Mutex::new(data)), + dirty: Arc::new(Mutex::new(false)), + } + } + + pub fn read(&self, f: impl FnOnce(&SessionData) -> R) -> R { + let guard = self.inner.lock(); + f(&guard) + } + + pub fn write(&self, f: impl FnOnce(&mut SessionData) -> R) -> R { + let mut guard = self.inner.lock(); + let out = f(&mut guard); + *self.dirty.lock() = true; + out + } + + pub fn snapshot(&self) -> SessionData { + self.inner.lock().clone() + } + + pub fn is_dirty(&self) -> bool { + *self.dirty.lock() + } +} + +pub async fn session_layer( + State(state): State, + mut req: Request, + next: Next, +) -> Response { + let cookie_id = extract_session_cookie(&req); + + let (session_id, session_data, is_new) = + match 
cookie_id.and_then(|id| state.store.get(&id).map(|data| (id.clone(), data))) { + Some((id, data)) => (id, data, false), + None => { + let (id, data) = state.store.create(); + (id, data, true) + } + }; + + let handle = SessionHandle::new(session_id.clone(), session_data); + req.extensions_mut().insert(handle.clone()); + + let mut resp = next.run(req).await; + + if handle.is_dirty() { + state.store.save(&handle.id, handle.snapshot()); + } + + if is_new { + let cookie = build_session_cookie(&session_id, state.secure); + if let Ok(value) = HeaderValue::from_str(&cookie.to_string()) { + resp.headers_mut().append(header::SET_COOKIE, value); + } + } + + resp +} + +pub async fn csrf_layer( + State(state): State, + req: Request, + next: Next, +) -> Response { + const CSRF_HEADER_ALIAS: &str = "x-csrftoken"; + + let method = req.method().clone(); + let needs_check = matches!( + method, + axum::http::Method::POST + | axum::http::Method::PUT + | axum::http::Method::PATCH + | axum::http::Method::DELETE + ); + + if !needs_check { + return next.run(req).await; + } + + let is_ui = req.uri().path().starts_with("/ui/") + || req.uri().path() == "/ui" + || req.uri().path() == "/login" + || req.uri().path() == "/logout"; + if !is_ui { + return next.run(req).await; + } + + let handle = match req.extensions().get::() { + Some(h) => h.clone(), + None => return (StatusCode::FORBIDDEN, "Missing session").into_response(), + }; + + let expected = handle.read(|s| s.csrf_token.clone()); + + let header_token = req + .headers() + .get(CSRF_HEADER_NAME) + .or_else(|| req.headers().get(CSRF_HEADER_ALIAS)) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + if let Some(token) = header_token.as_deref() { + if csrf_tokens_match(&expected, token) { + return next.run(req).await; + } + } + + let content_type = req + .headers() + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + + let (parts, body) = req.into_parts(); + let bytes = match axum::body::to_bytes(body, usize::MAX).await { + Ok(b) => b, + Err(_) => return (StatusCode::BAD_REQUEST, "Body read failed").into_response(), + }; + + let form_token = if content_type.starts_with("application/x-www-form-urlencoded") { + extract_form_token(&bytes) + } else if content_type.starts_with("multipart/form-data") { + extract_multipart_token(&content_type, &bytes) + } else if content_type.starts_with("application/json") { + extract_json_token(&bytes) + } else { + None + }; + + if let Some(token) = form_token { + if csrf_tokens_match(&expected, &token) { + let req = Request::from_parts(parts, axum::body::Body::from(bytes)); + return next.run(req).await; + } + } + + tracing::warn!( + path = %parts.uri.path(), + content_type = %content_type, + expected_len = expected.len(), + header_present = header_token.is_some(), + "CSRF token mismatch" + ); + + let accept = parts + .headers + .get(header::ACCEPT) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let is_form_submit = content_type.starts_with("application/x-www-form-urlencoded") + || content_type.starts_with("multipart/form-data"); + let wants_json = + accept.contains("application/json") || content_type.starts_with("application/json"); + + if is_form_submit && !wants_json { + let ctx = crate::handlers::ui::base_context(&handle, None); + let mut resp = crate::handlers::ui::render(&state, "csrf_error.html", &ctx); + *resp.status_mut() = StatusCode::FORBIDDEN; + return resp; + } + + let mut resp = ( + StatusCode::FORBIDDEN, + [(header::CONTENT_TYPE, "application/json")], + 
r#"{"error":"Invalid CSRF token. Send it via the X-CSRF-Token header or a csrf_token field in the form/JSON body."}"#, + ) + .into_response(); + *resp.status_mut() = StatusCode::FORBIDDEN; + resp +} + +fn extract_multipart_token(content_type: &str, body: &[u8]) -> Option { + let boundary = multer::parse_boundary(content_type).ok()?; + let prefix = format!("--{}", boundary); + let text = std::str::from_utf8(body).ok()?; + let needle = "name=\"csrf_token\""; + let idx = text.find(needle)?; + let after = &text[idx + needle.len()..]; + let body_start = after.find("\r\n\r\n")? + 4; + let tail = &after[body_start..]; + let end = tail + .find(&format!("\r\n--{}", prefix.trim_start_matches("--"))) + .or_else(|| tail.find("\r\n--")) + .unwrap_or(tail.len()); + Some(tail[..end].trim().to_string()) +} + +fn extract_session_cookie(req: &Request) -> Option { + let raw = req.headers().get(header::COOKIE)?.to_str().ok()?; + for pair in raw.split(';') { + if let Ok(cookie) = Cookie::parse(pair.trim().to_string()) { + if cookie.name() == SESSION_COOKIE_NAME { + return Some(cookie.value().to_string()); + } + } + } + None +} + +fn build_session_cookie(id: &str, secure: bool) -> Cookie<'static> { + let mut cookie = Cookie::new(SESSION_COOKIE_NAME, id.to_string()); + cookie.set_http_only(true); + cookie.set_same_site(SameSite::Lax); + cookie.set_secure(secure); + cookie.set_path("/"); + cookie +} + +fn extract_json_token(body: &[u8]) -> Option { + let value: serde_json::Value = serde_json::from_slice(body).ok()?; + value + .get(CSRF_FIELD_NAME) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) +} + +fn extract_form_token(body: &[u8]) -> Option { + let text = std::str::from_utf8(body).ok()?; + let prefix = format!("{}=", CSRF_FIELD_NAME); + for pair in text.split('&') { + if let Some(rest) = pair.strip_prefix(&prefix) { + return urldecode(rest); + } + } + None +} + +fn urldecode(s: &str) -> Option { + percent_encoding::percent_decode_str(&s.replace('+', " ")) + .decode_utf8() + .ok() + .map(|c| c.into_owned()) +} diff --git a/crates/myfsio-server/src/middleware/sha_body.rs b/crates/myfsio-server/src/middleware/sha_body.rs new file mode 100644 index 0000000..cd2a97f --- /dev/null +++ b/crates/myfsio-server/src/middleware/sha_body.rs @@ -0,0 +1,107 @@ +use axum::body::Body; +use bytes::Bytes; +use http_body::{Body as HttpBody, Frame}; +use sha2::{Digest, Sha256}; +use std::error::Error; +use std::fmt; +use std::pin::Pin; +use std::task::{Context, Poll}; + +#[derive(Debug)] +struct Sha256MismatchError { + expected: String, + computed: String, +} + +impl Sha256MismatchError { + fn message(&self) -> String { + format!( + "The x-amz-content-sha256 you specified did not match what we received (expected {}, computed {})", + self.expected, self.computed + ) + } +} + +impl fmt::Display for Sha256MismatchError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "XAmzContentSHA256Mismatch: expected {}, computed {}", + self.expected, self.computed + ) + } +} + +impl Error for Sha256MismatchError {} + +pub struct Sha256VerifyBody { + inner: Body, + expected: String, + hasher: Option, +} + +impl Sha256VerifyBody { + pub fn new(inner: Body, expected_hex: String) -> Self { + Self { + inner, + expected: expected_hex.to_ascii_lowercase(), + hasher: Some(Sha256::new()), + } + } +} + +impl HttpBody for Sha256VerifyBody { + type Data = Bytes; + type Error = Box; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll, Self::Error>>> { + let this = self.as_mut().get_mut(); + 
match Pin::new(&mut this.inner).poll_frame(cx) { + Poll::Pending => Poll::Pending, + Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(Box::new(e)))), + Poll::Ready(Some(Ok(frame))) => { + if let Some(data) = frame.data_ref() { + if let Some(h) = this.hasher.as_mut() { + h.update(data); + } + } + Poll::Ready(Some(Ok(frame))) + } + Poll::Ready(None) => { + if let Some(hasher) = this.hasher.take() { + let computed = hex::encode(hasher.finalize()); + if computed != this.expected { + return Poll::Ready(Some(Err(Box::new(Sha256MismatchError { + expected: this.expected.clone(), + computed, + })))); + } + } + Poll::Ready(None) + } + } + } + + fn is_end_stream(&self) -> bool { + self.inner.is_end_stream() + } + + fn size_hint(&self) -> http_body::SizeHint { + self.inner.size_hint() + } +} + +pub fn is_hex_sha256(s: &str) -> bool { + s.len() == 64 && s.bytes().all(|b| b.is_ascii_hexdigit()) +} + +pub fn sha256_mismatch_message(err: &(dyn Error + 'static)) -> Option<String> { + if let Some(mismatch) = err.downcast_ref::<Sha256MismatchError>() { + return Some(mismatch.message()); + } + + err.source().and_then(sha256_mismatch_message) +} diff --git a/crates/myfsio-server/src/services/access_logging.rs b/crates/myfsio-server/src/services/access_logging.rs new file mode 100644 index 0000000..0bc03bc --- /dev/null +++ b/crates/myfsio-server/src/services/access_logging.rs @@ -0,0 +1,105 @@ +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LoggingConfiguration { + pub target_bucket: String, + #[serde(default)] + pub target_prefix: String, + #[serde(default = "default_enabled")] + pub enabled: bool, +} + +fn default_enabled() -> bool { + true +} + +#[derive(Serialize, Deserialize)] +struct StoredLoggingFile { + #[serde(rename = "LoggingEnabled")] + logging_enabled: Option<StoredLoggingEnabled>, +} + +#[derive(Serialize, Deserialize)] +struct StoredLoggingEnabled { + #[serde(rename = "TargetBucket")] + target_bucket: String, + #[serde(rename = "TargetPrefix", default)] + target_prefix: String, +} + +pub struct AccessLoggingService { + storage_root: PathBuf, + cache: RwLock<HashMap<String, Option<LoggingConfiguration>>>, +} + +impl AccessLoggingService { + pub fn new(storage_root: &Path) -> Self { + Self { + storage_root: storage_root.to_path_buf(), + cache: RwLock::new(HashMap::new()), + } + } + + fn config_path(&self, bucket: &str) -> PathBuf { + self.storage_root + .join(".myfsio.sys") + .join("buckets") + .join(bucket) + .join("logging.json") + } + + pub fn get(&self, bucket: &str) -> Option<LoggingConfiguration> { + if let Some(cached) = self.cache.read().get(bucket).cloned() { + return cached; + } + + let path = self.config_path(bucket); + let config = if path.exists() { + std::fs::read_to_string(&path) + .ok() + .and_then(|s| serde_json::from_str::<StoredLoggingFile>(&s).ok()) + .and_then(|f| f.logging_enabled) + .map(|e| LoggingConfiguration { + target_bucket: e.target_bucket, + target_prefix: e.target_prefix, + enabled: true, + }) + } else { + None + }; + + self.cache + .write() + .insert(bucket.to_string(), config.clone()); + config + } + + pub fn set(&self, bucket: &str, config: LoggingConfiguration) -> std::io::Result<()> { + let path = self.config_path(bucket); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let stored = StoredLoggingFile { + logging_enabled: Some(StoredLoggingEnabled { + target_bucket: config.target_bucket.clone(), + target_prefix: config.target_prefix.clone(), + }), + }; + let json = serde_json::to_string_pretty(&stored) + .map_err(|e| 
std::io::Error::new(std::io::ErrorKind::Other, e))?; + std::fs::write(&path, json)?; + self.cache.write().insert(bucket.to_string(), Some(config)); + Ok(()) + } + + pub fn delete(&self, bucket: &str) { + let path = self.config_path(bucket); + if path.exists() { + let _ = std::fs::remove_file(&path); + } + self.cache.write().insert(bucket.to_string(), None); + } +} diff --git a/crates/myfsio-server/src/services/acl.rs b/crates/myfsio-server/src/services/acl.rs new file mode 100644 index 0000000..99d54ce --- /dev/null +++ b/crates/myfsio-server/src/services/acl.rs @@ -0,0 +1,276 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::{HashMap, HashSet}; + +pub const ACL_METADATA_KEY: &str = "__acl__"; +pub const GRANTEE_ALL_USERS: &str = "*"; +pub const GRANTEE_AUTHENTICATED_USERS: &str = "authenticated"; + +const ACL_PERMISSION_FULL_CONTROL: &str = "FULL_CONTROL"; +const ACL_PERMISSION_WRITE: &str = "WRITE"; +const ACL_PERMISSION_WRITE_ACP: &str = "WRITE_ACP"; +const ACL_PERMISSION_READ: &str = "READ"; +const ACL_PERMISSION_READ_ACP: &str = "READ_ACP"; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AclGrant { + pub grantee: String, + pub permission: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct Acl { + pub owner: String, + #[serde(default)] + pub grants: Vec, +} + +impl Acl { + pub fn allowed_actions( + &self, + principal_id: Option<&str>, + is_authenticated: bool, + ) -> HashSet<&'static str> { + let mut actions = HashSet::new(); + if let Some(principal_id) = principal_id { + if principal_id == self.owner { + actions.extend(permission_to_actions(ACL_PERMISSION_FULL_CONTROL)); + } + } + for grant in &self.grants { + if grant.grantee == GRANTEE_ALL_USERS { + actions.extend(permission_to_actions(&grant.permission)); + } else if grant.grantee == GRANTEE_AUTHENTICATED_USERS && is_authenticated { + actions.extend(permission_to_actions(&grant.permission)); + } else if let Some(principal_id) = principal_id { + if grant.grantee == principal_id { + actions.extend(permission_to_actions(&grant.permission)); + } + } + } + actions + } +} + +pub fn create_canned_acl(canned_acl: &str, owner: &str) -> Acl { + let owner_grant = AclGrant { + grantee: owner.to_string(), + permission: ACL_PERMISSION_FULL_CONTROL.to_string(), + }; + match canned_acl { + "public-read" => Acl { + owner: owner.to_string(), + grants: vec![ + owner_grant, + AclGrant { + grantee: GRANTEE_ALL_USERS.to_string(), + permission: ACL_PERMISSION_READ.to_string(), + }, + ], + }, + "public-read-write" => Acl { + owner: owner.to_string(), + grants: vec![ + owner_grant, + AclGrant { + grantee: GRANTEE_ALL_USERS.to_string(), + permission: ACL_PERMISSION_READ.to_string(), + }, + AclGrant { + grantee: GRANTEE_ALL_USERS.to_string(), + permission: ACL_PERMISSION_WRITE.to_string(), + }, + ], + }, + "authenticated-read" => Acl { + owner: owner.to_string(), + grants: vec![ + owner_grant, + AclGrant { + grantee: GRANTEE_AUTHENTICATED_USERS.to_string(), + permission: ACL_PERMISSION_READ.to_string(), + }, + ], + }, + "bucket-owner-read" | "bucket-owner-full-control" | "private" | _ => Acl { + owner: owner.to_string(), + grants: vec![owner_grant], + }, + } +} + +pub fn acl_to_xml(acl: &Acl) -> String { + let mut xml = format!( + "\ + \ + {}{}\ + ", + xml_escape(&acl.owner), + xml_escape(&acl.owner), + ); + for grant in &acl.grants { + xml.push_str(""); + match grant.grantee.as_str() { + GRANTEE_ALL_USERS => { + xml.push_str( + "\ + 
http://acs.amazonaws.com/groups/global/AllUsers\ + ", + ); + } + GRANTEE_AUTHENTICATED_USERS => { + xml.push_str( + "\ + http://acs.amazonaws.com/groups/global/AuthenticatedUsers\ + ", + ); + } + other => { + xml.push_str(&format!( + "\ + {}{}\ + ", + xml_escape(other), + xml_escape(other), + )); + } + } + xml.push_str(&format!( + "{}", + xml_escape(&grant.permission) + )); + } + xml.push_str(""); + xml +} + +pub fn acl_from_bucket_config(value: &Value) -> Option { + match value { + Value::String(raw) => acl_from_xml(raw).or_else(|| serde_json::from_str(raw).ok()), + Value::Object(_) => serde_json::from_value(value.clone()).ok(), + _ => None, + } +} + +pub fn acl_from_object_metadata(metadata: &HashMap) -> Option { + metadata + .get(ACL_METADATA_KEY) + .and_then(|raw| serde_json::from_str::(raw).ok()) +} + +pub fn store_object_acl(metadata: &mut HashMap, acl: &Acl) { + if let Ok(serialized) = serde_json::to_string(acl) { + metadata.insert(ACL_METADATA_KEY.to_string(), serialized); + } +} + +fn acl_from_xml(xml: &str) -> Option { + let doc = roxmltree::Document::parse(xml).ok()?; + let owner = doc + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Owner") + .and_then(|node| { + node.children() + .find(|child| child.is_element() && child.tag_name().name() == "ID") + .and_then(|child| child.text()) + }) + .unwrap_or("myfsio") + .trim() + .to_string(); + + let mut grants = Vec::new(); + for grant in doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "Grant") + { + let permission = grant + .children() + .find(|child| child.is_element() && child.tag_name().name() == "Permission") + .and_then(|child| child.text()) + .unwrap_or_default() + .trim() + .to_string(); + if permission.is_empty() { + continue; + } + let grantee_node = grant + .children() + .find(|child| child.is_element() && child.tag_name().name() == "Grantee"); + let grantee = grantee_node + .and_then(|node| { + let uri = node + .children() + .find(|child| child.is_element() && child.tag_name().name() == "URI") + .and_then(|child| child.text()) + .map(|text| text.trim().to_string()); + match uri.as_deref() { + Some("http://acs.amazonaws.com/groups/global/AllUsers") => { + Some(GRANTEE_ALL_USERS.to_string()) + } + Some("http://acs.amazonaws.com/groups/global/AuthenticatedUsers") => { + Some(GRANTEE_AUTHENTICATED_USERS.to_string()) + } + _ => node + .children() + .find(|child| child.is_element() && child.tag_name().name() == "ID") + .and_then(|child| child.text()) + .map(|text| text.trim().to_string()), + } + }) + .unwrap_or_default(); + if grantee.is_empty() { + continue; + } + grants.push(AclGrant { + grantee, + permission, + }); + } + + Some(Acl { owner, grants }) +} + +fn permission_to_actions(permission: &str) -> &'static [&'static str] { + match permission { + ACL_PERMISSION_FULL_CONTROL => &["read", "write", "delete", "list", "share"], + ACL_PERMISSION_WRITE => &["write", "delete"], + ACL_PERMISSION_WRITE_ACP => &["share"], + ACL_PERMISSION_READ => &["read", "list"], + ACL_PERMISSION_READ_ACP => &["share"], + _ => &[], + } +} + +fn xml_escape(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn canned_acl_grants_public_read() { + let acl = create_canned_acl("public-read", "owner"); + let actions = acl.allowed_actions(None, false); + assert!(actions.contains("read")); + assert!(actions.contains("list")); + 
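+ // public-read maps to a READ grant for the AllUsers grantee, which expands to
+ // the read/list actions only; write-type actions remain owner-only.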
assert!(!actions.contains("write")); + } + + #[test] + fn xml_round_trip_preserves_grants() { + let acl = create_canned_acl("authenticated-read", "owner"); + let parsed = acl_from_bucket_config(&Value::String(acl_to_xml(&acl))).unwrap(); + assert_eq!(parsed.owner, "owner"); + assert_eq!(parsed.grants.len(), 2); + assert!(parsed + .grants + .iter() + .any(|grant| grant.grantee == GRANTEE_AUTHENTICATED_USERS)); + } +} diff --git a/crates/myfsio-server/src/services/gc.rs b/crates/myfsio-server/src/services/gc.rs new file mode 100644 index 0000000..a72fabc --- /dev/null +++ b/crates/myfsio-server/src/services/gc.rs @@ -0,0 +1,395 @@ +use serde_json::{json, Value}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; +use tokio::sync::RwLock; + +pub struct GcConfig { + pub interval_hours: f64, + pub temp_file_max_age_hours: f64, + pub multipart_max_age_days: u64, + pub lock_file_max_age_hours: f64, + pub quarantine_max_age_days: u64, + pub dry_run: bool, +} + +impl Default for GcConfig { + fn default() -> Self { + Self { + interval_hours: 6.0, + temp_file_max_age_hours: 24.0, + multipart_max_age_days: 7, + lock_file_max_age_hours: 1.0, + quarantine_max_age_days: 7, + dry_run: false, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn dry_run_reports_but_does_not_delete_temp_files() { + let tmp = tempfile::tempdir().unwrap(); + let tmp_dir = tmp.path().join(".myfsio.sys").join("tmp"); + std::fs::create_dir_all(&tmp_dir).unwrap(); + let file_path = tmp_dir.join("stale.tmp"); + std::fs::write(&file_path, b"temporary").unwrap(); + tokio::time::sleep(std::time::Duration::from_millis(5)).await; + + let service = GcService::new( + tmp.path().to_path_buf(), + GcConfig { + temp_file_max_age_hours: 0.0, + dry_run: true, + ..GcConfig::default() + }, + ); + + let result = service.run_now(false).await.unwrap(); + + assert_eq!(result["temp_files_deleted"], 1); + assert!(file_path.exists()); + } +} + +pub struct GcService { + storage_root: PathBuf, + config: GcConfig, + running: Arc>, + started_at: Arc>>, + history: Arc>>, + history_path: PathBuf, +} + +impl GcService { + pub fn new(storage_root: PathBuf, config: GcConfig) -> Self { + let history_path = storage_root + .join(".myfsio.sys") + .join("config") + .join("gc_history.json"); + + let history = if history_path.exists() { + std::fs::read_to_string(&history_path) + .ok() + .and_then(|s| serde_json::from_str::(&s).ok()) + .and_then(|v| v.get("executions").and_then(|e| e.as_array().cloned())) + .unwrap_or_default() + } else { + Vec::new() + }; + + Self { + storage_root, + config, + running: Arc::new(RwLock::new(false)), + started_at: Arc::new(RwLock::new(None)), + history: Arc::new(RwLock::new(history)), + history_path, + } + } + + pub async fn status(&self) -> Value { + let running = *self.running.read().await; + let scan_elapsed_seconds = self + .started_at + .read() + .await + .as_ref() + .map(|started| started.elapsed().as_secs_f64()); + json!({ + "enabled": true, + "running": running, + "scanning": running, + "scan_elapsed_seconds": scan_elapsed_seconds, + "interval_hours": self.config.interval_hours, + "temp_file_max_age_hours": self.config.temp_file_max_age_hours, + "multipart_max_age_days": self.config.multipart_max_age_days, + "lock_file_max_age_hours": self.config.lock_file_max_age_hours, + "quarantine_max_age_days": self.config.quarantine_max_age_days, + "dry_run": self.config.dry_run, + }) + } + + pub async fn history(&self) -> Value { + let history = self.history.read().await; + let mut 
executions: Vec = history.iter().cloned().collect(); + executions.reverse(); + json!({ "executions": executions }) + } + + pub async fn run_now(&self, dry_run: bool) -> Result { + { + let mut running = self.running.write().await; + if *running { + return Err("GC already running".to_string()); + } + *running = true; + } + *self.started_at.write().await = Some(Instant::now()); + + let start = Instant::now(); + let result = self.execute_gc(dry_run || self.config.dry_run).await; + let elapsed = start.elapsed().as_secs_f64(); + + *self.running.write().await = false; + *self.started_at.write().await = None; + + let mut result_json = result.clone(); + if let Some(obj) = result_json.as_object_mut() { + obj.insert("execution_time_seconds".to_string(), json!(elapsed)); + } + + let record = json!({ + "timestamp": chrono::Utc::now().timestamp_millis() as f64 / 1000.0, + "dry_run": dry_run || self.config.dry_run, + "result": result_json, + }); + + { + let mut history = self.history.write().await; + history.push(record); + if history.len() > 50 { + let excess = history.len() - 50; + history.drain(..excess); + } + } + self.save_history().await; + + Ok(result) + } + + async fn execute_gc(&self, dry_run: bool) -> Value { + let mut temp_files_deleted = 0u64; + let mut temp_bytes_freed = 0u64; + let mut multipart_uploads_deleted = 0u64; + let mut lock_files_deleted = 0u64; + let mut empty_dirs_removed = 0u64; + let mut quarantine_entries_deleted = 0u64; + let mut quarantine_bytes_freed = 0u64; + let mut errors: Vec = Vec::new(); + + let now = std::time::SystemTime::now(); + let temp_max_age = + std::time::Duration::from_secs_f64(self.config.temp_file_max_age_hours * 3600.0); + let multipart_max_age = + std::time::Duration::from_secs(self.config.multipart_max_age_days * 86400); + let lock_max_age = + std::time::Duration::from_secs_f64(self.config.lock_file_max_age_hours * 3600.0); + let quarantine_max_age = + std::time::Duration::from_secs(self.config.quarantine_max_age_days * 86400); + + let tmp_dir = self.storage_root.join(".myfsio.sys").join("tmp"); + if tmp_dir.exists() { + match std::fs::read_dir(&tmp_dir) { + Ok(entries) => { + for entry in entries.flatten() { + if let Ok(metadata) = entry.metadata() { + if let Ok(modified) = metadata.modified() { + if let Ok(age) = now.duration_since(modified) { + if age > temp_max_age { + let size = metadata.len(); + if !dry_run { + if let Err(e) = std::fs::remove_file(entry.path()) { + errors.push(format!( + "Failed to remove temp file: {}", + e + )); + continue; + } + } + temp_files_deleted += 1; + temp_bytes_freed += size; + } + } + } + } + } + } + Err(e) => errors.push(format!("Failed to read tmp dir: {}", e)), + } + } + + let multipart_dir = self.storage_root.join(".myfsio.sys").join("multipart"); + if multipart_dir.exists() { + if let Ok(bucket_dirs) = std::fs::read_dir(&multipart_dir) { + for bucket_entry in bucket_dirs.flatten() { + if let Ok(uploads) = std::fs::read_dir(bucket_entry.path()) { + for upload in uploads.flatten() { + if let Ok(metadata) = upload.metadata() { + if let Ok(modified) = metadata.modified() { + if let Ok(age) = now.duration_since(modified) { + if age > multipart_max_age { + if !dry_run { + let _ = std::fs::remove_dir_all(upload.path()); + } + multipart_uploads_deleted += 1; + } + } + } + } + } + } + } + } + } + + let buckets_dir = self.storage_root.join(".myfsio.sys").join("buckets"); + if buckets_dir.exists() { + if let Ok(bucket_dirs) = std::fs::read_dir(&buckets_dir) { + for bucket_entry in bucket_dirs.flatten() { + let locks_dir = 
bucket_entry.path().join("locks"); + if locks_dir.exists() { + if let Ok(locks) = std::fs::read_dir(&locks_dir) { + for lock in locks.flatten() { + if let Ok(metadata) = lock.metadata() { + if let Ok(modified) = metadata.modified() { + if let Ok(age) = now.duration_since(modified) { + if age > lock_max_age { + if !dry_run { + let _ = std::fs::remove_file(lock.path()); + } + lock_files_deleted += 1; + } + } + } + } + } + } + } + } + } + } + + let quarantine_dir = self.storage_root.join(".myfsio.sys").join("quarantine"); + if quarantine_dir.exists() { + if let Ok(bucket_dirs) = std::fs::read_dir(&quarantine_dir) { + for bucket_entry in bucket_dirs.flatten() { + if !bucket_entry.path().is_dir() { + continue; + } + if let Ok(ts_dirs) = std::fs::read_dir(bucket_entry.path()) { + for ts_entry in ts_dirs.flatten() { + let ts_path = ts_entry.path(); + if !ts_path.is_dir() { + continue; + } + let modified = ts_entry + .metadata() + .ok() + .and_then(|m| m.modified().ok()); + let Some(modified) = modified else { + continue; + }; + let Ok(age) = now.duration_since(modified) else { + continue; + }; + if age <= quarantine_max_age { + continue; + } + let bytes = dir_total_bytes(&ts_path); + if !dry_run { + if let Err(e) = std::fs::remove_dir_all(&ts_path) { + errors.push(format!( + "Failed to remove quarantine {}: {}", + ts_path.display(), + e + )); + continue; + } + } + quarantine_entries_deleted += 1; + quarantine_bytes_freed += bytes; + } + } + if !dry_run { + if let Ok(mut remaining) = std::fs::read_dir(bucket_entry.path()) { + if remaining.next().is_none() { + let _ = std::fs::remove_dir(bucket_entry.path()); + } + } + } + } + } + } + + if !dry_run { + for dir in [&tmp_dir, &multipart_dir] { + if dir.exists() { + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + if entry.path().is_dir() { + if let Ok(mut contents) = std::fs::read_dir(entry.path()) { + if contents.next().is_none() { + let _ = std::fs::remove_dir(entry.path()); + empty_dirs_removed += 1; + } + } + } + } + } + } + } + } + + json!({ + "temp_files_deleted": temp_files_deleted, + "temp_bytes_freed": temp_bytes_freed, + "multipart_uploads_deleted": multipart_uploads_deleted, + "lock_files_deleted": lock_files_deleted, + "empty_dirs_removed": empty_dirs_removed, + "quarantine_entries_deleted": quarantine_entries_deleted, + "quarantine_bytes_freed": quarantine_bytes_freed, + "errors": errors, + }) + } + + async fn save_history(&self) { + let history = self.history.read().await; + let data = json!({ "executions": *history }); + if let Some(parent) = self.history_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write( + &self.history_path, + serde_json::to_string_pretty(&data).unwrap_or_default(), + ); + } + + pub fn start_background(self: Arc) -> tokio::task::JoinHandle<()> { + let interval = std::time::Duration::from_secs_f64(self.config.interval_hours * 3600.0); + tokio::spawn(async move { + let mut timer = tokio::time::interval(interval); + timer.tick().await; + loop { + timer.tick().await; + tracing::info!("GC cycle starting"); + match self.run_now(false).await { + Ok(result) => tracing::info!("GC cycle complete: {:?}", result), + Err(e) => tracing::warn!("GC cycle failed: {}", e), + } + } + }) + } +} + +fn dir_total_bytes(path: &std::path::Path) -> u64 { + let mut total: u64 = 0; + let mut stack: Vec = vec![path.to_path_buf()]; + while let Some(dir) = stack.pop() { + let Ok(entries) = std::fs::read_dir(&dir) else { + continue; + }; + for entry in entries.flatten() { + let 
Ok(ft) = entry.file_type() else { continue }; + if ft.is_dir() { + stack.push(entry.path()); + } else if ft.is_file() { + total = total.saturating_add(entry.metadata().map(|m| m.len()).unwrap_or(0)); + } + } + } + total +} diff --git a/crates/myfsio-server/src/services/integrity.rs b/crates/myfsio-server/src/services/integrity.rs new file mode 100644 index 0000000..d81985e --- /dev/null +++ b/crates/myfsio-server/src/services/integrity.rs @@ -0,0 +1,1348 @@ +use myfsio_common::constants::{ + BUCKET_META_DIR, BUCKET_VERSIONS_DIR, INDEX_FILE, SYSTEM_BUCKETS_DIR, SYSTEM_ROOT, +}; +use myfsio_storage::fs_backend::{ + is_multipart_etag, metadata_is_corrupted, FsStorageBackend, META_KEY_CORRUPTED, + META_KEY_CORRUPTED_AT, META_KEY_CORRUPTION_DETAIL, META_KEY_QUARANTINE_PATH, +}; +use myfsio_storage::traits::StorageEngine; +use serde_json::{json, Map, Value}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Instant; +use tokio::sync::{RwLock, Semaphore}; + +use crate::services::peer_fetch::{HealOutcome, PeerFetcher}; + +const MAX_ISSUES: usize = 500; +const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"]; +const QUARANTINE_DIR: &str = "quarantine"; + +pub struct IntegrityConfig { + pub interval_hours: f64, + pub batch_size: usize, + pub auto_heal: bool, + pub dry_run: bool, + pub heal_concurrency: usize, + pub quarantine_retention_days: u64, +} + +impl Default for IntegrityConfig { + fn default() -> Self { + Self { + interval_hours: 24.0, + batch_size: 10_000, + auto_heal: false, + dry_run: false, + heal_concurrency: 4, + quarantine_retention_days: 7, + } + } +} + +pub struct IntegrityService { + storage: Arc, + storage_root: PathBuf, + config: IntegrityConfig, + peer_fetcher: Option>, + running: Arc>, + started_at: Arc>>, + history: Arc>>, + history_path: PathBuf, +} + +#[derive(Default, Clone)] +struct HealStats { + found: u64, + healed: u64, + poisoned: u64, + peer_mismatch: u64, + peer_unavailable: u64, + verify_failed: u64, + failed: u64, + skipped: u64, +} + +impl HealStats { + fn to_value(&self) -> Value { + json!({ + "found": self.found, + "healed": self.healed, + "poisoned": self.poisoned, + "peer_mismatch": self.peer_mismatch, + "peer_unavailable": self.peer_unavailable, + "verify_failed": self.verify_failed, + "failed": self.failed, + "skipped": self.skipped, + }) + } +} + +#[derive(Default)] +struct ScanState { + objects_scanned: u64, + buckets_scanned: u64, + corrupted_objects: u64, + orphaned_objects: u64, + phantom_metadata: u64, + stale_versions: u64, + etag_cache_inconsistencies: u64, + issues: Vec, + errors: Vec, +} + +impl ScanState { + fn batch_exhausted(&self, batch_size: usize) -> bool { + self.objects_scanned >= batch_size as u64 + } + + fn push_issue(&mut self, issue_type: &str, bucket: &str, key: &str, detail: String) { + if self.issues.len() < MAX_ISSUES { + self.issues.push(json!({ + "issue_type": issue_type, + "bucket": bucket, + "key": key, + "detail": detail, + })); + } + } +} + +impl IntegrityService { + pub fn new( + storage: Arc, + storage_root: &Path, + config: IntegrityConfig, + peer_fetcher: Option>, + ) -> Self { + let history_path = storage_root + .join(SYSTEM_ROOT) + .join("config") + .join("integrity_history.json"); + + let history = if history_path.exists() { + std::fs::read_to_string(&history_path) + .ok() + .and_then(|s| serde_json::from_str::(&s).ok()) + .and_then(|v| v.get("executions").and_then(|e| e.as_array().cloned())) + .unwrap_or_default() + } else { + 
Vec::new() + }; + + Self { + storage, + storage_root: storage_root.to_path_buf(), + config, + peer_fetcher, + running: Arc::new(RwLock::new(false)), + started_at: Arc::new(RwLock::new(None)), + history: Arc::new(RwLock::new(history)), + history_path, + } + } + + pub async fn status(&self) -> Value { + let running = *self.running.read().await; + let scan_elapsed_seconds = self + .started_at + .read() + .await + .as_ref() + .map(|started| started.elapsed().as_secs_f64()); + json!({ + "enabled": true, + "running": running, + "scanning": running, + "scan_elapsed_seconds": scan_elapsed_seconds, + "interval_hours": self.config.interval_hours, + "batch_size": self.config.batch_size, + "auto_heal": self.config.auto_heal, + "dry_run": self.config.dry_run, + "heal_concurrency": self.config.heal_concurrency, + "peer_heal_available": self.peer_fetcher.is_some(), + }) + } + + pub async fn history(&self) -> Value { + let history = self.history.read().await; + let mut executions: Vec = history.iter().cloned().collect(); + executions.reverse(); + json!({ "executions": executions }) + } + + pub async fn run_now(&self, dry_run: bool, auto_heal: bool) -> Result { + { + let mut running = self.running.write().await; + if *running { + return Err("Integrity check already running".to_string()); + } + *running = true; + } + *self.started_at.write().await = Some(Instant::now()); + + let start = Instant::now(); + let storage_root = self.storage_root.clone(); + let batch_size = self.config.batch_size; + let scan_state = + tokio::task::spawn_blocking(move || scan_all_buckets(&storage_root, batch_size)) + .await + .unwrap_or_else(|e| { + let mut st = ScanState::default(); + st.errors.push(format!("scan task failed: {}", e)); + st + }); + + let heal_stats = if auto_heal && !dry_run { + self.run_heal_phase(&scan_state).await + } else { + BTreeMap::new() + }; + + let elapsed = start.elapsed().as_secs_f64(); + + *self.running.write().await = false; + *self.started_at.write().await = None; + + let result_json = build_result_json(scan_state, heal_stats, elapsed); + + let record = json!({ + "timestamp": chrono::Utc::now().timestamp_millis() as f64 / 1000.0, + "dry_run": dry_run, + "auto_heal": auto_heal, + "result": result_json.clone(), + }); + + { + let mut history = self.history.write().await; + history.push(record); + if history.len() > 50 { + let excess = history.len() - 50; + history.drain(..excess); + } + } + self.save_history().await; + + Ok(result_json) + } + + async fn run_heal_phase(&self, scan: &ScanState) -> BTreeMap { + let mut stats: BTreeMap = BTreeMap::new(); + let issues: Vec = scan.issues.clone(); + let semaphore = Arc::new(Semaphore::new(self.config.heal_concurrency.max(1))); + let mut tasks: Vec> = Vec::new(); + + for issue in issues { + let issue_type = issue + .get("issue_type") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let bucket = issue + .get("bucket") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let key = issue + .get("key") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let detail = issue + .get("detail") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + stats.entry(issue_type.clone()).or_default().found += 1; + + let permit = match semaphore.clone().acquire_owned().await { + Ok(p) => p, + Err(_) => continue, + }; + let storage = self.storage.clone(); + let storage_root = self.storage_root.clone(); + let peer_fetcher = self.peer_fetcher.clone(); + + tasks.push(tokio::spawn(async move { + let _permit = permit; + heal_issue( + 
&storage, + &storage_root, + peer_fetcher.as_deref(), + &issue_type, + &bucket, + &key, + &detail, + ) + .await + })); + } + + for task in tasks { + if let Ok(report) = task.await { + let entry = stats.entry(report.issue_type).or_default(); + match report.status { + HealStatus::Healed => entry.healed += 1, + HealStatus::Poisoned => entry.poisoned += 1, + HealStatus::PeerMismatch => entry.peer_mismatch += 1, + HealStatus::PeerUnavailable => entry.peer_unavailable += 1, + HealStatus::VerifyFailed => entry.verify_failed += 1, + HealStatus::Failed => entry.failed += 1, + HealStatus::Skipped => entry.skipped += 1, + } + } + } + + stats + } + + async fn save_history(&self) { + let history = self.history.read().await; + let data = json!({ "executions": *history }); + if let Some(parent) = self.history_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write( + &self.history_path, + serde_json::to_string_pretty(&data).unwrap_or_default(), + ); + } + + pub fn start_background(self: Arc) -> tokio::task::JoinHandle<()> { + let interval = std::time::Duration::from_secs_f64(self.config.interval_hours * 3600.0); + let auto_heal = self.config.auto_heal; + let dry_run = self.config.dry_run; + tokio::spawn(async move { + let mut timer = tokio::time::interval(interval); + timer.tick().await; + loop { + timer.tick().await; + tracing::info!("Integrity check starting"); + match self.run_now(dry_run, auto_heal).await { + Ok(result) => tracing::info!("Integrity check complete: {:?}", result), + Err(e) => tracing::warn!("Integrity check failed: {}", e), + } + } + }) + } +} + +#[derive(Debug)] +enum HealStatus { + Healed, + Poisoned, + PeerMismatch, + PeerUnavailable, + VerifyFailed, + Failed, + Skipped, +} + +struct HealReport { + issue_type: String, + status: HealStatus, +} + +async fn heal_issue( + storage: &FsStorageBackend, + storage_root: &Path, + peer_fetcher: Option<&PeerFetcher>, + issue_type: &str, + bucket: &str, + key: &str, + detail: &str, +) -> HealReport { + let status = match issue_type { + "corrupted_object" => { + heal_corrupted(storage, storage_root, peer_fetcher, bucket, key, detail).await + } + "stale_version" => heal_stale_version(storage_root, bucket, key).await, + "etag_cache_inconsistency" => heal_etag_cache(storage_root, bucket, key, detail).await, + "phantom_metadata" => heal_phantom_metadata(storage, bucket, key).await, + _ => HealStatus::Skipped, + }; + HealReport { + issue_type: issue_type.to_string(), + status, + } +} + +async fn heal_corrupted( + storage: &FsStorageBackend, + storage_root: &Path, + peer_fetcher: Option<&PeerFetcher>, + bucket: &str, + key: &str, + detail: &str, +) -> HealStatus { + let stored_etag = parse_stored_etag(detail); + let actual_etag = parse_actual_etag(detail); + + let live_path = storage_root.join(bucket).join(key); + let quarantine_rel = quarantine_relative_path(bucket, key); + let quarantine_full = storage_root.join(&quarantine_rel); + + if let Some(parent) = quarantine_full.parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + tracing::error!("Heal {}/{}: mkdir quarantine failed: {}", bucket, key, e); + return HealStatus::Failed; + } + } + + if live_path.exists() { + if let Err(e) = std::fs::rename(&live_path, &quarantine_full) { + tracing::error!( + "Heal {}/{}: quarantine rename failed: {}", + bucket, + key, + e + ); + return HealStatus::Failed; + } + } + + let quarantine_rel_str = quarantine_rel.to_string_lossy().replace('\\', "/"); + + if !stored_etag.is_empty() { + if let Some(fetcher) = peer_fetcher { + 
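+ // Heal path: download the peer's copy into a uniquely named temp file beside
+ // the live object, let the fetcher verify it against the stored ETag, then
+ // atomically swap it into place. On any failure the original bytes stay in
+ // quarantine and the metadata is poisoned rather than silently dropping data.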
let nonce = uuid::Uuid::new_v4().simple().to_string(); + let temp_path = live_path.with_file_name(format!( + "{}.healing.{}", + live_path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "healing".to_string()), + nonce + )); + match fetcher + .fetch_for_heal(bucket, key, &stored_etag, &temp_path) + .await + { + HealOutcome::Healed { peer_etag, bytes } => { + if let Err(e) = atomic_swap(&temp_path, &live_path) { + tracing::error!( + "Heal {}/{}: atomic swap failed: {} (restoring from quarantine)", + bucket, + key, + e + ); + let _ = std::fs::rename(&quarantine_full, &live_path); + let _ = std::fs::remove_file(&temp_path); + return HealStatus::Failed; + } + let _ = clear_poison_metadata(storage, bucket, key).await; + tracing::info!( + "Healed {}/{} from peer (etag={}, bytes={})", + bucket, + key, + peer_etag, + bytes + ); + return HealStatus::Healed; + } + HealOutcome::PeerMismatch { stored, peer } => { + let msg = format!("peer etag {} != stored {}", peer, stored); + let _ = + poison_metadata(storage, bucket, key, &msg, &quarantine_rel_str).await; + tracing::warn!("Heal {}/{}: peer mismatch ({}), poisoned", bucket, key, msg); + return HealStatus::PeerMismatch; + } + HealOutcome::PeerUnavailable { error } => { + tracing::warn!( + "Heal {}/{}: peer unavailable ({}), poisoning", + bucket, + key, + error + ); + let msg = format!( + "etag mismatch (stored={}, actual={}) — peer unavailable: {}", + stored_etag, actual_etag, error + ); + let _ = + poison_metadata(storage, bucket, key, &msg, &quarantine_rel_str).await; + return HealStatus::PeerUnavailable; + } + HealOutcome::VerifyFailed { expected, actual } => { + let msg = format!("peer download verify failed: expected={} actual={}", expected, actual); + let _ = + poison_metadata(storage, bucket, key, &msg, &quarantine_rel_str).await; + tracing::warn!("Heal {}/{}: {}", bucket, key, msg); + return HealStatus::VerifyFailed; + } + HealOutcome::NotConfigured => { + let msg = format!( + "etag mismatch (stored={}, actual={}); no peer configured", + stored_etag, actual_etag + ); + let _ = + poison_metadata(storage, bucket, key, &msg, &quarantine_rel_str).await; + return HealStatus::Poisoned; + } + } + } + } + + let msg = format!( + "etag mismatch (stored={}, actual={}); no peer fetcher", + stored_etag, actual_etag + ); + let _ = poison_metadata(storage, bucket, key, &msg, &quarantine_rel_str).await; + HealStatus::Poisoned +} + +async fn heal_stale_version(storage_root: &Path, bucket: &str, key: &str) -> HealStatus { + let versions_root = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_VERSIONS_DIR); + let src = versions_root.join(key); + if !src.exists() { + return HealStatus::Skipped; + } + let ts = chrono::Utc::now().format("%Y%m%dT%H%M%S").to_string(); + let dst = storage_root + .join(SYSTEM_ROOT) + .join(QUARANTINE_DIR) + .join(bucket) + .join(&ts) + .join("versions") + .join(key); + if let Some(parent) = dst.parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + tracing::error!("Stale-version quarantine mkdir failed {}/{}: {}", bucket, key, e); + return HealStatus::Failed; + } + } + if let Err(e) = std::fs::rename(&src, &dst) { + tracing::error!("Stale-version quarantine rename failed {}/{}: {}", bucket, key, e); + return HealStatus::Failed; + } + tracing::info!("Quarantined stale version {}/{}", bucket, key); + HealStatus::Healed +} + +async fn heal_etag_cache( + storage_root: &Path, + bucket: &str, + key: &str, + _detail: &str, +) -> HealStatus { + let 
etag_index_path = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join("etag_index.json"); + if !etag_index_path.exists() { + return HealStatus::Skipped; + } + + let meta_root = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + let entries = collect_index_entries(&meta_root); + let canonical = entries.get(key).and_then(|info| stored_etag(&info.entry)); + + let mut cache: HashMap = match std::fs::read_to_string(&etag_index_path) + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + { + Some(Value::Object(m)) => m.into_iter().collect(), + _ => return HealStatus::Failed, + }; + + match canonical { + Some(etag) => { + cache.insert(key.to_string(), Value::String(etag)); + } + None => { + cache.remove(key); + } + } + + let json_obj: serde_json::Map = cache.into_iter().collect(); + match std::fs::write( + &etag_index_path, + serde_json::to_string_pretty(&Value::Object(json_obj)).unwrap_or_default(), + ) { + Ok(_) => HealStatus::Healed, + Err(e) => { + tracing::error!("etag-cache rewrite failed {}/{}: {}", bucket, key, e); + HealStatus::Failed + } + } +} + +async fn heal_phantom_metadata( + storage: &FsStorageBackend, + bucket: &str, + key: &str, +) -> HealStatus { + match storage.delete_object_metadata_entry(bucket, key).await { + Ok(_) => { + tracing::info!("Dropped phantom metadata for {}/{}", bucket, key); + HealStatus::Healed + } + Err(e) => { + tracing::error!("Failed to drop phantom metadata {}/{}: {}", bucket, key, e); + HealStatus::Failed + } + } +} + +async fn poison_metadata( + storage: &FsStorageBackend, + bucket: &str, + key: &str, + detail: &str, + quarantine_rel: &str, +) -> Result<(), String> { + let mut meta = storage + .get_object_metadata(bucket, key) + .await + .unwrap_or_default(); + meta.insert(META_KEY_CORRUPTED.to_string(), "true".to_string()); + meta.insert( + META_KEY_CORRUPTED_AT.to_string(), + chrono::Utc::now().to_rfc3339(), + ); + meta.insert(META_KEY_CORRUPTION_DETAIL.to_string(), detail.to_string()); + meta.insert( + META_KEY_QUARANTINE_PATH.to_string(), + quarantine_rel.to_string(), + ); + storage + .put_object_metadata(bucket, key, &meta) + .await + .map_err(|e| e.to_string()) +} + +async fn clear_poison_metadata( + storage: &FsStorageBackend, + bucket: &str, + key: &str, +) -> Result<(), String> { + let mut meta = storage + .get_object_metadata(bucket, key) + .await + .unwrap_or_default(); + meta.remove(META_KEY_CORRUPTED); + meta.remove(META_KEY_CORRUPTED_AT); + meta.remove(META_KEY_CORRUPTION_DETAIL); + meta.remove(META_KEY_QUARANTINE_PATH); + storage + .put_object_metadata(bucket, key, &meta) + .await + .map_err(|e| e.to_string()) +} + +fn quarantine_relative_path(bucket: &str, key: &str) -> PathBuf { + let ts = chrono::Utc::now().format("%Y%m%dT%H%M%S").to_string(); + PathBuf::from(SYSTEM_ROOT) + .join(QUARANTINE_DIR) + .join(bucket) + .join(ts) + .join(key) +} + +fn atomic_swap(src: &Path, dst: &Path) -> std::io::Result<()> { + if let Some(parent) = dst.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::rename(src, dst) +} + +fn parse_stored_etag(detail: &str) -> String { + detail + .split_whitespace() + .find_map(|s| s.strip_prefix("stored_etag=")) + .unwrap_or("") + .to_string() +} + +fn parse_actual_etag(detail: &str) -> String { + detail + .split_whitespace() + .find_map(|s| s.strip_prefix("actual_etag=")) + .unwrap_or("") + .to_string() +} + +fn build_result_json( + state: ScanState, + heal_stats: BTreeMap, + elapsed: f64, +) -> Value { + let 
issues_healed: u64 = heal_stats.values().map(|s| s.healed).sum(); + let heal_stats_json: serde_json::Map = heal_stats + .iter() + .map(|(k, v)| (k.clone(), v.to_value())) + .collect(); + + json!({ + "objects_scanned": state.objects_scanned, + "buckets_scanned": state.buckets_scanned, + "corrupted_objects": state.corrupted_objects, + "orphaned_objects": state.orphaned_objects, + "phantom_metadata": state.phantom_metadata, + "stale_versions": state.stale_versions, + "etag_cache_inconsistencies": state.etag_cache_inconsistencies, + "issues_healed": issues_healed, + "heal_stats": Value::Object(heal_stats_json), + "issues": state.issues, + "errors": state.errors, + "execution_time_seconds": elapsed, + }) +} + +fn scan_all_buckets(storage_root: &Path, batch_size: usize) -> ScanState { + let mut state = ScanState::default(); + let buckets = match list_bucket_names(storage_root) { + Ok(b) => b, + Err(e) => { + state.errors.push(format!("list buckets: {}", e)); + return state; + } + }; + + for bucket in &buckets { + if state.batch_exhausted(batch_size) { + break; + } + state.buckets_scanned += 1; + + let bucket_path = storage_root.join(bucket); + let meta_root = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + + let index_entries = collect_index_entries(&meta_root); + + check_corrupted(&mut state, bucket, &bucket_path, &index_entries, batch_size); + check_phantom(&mut state, bucket, &bucket_path, &index_entries, batch_size); + check_orphaned(&mut state, bucket, &bucket_path, &index_entries, batch_size); + check_stale_versions(&mut state, storage_root, bucket, batch_size); + check_etag_cache(&mut state, storage_root, bucket, &index_entries, batch_size); + } + + state +} + +fn list_bucket_names(storage_root: &Path) -> std::io::Result> { + let mut names = Vec::new(); + if !storage_root.exists() { + return Ok(names); + } + for entry in std::fs::read_dir(storage_root)? 
{ + let entry = entry?; + let name = entry.file_name().to_string_lossy().to_string(); + if name == SYSTEM_ROOT { + continue; + } + if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) { + names.push(name); + } + } + Ok(names) +} + +#[allow(dead_code)] +struct IndexEntryInfo { + entry: Value, + index_file: PathBuf, + key_name: String, +} + +fn collect_index_entries(meta_root: &Path) -> HashMap { + let mut out: HashMap = HashMap::new(); + if !meta_root.exists() { + return out; + } + + let mut stack: Vec = vec![meta_root.to_path_buf()]; + while let Some(dir) = stack.pop() { + let rd = match std::fs::read_dir(&dir) { + Ok(r) => r, + Err(_) => continue, + }; + for entry in rd.flatten() { + let path = entry.path(); + let ft = match entry.file_type() { + Ok(t) => t, + Err(_) => continue, + }; + if ft.is_dir() { + stack.push(path); + continue; + } + if entry.file_name().to_string_lossy() != INDEX_FILE { + continue; + } + let rel_dir = match path.parent().and_then(|p| p.strip_prefix(meta_root).ok()) { + Some(p) => p.to_path_buf(), + None => continue, + }; + let dir_prefix = if rel_dir.as_os_str().is_empty() { + String::new() + } else { + rel_dir + .components() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .collect::>() + .join("/") + }; + + let content = match std::fs::read_to_string(&path) { + Ok(c) => c, + Err(_) => continue, + }; + let index_data: Map = match serde_json::from_str(&content) { + Ok(Value::Object(m)) => m, + _ => continue, + }; + + for (key_name, entry_val) in index_data { + let full_key = if dir_prefix.is_empty() { + key_name.clone() + } else { + format!("{}/{}", dir_prefix, key_name) + }; + out.insert( + full_key, + IndexEntryInfo { + entry: entry_val, + index_file: path.clone(), + key_name, + }, + ); + } + } + } + out +} + +fn stored_etag(entry: &Value) -> Option { + entry + .get("metadata") + .and_then(|m| m.get("__etag__")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) +} + +fn entry_metadata_map(entry: &Value) -> HashMap { + entry + .get("metadata") + .and_then(|m| m.as_object()) + .map(|m| { + m.iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect() + }) + .unwrap_or_default() +} + +fn check_corrupted( + state: &mut ScanState, + bucket: &str, + bucket_path: &Path, + entries: &HashMap, + batch_size: usize, +) { + let mut keys: Vec<&String> = entries.keys().collect(); + keys.sort(); + + for full_key in keys { + if state.batch_exhausted(batch_size) { + return; + } + let info = &entries[full_key]; + let object_path = bucket_path.join(full_key); + if !object_path.exists() { + continue; + } + let meta_map = entry_metadata_map(&info.entry); + if metadata_is_corrupted(&meta_map) { + continue; + } + state.objects_scanned += 1; + + let Some(stored) = stored_etag(&info.entry) else { + continue; + }; + + if is_multipart_etag(&stored) { + continue; + } + + match myfsio_crypto::hashing::md5_file(&object_path) { + Ok(actual) => { + if actual != stored { + state.corrupted_objects += 1; + state.push_issue( + "corrupted_object", + bucket, + full_key, + format!("stored_etag={} actual_etag={}", stored, actual), + ); + } + } + Err(e) => state + .errors + .push(format!("hash {}/{}: {}", bucket, full_key, e)), + } + } +} + +fn check_phantom( + state: &mut ScanState, + bucket: &str, + bucket_path: &Path, + entries: &HashMap, + batch_size: usize, +) { + let mut keys: Vec<&String> = entries.keys().collect(); + keys.sort(); + + for full_key in keys { + if state.batch_exhausted(batch_size) { + return; + } + let info = &entries[full_key]; + 
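+ // Entries already poisoned by a previous heal (their file now lives in
+ // quarantine) are skipped so they are not re-reported as phantom metadata.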
if metadata_is_corrupted(&entry_metadata_map(&info.entry)) { + continue; + } + state.objects_scanned += 1; + let object_path = bucket_path.join(full_key); + if !object_path.exists() { + state.phantom_metadata += 1; + state.push_issue( + "phantom_metadata", + bucket, + full_key, + "metadata entry without file on disk".to_string(), + ); + } + } +} + +fn check_orphaned( + state: &mut ScanState, + bucket: &str, + bucket_path: &Path, + entries: &HashMap, + batch_size: usize, +) { + let indexed: HashSet<&String> = entries.keys().collect(); + let mut stack: Vec<(PathBuf, String)> = vec![(bucket_path.to_path_buf(), String::new())]; + + while let Some((dir, prefix)) = stack.pop() { + if state.batch_exhausted(batch_size) { + return; + } + let rd = match std::fs::read_dir(&dir) { + Ok(r) => r, + Err(_) => continue, + }; + for entry in rd.flatten() { + if state.batch_exhausted(batch_size) { + return; + } + let name = entry.file_name().to_string_lossy().to_string(); + let ft = match entry.file_type() { + Ok(t) => t, + Err(_) => continue, + }; + if ft.is_dir() { + if prefix.is_empty() && INTERNAL_FOLDERS.contains(&name.as_str()) { + continue; + } + let new_prefix = if prefix.is_empty() { + name + } else { + format!("{}/{}", prefix, name) + }; + stack.push((entry.path(), new_prefix)); + } else if ft.is_file() { + let full_key = if prefix.is_empty() { + name + } else { + format!("{}/{}", prefix, name) + }; + state.objects_scanned += 1; + if !indexed.contains(&full_key) { + state.orphaned_objects += 1; + state.push_issue( + "orphaned_object", + bucket, + &full_key, + "file exists without metadata entry".to_string(), + ); + } + } + } + } +} + +fn check_stale_versions( + state: &mut ScanState, + storage_root: &Path, + bucket: &str, + batch_size: usize, +) { + let versions_root = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_VERSIONS_DIR); + if !versions_root.exists() { + return; + } + + let mut stack: Vec = vec![versions_root.clone()]; + while let Some(dir) = stack.pop() { + if state.batch_exhausted(batch_size) { + return; + } + let rd = match std::fs::read_dir(&dir) { + Ok(r) => r, + Err(_) => continue, + }; + + let mut bin_stems: HashMap = HashMap::new(); + let mut json_stems: HashMap = HashMap::new(); + let mut subdirs: Vec = Vec::new(); + + for entry in rd.flatten() { + let ft = match entry.file_type() { + Ok(t) => t, + Err(_) => continue, + }; + let path = entry.path(); + if ft.is_dir() { + subdirs.push(path); + continue; + } + let name = entry.file_name().to_string_lossy().to_string(); + if let Some(stem) = name.strip_suffix(".bin") { + bin_stems.insert(stem.to_string(), path); + } else if let Some(stem) = name.strip_suffix(".json") { + json_stems.insert(stem.to_string(), path); + } + } + + for (stem, path) in &bin_stems { + if state.batch_exhausted(batch_size) { + return; + } + state.objects_scanned += 1; + if !json_stems.contains_key(stem) { + state.stale_versions += 1; + let key = path + .strip_prefix(&versions_root) + .map(|p| p.to_string_lossy().replace('\\', "/")) + .unwrap_or_else(|_| path.display().to_string()); + state.push_issue( + "stale_version", + bucket, + &key, + "version data without manifest".to_string(), + ); + } + } + + for (stem, path) in &json_stems { + if state.batch_exhausted(batch_size) { + return; + } + state.objects_scanned += 1; + if !bin_stems.contains_key(stem) { + state.stale_versions += 1; + let key = path + .strip_prefix(&versions_root) + .map(|p| p.to_string_lossy().replace('\\', "/")) + .unwrap_or_else(|_| 
path.display().to_string()); + state.push_issue( + "stale_version", + bucket, + &key, + "version manifest without data".to_string(), + ); + } + } + + stack.extend(subdirs); + } +} + +fn check_etag_cache( + state: &mut ScanState, + storage_root: &Path, + bucket: &str, + entries: &HashMap, + batch_size: usize, +) { + let etag_index_path = storage_root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join("etag_index.json"); + if !etag_index_path.exists() { + return; + } + + let cache: HashMap = match std::fs::read_to_string(&etag_index_path) + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + { + Some(Value::Object(m)) => m.into_iter().collect(), + _ => return, + }; + + for (full_key, cached_val) in cache { + if state.batch_exhausted(batch_size) { + return; + } + state.objects_scanned += 1; + let Some(cached_etag) = cached_val.as_str() else { + continue; + }; + let Some(info) = entries.get(&full_key) else { + continue; + }; + let Some(stored) = stored_etag(&info.entry) else { + continue; + }; + if cached_etag != stored { + state.etag_cache_inconsistencies += 1; + state.push_issue( + "etag_cache_inconsistency", + bucket, + &full_key, + format!("cached_etag={} index_etag={}", cached_etag, stored), + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn md5_hex(bytes: &[u8]) -> String { + myfsio_crypto::hashing::md5_bytes(bytes) + } + + fn write_index(meta_dir: &Path, entries: &[(&str, &str)]) { + fs::create_dir_all(meta_dir).unwrap(); + let mut map = Map::new(); + for (name, etag) in entries { + map.insert( + name.to_string(), + json!({ "metadata": { "__etag__": etag } }), + ); + } + fs::write( + meta_dir.join(INDEX_FILE), + serde_json::to_string(&Value::Object(map)).unwrap(), + ) + .unwrap(); + } + + #[test] + fn scan_detects_each_issue_type() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let bucket = "testbucket"; + let bucket_path = root.join(bucket); + let meta_root = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + fs::create_dir_all(&bucket_path).unwrap(); + + let clean_bytes = b"clean file contents"; + let clean_etag = md5_hex(clean_bytes); + fs::write(bucket_path.join("clean.txt"), clean_bytes).unwrap(); + + let corrupted_bytes = b"actual content"; + fs::write(bucket_path.join("corrupted.txt"), corrupted_bytes).unwrap(); + + fs::write(bucket_path.join("orphan.txt"), b"no metadata").unwrap(); + + write_index( + &meta_root, + &[ + ("clean.txt", &clean_etag), + ("corrupted.txt", "00000000000000000000000000000000"), + ("phantom.txt", "deadbeefdeadbeefdeadbeefdeadbeef"), + ], + ); + + let versions_root = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_VERSIONS_DIR) + .join("someobject"); + fs::create_dir_all(&versions_root).unwrap(); + fs::write(versions_root.join("v1.bin"), b"orphan bin").unwrap(); + fs::write(versions_root.join("v2.json"), b"{}").unwrap(); + + let etag_index = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join("etag_index.json"); + fs::write( + &etag_index, + serde_json::to_string(&json!({ "clean.txt": "stale-cached-etag" })).unwrap(), + ) + .unwrap(); + + let state = scan_all_buckets(root, 10_000); + + assert_eq!(state.corrupted_objects, 1, "corrupted"); + assert_eq!(state.phantom_metadata, 1, "phantom"); + assert_eq!(state.orphaned_objects, 1, "orphaned"); + assert_eq!(state.stale_versions, 2, "stale versions"); + assert_eq!(state.etag_cache_inconsistencies, 1, "etag cache"); + 
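+ // Only the fixture bucket counts: the .myfsio.sys system root is excluded by
+ // list_bucket_names and must never be scanned as a bucket.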
assert_eq!(state.buckets_scanned, 1); + assert!( + state.errors.is_empty(), + "unexpected errors: {:?}", + state.errors + ); + } + + #[test] + fn skips_system_root_as_bucket() { + let tmp = tempfile::tempdir().unwrap(); + fs::create_dir_all(tmp.path().join(SYSTEM_ROOT).join("config")).unwrap(); + let state = scan_all_buckets(tmp.path(), 100); + assert_eq!(state.buckets_scanned, 0); + } + + #[test] + fn poisoned_entries_are_skipped_during_corruption_scan() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let bucket = "testbucket"; + let bucket_path = root.join(bucket); + let meta_root = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + fs::create_dir_all(&bucket_path).unwrap(); + fs::create_dir_all(&meta_root).unwrap(); + + let bytes = b"some bytes that wont match"; + fs::write(bucket_path.join("rotted.txt"), bytes).unwrap(); + + let mut map = Map::new(); + map.insert( + "rotted.txt".to_string(), + json!({ + "metadata": { + "__etag__": "00000000000000000000000000000000", + "__corrupted__": "true", + "__corruption_detail__": "etag mismatch (already poisoned)", + } + }), + ); + fs::write( + meta_root.join(INDEX_FILE), + serde_json::to_string(&Value::Object(map)).unwrap(), + ) + .unwrap(); + + let state = scan_all_buckets(root, 10_000); + assert_eq!(state.corrupted_objects, 0, "poisoned entries must not re-flag"); + } + + #[test] + fn parse_etag_helpers() { + let detail = "stored_etag=abc123 actual_etag=def456"; + assert_eq!(parse_stored_etag(detail), "abc123"); + assert_eq!(parse_actual_etag(detail), "def456"); + } + + #[test] + fn poisoned_entry_with_missing_file_is_not_phantom() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let bucket = "testbucket"; + let bucket_path = root.join(bucket); + let meta_root = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + fs::create_dir_all(&bucket_path).unwrap(); + fs::create_dir_all(&meta_root).unwrap(); + + let mut map = Map::new(); + map.insert( + "quarantined.txt".to_string(), + json!({ + "metadata": { + "__etag__": "deadbeefdeadbeefdeadbeefdeadbeef", + "__corrupted__": "true", + "__corruption_detail__": "etag mismatch (no peer)", + "__quarantine_path__": ".myfsio.sys/quarantine/testbucket/2026/quarantined.txt", + } + }), + ); + fs::write( + meta_root.join(INDEX_FILE), + serde_json::to_string(&Value::Object(map)).unwrap(), + ) + .unwrap(); + + let state = scan_all_buckets(root, 10_000); + assert_eq!( + state.phantom_metadata, 0, + "poisoned entries with quarantined files must not be reported as phantom metadata" + ); + assert_eq!(state.corrupted_objects, 0); + } + + #[test] + fn healthy_multipart_object_is_not_flagged_corrupted() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let bucket = "testbucket"; + let bucket_path = root.join(bucket); + let meta_root = root + .join(SYSTEM_ROOT) + .join(SYSTEM_BUCKETS_DIR) + .join(bucket) + .join(BUCKET_META_DIR); + fs::create_dir_all(&bucket_path).unwrap(); + + fs::write(bucket_path.join("multi.bin"), b"healthy multipart body").unwrap(); + + write_index( + &meta_root, + &[( + "multi.bin", + "deadbeefdeadbeefdeadbeefdeadbeef-3", + )], + ); + + let state = scan_all_buckets(root, 10_000); + assert_eq!( + state.corrupted_objects, 0, + "multipart-style ETags must not be checked against whole-body MD5" + ); + assert!(state.errors.is_empty(), "unexpected errors: {:?}", state.errors); + } +} diff --git a/crates/myfsio-server/src/services/lifecycle.rs 
b/crates/myfsio-server/src/services/lifecycle.rs new file mode 100644 index 0000000..0483696 --- /dev/null +++ b/crates/myfsio-server/src/services/lifecycle.rs @@ -0,0 +1,637 @@ +use chrono::{DateTime, Duration, Utc}; +use myfsio_storage::fs_backend::FsStorageBackend; +use myfsio_storage::traits::StorageEngine; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tokio::sync::RwLock; + +pub struct LifecycleConfig { + pub interval_seconds: u64, + pub max_history_per_bucket: usize, +} + +impl Default for LifecycleConfig { + fn default() -> Self { + Self { + interval_seconds: 3600, + max_history_per_bucket: 50, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LifecycleExecutionRecord { + pub timestamp: f64, + pub bucket_name: String, + pub objects_deleted: u64, + pub versions_deleted: u64, + pub uploads_aborted: u64, + #[serde(default)] + pub errors: Vec, + pub execution_time_seconds: f64, +} + +#[derive(Debug, Clone, Default)] +struct BucketLifecycleResult { + bucket_name: String, + objects_deleted: u64, + versions_deleted: u64, + uploads_aborted: u64, + errors: Vec, + execution_time_seconds: f64, +} + +#[derive(Debug, Clone, Default)] +struct ParsedLifecycleRule { + status: String, + prefix: String, + expiration_days: Option, + expiration_date: Option>, + noncurrent_days: Option, + abort_incomplete_multipart_days: Option, +} + +pub struct LifecycleService { + storage: Arc, + storage_root: PathBuf, + config: LifecycleConfig, + running: Arc>, +} + +impl LifecycleService { + pub fn new( + storage: Arc, + storage_root: impl Into, + config: LifecycleConfig, + ) -> Self { + Self { + storage, + storage_root: storage_root.into(), + config, + running: Arc::new(RwLock::new(false)), + } + } + + pub async fn run_cycle(&self) -> Result { + { + let mut running = self.running.write().await; + if *running { + return Err("Lifecycle already running".to_string()); + } + *running = true; + } + + let result = self.evaluate_rules().await; + *self.running.write().await = false; + Ok(result) + } + + async fn evaluate_rules(&self) -> Value { + let buckets = match self.storage.list_buckets().await { + Ok(buckets) => buckets, + Err(err) => return json!({ "error": err.to_string() }), + }; + + let mut bucket_results = Vec::new(); + let mut total_objects_deleted = 0u64; + let mut total_versions_deleted = 0u64; + let mut total_uploads_aborted = 0u64; + let mut errors = Vec::new(); + + for bucket in &buckets { + let started_at = std::time::Instant::now(); + let mut result = BucketLifecycleResult { + bucket_name: bucket.name.clone(), + ..Default::default() + }; + + let config = match self.storage.get_bucket_config(&bucket.name).await { + Ok(config) => config, + Err(err) => { + result.errors.push(err.to_string()); + result.execution_time_seconds = started_at.elapsed().as_secs_f64(); + self.append_history(&result); + errors.extend(result.errors.clone()); + bucket_results.push(result); + continue; + } + }; + let Some(lifecycle) = config.lifecycle.as_ref() else { + continue; + }; + let rules = parse_lifecycle_rules(lifecycle); + if rules.is_empty() { + continue; + } + + for rule in &rules { + if rule.status != "Enabled" { + continue; + } + if let Some(err) = self + .apply_expiration_rule(&bucket.name, rule, &mut result) + .await + { + result.errors.push(err); + } + if let Some(err) = self + .apply_noncurrent_expiration_rule(&bucket.name, rule, &mut result) + .await + { + result.errors.push(err); 
+ } + if let Some(err) = self + .apply_abort_incomplete_multipart_rule(&bucket.name, rule, &mut result) + .await + { + result.errors.push(err); + } + } + + result.execution_time_seconds = started_at.elapsed().as_secs_f64(); + if result.objects_deleted > 0 + || result.versions_deleted > 0 + || result.uploads_aborted > 0 + || !result.errors.is_empty() + { + total_objects_deleted += result.objects_deleted; + total_versions_deleted += result.versions_deleted; + total_uploads_aborted += result.uploads_aborted; + errors.extend(result.errors.clone()); + self.append_history(&result); + bucket_results.push(result); + } + } + + json!({ + "objects_deleted": total_objects_deleted, + "versions_deleted": total_versions_deleted, + "multipart_aborted": total_uploads_aborted, + "buckets_evaluated": buckets.len(), + "results": bucket_results.iter().map(result_to_json).collect::>(), + "errors": errors, + }) + } + + async fn apply_expiration_rule( + &self, + bucket: &str, + rule: &ParsedLifecycleRule, + result: &mut BucketLifecycleResult, + ) -> Option { + let cutoff = if let Some(days) = rule.expiration_days { + Some(Utc::now() - Duration::days(days as i64)) + } else { + rule.expiration_date + }; + let Some(cutoff) = cutoff else { + return None; + }; + + let params = myfsio_common::types::ListParams { + max_keys: 10_000, + prefix: if rule.prefix.is_empty() { + None + } else { + Some(rule.prefix.clone()) + }, + ..Default::default() + }; + match self.storage.list_objects(bucket, ¶ms).await { + Ok(objects) => { + for object in &objects.objects { + if object.last_modified < cutoff { + if let Err(err) = self.storage.delete_object(bucket, &object.key).await { + result + .errors + .push(format!("{}:{}: {}", bucket, object.key, err)); + } else { + result.objects_deleted += 1; + } + } + } + None + } + Err(err) => Some(format!("Failed to list objects for {}: {}", bucket, err)), + } + } + + async fn apply_noncurrent_expiration_rule( + &self, + bucket: &str, + rule: &ParsedLifecycleRule, + result: &mut BucketLifecycleResult, + ) -> Option { + let Some(days) = rule.noncurrent_days else { + return None; + }; + let cutoff = Utc::now() - Duration::days(days as i64); + let versions_root = version_root_for_bucket(&self.storage_root, bucket); + if !versions_root.exists() { + return None; + } + + let mut stack = VecDeque::from([versions_root]); + while let Some(current) = stack.pop_front() { + let entries = match std::fs::read_dir(¤t) { + Ok(entries) => entries, + Err(err) => return Some(err.to_string()), + }; + for entry in entries.flatten() { + let file_type = match entry.file_type() { + Ok(file_type) => file_type, + Err(_) => continue, + }; + if file_type.is_dir() { + stack.push_back(entry.path()); + continue; + } + if entry.path().extension().and_then(|ext| ext.to_str()) != Some("json") { + continue; + } + let contents = match std::fs::read_to_string(entry.path()) { + Ok(contents) => contents, + Err(_) => continue, + }; + let Ok(manifest) = serde_json::from_str::(&contents) else { + continue; + }; + let key = manifest + .get("key") + .and_then(|value| value.as_str()) + .unwrap_or_default() + .to_string(); + if !rule.prefix.is_empty() && !key.starts_with(&rule.prefix) { + continue; + } + let archived_at = manifest + .get("archived_at") + .and_then(|value| value.as_str()) + .and_then(|value| DateTime::parse_from_rfc3339(value).ok()) + .map(|value| value.with_timezone(&Utc)); + if archived_at.is_none() || archived_at.unwrap() >= cutoff { + continue; + } + let version_id = manifest + .get("version_id") + .and_then(|value| 
value.as_str()) + .unwrap_or_default(); + let data_path = entry.path().with_file_name(format!("{}.bin", version_id)); + let _ = std::fs::remove_file(&data_path); + let _ = std::fs::remove_file(entry.path()); + result.versions_deleted += 1; + } + } + None + } + + async fn apply_abort_incomplete_multipart_rule( + &self, + bucket: &str, + rule: &ParsedLifecycleRule, + result: &mut BucketLifecycleResult, + ) -> Option { + let Some(days) = rule.abort_incomplete_multipart_days else { + return None; + }; + let cutoff = Utc::now() - Duration::days(days as i64); + match self.storage.list_multipart_uploads(bucket).await { + Ok(uploads) => { + for upload in &uploads { + if upload.initiated < cutoff { + if let Err(err) = self + .storage + .abort_multipart(bucket, &upload.upload_id) + .await + { + result + .errors + .push(format!("abort {}: {}", upload.upload_id, err)); + } else { + result.uploads_aborted += 1; + } + } + } + None + } + Err(err) => Some(format!( + "Failed to list multipart uploads for {}: {}", + bucket, err + )), + } + } + + fn append_history(&self, result: &BucketLifecycleResult) { + let path = lifecycle_history_path(&self.storage_root, &result.bucket_name); + let mut history = load_history(&path); + history.insert( + 0, + LifecycleExecutionRecord { + timestamp: Utc::now().timestamp_millis() as f64 / 1000.0, + bucket_name: result.bucket_name.clone(), + objects_deleted: result.objects_deleted, + versions_deleted: result.versions_deleted, + uploads_aborted: result.uploads_aborted, + errors: result.errors.clone(), + execution_time_seconds: result.execution_time_seconds, + }, + ); + history.truncate(self.config.max_history_per_bucket); + let payload = json!({ + "executions": history, + }); + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write( + &path, + serde_json::to_string_pretty(&payload).unwrap_or_else(|_| "{}".to_string()), + ); + } + + pub fn start_background(self: Arc) -> tokio::task::JoinHandle<()> { + let interval = std::time::Duration::from_secs(self.config.interval_seconds); + tokio::spawn(async move { + let mut timer = tokio::time::interval(interval); + timer.tick().await; + loop { + timer.tick().await; + tracing::info!("Lifecycle evaluation starting"); + match self.run_cycle().await { + Ok(result) => tracing::info!("Lifecycle cycle complete: {:?}", result), + Err(err) => tracing::warn!("Lifecycle cycle failed: {}", err), + } + } + }) + } +} + +pub fn read_history(storage_root: &Path, bucket_name: &str, limit: usize, offset: usize) -> Value { + let path = lifecycle_history_path(storage_root, bucket_name); + let mut history = load_history(&path); + let total = history.len(); + let executions = history + .drain(offset.min(total)..) 
+ .take(limit) + .collect::>(); + json!({ + "executions": executions, + "total": total, + "limit": limit, + "offset": offset, + "enabled": true, + }) +} + +fn load_history(path: &Path) -> Vec { + if !path.exists() { + return Vec::new(); + } + std::fs::read_to_string(path) + .ok() + .and_then(|contents| serde_json::from_str::(&contents).ok()) + .and_then(|value| value.get("executions").cloned()) + .and_then(|value| serde_json::from_value::>(value).ok()) + .unwrap_or_default() +} + +fn lifecycle_history_path(storage_root: &Path, bucket_name: &str) -> PathBuf { + storage_root + .join(".myfsio.sys") + .join("buckets") + .join(bucket_name) + .join("lifecycle_history.json") +} + +fn version_root_for_bucket(storage_root: &Path, bucket_name: &str) -> PathBuf { + storage_root + .join(".myfsio.sys") + .join("buckets") + .join(bucket_name) + .join("versions") +} + +fn parse_lifecycle_rules(value: &Value) -> Vec { + match value { + Value::String(raw) => parse_lifecycle_rules_from_string(raw), + Value::Array(items) => items.iter().filter_map(parse_lifecycle_rule).collect(), + Value::Object(map) => map + .get("Rules") + .and_then(|rules| rules.as_array()) + .map(|rules| rules.iter().filter_map(parse_lifecycle_rule).collect()) + .unwrap_or_default(), + _ => Vec::new(), + } +} + +fn parse_lifecycle_rules_from_string(raw: &str) -> Vec { + if let Ok(json) = serde_json::from_str::(raw) { + return parse_lifecycle_rules(&json); + } + let Ok(doc) = roxmltree::Document::parse(raw) else { + return Vec::new(); + }; + doc.descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "Rule") + .map(|rule| ParsedLifecycleRule { + status: child_text(&rule, "Status").unwrap_or_else(|| "Enabled".to_string()), + prefix: child_text(&rule, "Prefix") + .or_else(|| { + rule.descendants() + .find(|node| { + node.is_element() + && node.tag_name().name() == "Filter" + && node.children().any(|child| { + child.is_element() && child.tag_name().name() == "Prefix" + }) + }) + .and_then(|filter| child_text(&filter, "Prefix")) + }) + .unwrap_or_default(), + expiration_days: rule + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Expiration") + .and_then(|expiration| child_text(&expiration, "Days")) + .and_then(|value| value.parse::().ok()), + expiration_date: rule + .descendants() + .find(|node| node.is_element() && node.tag_name().name() == "Expiration") + .and_then(|expiration| child_text(&expiration, "Date")) + .as_deref() + .and_then(parse_datetime), + noncurrent_days: rule + .descendants() + .find(|node| { + node.is_element() && node.tag_name().name() == "NoncurrentVersionExpiration" + }) + .and_then(|node| child_text(&node, "NoncurrentDays")) + .and_then(|value| value.parse::().ok()), + abort_incomplete_multipart_days: rule + .descendants() + .find(|node| { + node.is_element() && node.tag_name().name() == "AbortIncompleteMultipartUpload" + }) + .and_then(|node| child_text(&node, "DaysAfterInitiation")) + .and_then(|value| value.parse::().ok()), + }) + .collect() +} + +fn parse_lifecycle_rule(value: &Value) -> Option { + let map = value.as_object()?; + Some(ParsedLifecycleRule { + status: map + .get("Status") + .and_then(|value| value.as_str()) + .unwrap_or("Enabled") + .to_string(), + prefix: map + .get("Prefix") + .and_then(|value| value.as_str()) + .or_else(|| { + map.get("Filter") + .and_then(|value| value.get("Prefix")) + .and_then(|value| value.as_str()) + }) + .unwrap_or_default() + .to_string(), + expiration_days: map + .get("Expiration") + .and_then(|value| value.get("Days")) 
+ .and_then(|value| value.as_u64()), + expiration_date: map + .get("Expiration") + .and_then(|value| value.get("Date")) + .and_then(|value| value.as_str()) + .and_then(parse_datetime), + noncurrent_days: map + .get("NoncurrentVersionExpiration") + .and_then(|value| value.get("NoncurrentDays")) + .and_then(|value| value.as_u64()), + abort_incomplete_multipart_days: map + .get("AbortIncompleteMultipartUpload") + .and_then(|value| value.get("DaysAfterInitiation")) + .and_then(|value| value.as_u64()), + }) +} + +fn parse_datetime(value: &str) -> Option> { + DateTime::parse_from_rfc3339(value) + .ok() + .map(|value| value.with_timezone(&Utc)) +} + +fn child_text(node: &roxmltree::Node<'_, '_>, name: &str) -> Option { + node.children() + .find(|child| child.is_element() && child.tag_name().name() == name) + .and_then(|child| child.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) +} + +fn result_to_json(result: &BucketLifecycleResult) -> Value { + json!({ + "bucket_name": result.bucket_name, + "objects_deleted": result.objects_deleted, + "versions_deleted": result.versions_deleted, + "uploads_aborted": result.uploads_aborted, + "errors": result.errors, + "execution_time_seconds": result.execution_time_seconds, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Duration; + + #[test] + fn parses_rules_from_xml() { + let xml = r#" + + + Enabled + logs/ + 10 + 30 + 7 + + "#; + let rules = parse_lifecycle_rules(&Value::String(xml.to_string())); + assert_eq!(rules.len(), 1); + assert_eq!(rules[0].prefix, "logs/"); + assert_eq!(rules[0].expiration_days, Some(10)); + assert_eq!(rules[0].noncurrent_days, Some(30)); + assert_eq!(rules[0].abort_incomplete_multipart_days, Some(7)); + } + + #[tokio::test] + async fn run_cycle_writes_history_and_deletes_noncurrent_versions() { + let tmp = tempfile::tempdir().unwrap(); + let storage = Arc::new(FsStorageBackend::new(tmp.path().to_path_buf())); + storage.create_bucket("docs").await.unwrap(); + storage.set_versioning("docs", true).await.unwrap(); + + storage + .put_object( + "docs", + "logs/file.txt", + Box::pin(std::io::Cursor::new(b"old".to_vec())), + None, + ) + .await + .unwrap(); + storage + .put_object( + "docs", + "logs/file.txt", + Box::pin(std::io::Cursor::new(b"new".to_vec())), + None, + ) + .await + .unwrap(); + + let versions_root = version_root_for_bucket(tmp.path(), "docs") + .join("logs") + .join("file.txt"); + let manifest = std::fs::read_dir(&versions_root) + .unwrap() + .flatten() + .find(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("json")) + .unwrap() + .path(); + let old_manifest = json!({ + "version_id": "ver-1", + "key": "logs/file.txt", + "size": 3, + "archived_at": (Utc::now() - Duration::days(45)).to_rfc3339(), + "etag": "etag", + }); + std::fs::write(&manifest, serde_json::to_string(&old_manifest).unwrap()).unwrap(); + std::fs::write(manifest.with_file_name("ver-1.bin"), b"old").unwrap(); + + let lifecycle_xml = r#" + + + Enabled + logs/ + 30 + + "#; + let mut config = storage.get_bucket_config("docs").await.unwrap(); + config.lifecycle = Some(Value::String(lifecycle_xml.to_string())); + storage.set_bucket_config("docs", &config).await.unwrap(); + + let service = + LifecycleService::new(storage.clone(), tmp.path(), LifecycleConfig::default()); + let result = service.run_cycle().await.unwrap(); + assert_eq!(result["versions_deleted"], 1); + + let history = read_history(tmp.path(), "docs", 50, 0); + assert_eq!(history["total"], 1); + 
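+ // The single history record should report the one expired noncurrent version that was removed.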
assert_eq!(history["executions"][0]["versions_deleted"], 1); + } +} diff --git a/crates/myfsio-server/src/services/metrics.rs b/crates/myfsio-server/src/services/metrics.rs new file mode 100644 index 0000000..59314bc --- /dev/null +++ b/crates/myfsio-server/src/services/metrics.rs @@ -0,0 +1,368 @@ +use chrono::{DateTime, Utc}; +use parking_lot::Mutex; +use rand::Rng; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +const MAX_LATENCY_SAMPLES: usize = 5000; + +pub struct MetricsConfig { + pub interval_minutes: u64, + pub retention_hours: u64, +} + +impl Default for MetricsConfig { + fn default() -> Self { + Self { + interval_minutes: 5, + retention_hours: 24, + } + } +} + +#[derive(Debug, Clone)] +struct OperationStats { + count: u64, + success_count: u64, + error_count: u64, + latency_sum_ms: f64, + latency_min_ms: f64, + latency_max_ms: f64, + bytes_in: u64, + bytes_out: u64, + latency_samples: Vec, +} + +impl Default for OperationStats { + fn default() -> Self { + Self { + count: 0, + success_count: 0, + error_count: 0, + latency_sum_ms: 0.0, + latency_min_ms: f64::INFINITY, + latency_max_ms: 0.0, + bytes_in: 0, + bytes_out: 0, + latency_samples: Vec::new(), + } + } +} + +impl OperationStats { + fn record(&mut self, latency_ms: f64, success: bool, bytes_in: u64, bytes_out: u64) { + self.count += 1; + if success { + self.success_count += 1; + } else { + self.error_count += 1; + } + self.latency_sum_ms += latency_ms; + if latency_ms < self.latency_min_ms { + self.latency_min_ms = latency_ms; + } + if latency_ms > self.latency_max_ms { + self.latency_max_ms = latency_ms; + } + self.bytes_in += bytes_in; + self.bytes_out += bytes_out; + + if self.latency_samples.len() < MAX_LATENCY_SAMPLES { + self.latency_samples.push(latency_ms); + } else { + let mut rng = rand::thread_rng(); + let j = rng.gen_range(0..self.count as usize); + if j < MAX_LATENCY_SAMPLES { + self.latency_samples[j] = latency_ms; + } + } + } + + fn compute_percentile(sorted: &[f64], p: f64) -> f64 { + if sorted.is_empty() { + return 0.0; + } + let k = (sorted.len() - 1) as f64 * (p / 100.0); + let f = k.floor() as usize; + let c = (f + 1).min(sorted.len() - 1); + let d = k - f as f64; + sorted[f] + d * (sorted[c] - sorted[f]) + } + + fn to_json(&self) -> Value { + let avg = if self.count > 0 { + self.latency_sum_ms / self.count as f64 + } else { + 0.0 + }; + let min = if self.latency_min_ms.is_infinite() { + 0.0 + } else { + self.latency_min_ms + }; + let mut sorted = self.latency_samples.clone(); + sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + json!({ + "count": self.count, + "success_count": self.success_count, + "error_count": self.error_count, + "latency_avg_ms": round2(avg), + "latency_min_ms": round2(min), + "latency_max_ms": round2(self.latency_max_ms), + "latency_p50_ms": round2(Self::compute_percentile(&sorted, 50.0)), + "latency_p95_ms": round2(Self::compute_percentile(&sorted, 95.0)), + "latency_p99_ms": round2(Self::compute_percentile(&sorted, 99.0)), + "bytes_in": self.bytes_in, + "bytes_out": self.bytes_out, + }) + } +} + +fn round2(v: f64) -> f64 { + (v * 100.0).round() / 100.0 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetricsSnapshot { + pub timestamp: DateTime, + pub window_seconds: u64, + pub by_method: HashMap, + pub by_endpoint: HashMap, + pub by_status_class: HashMap, + pub error_codes: HashMap, + 
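+ /// Aggregate request counters and latency stats across all methods and endpoints for this snapshot window.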
pub totals: Value, +} + +struct Inner { + by_method: HashMap, + by_endpoint: HashMap, + by_status_class: HashMap, + error_codes: HashMap, + totals: OperationStats, + window_start: f64, + snapshots: Vec, +} + +pub struct MetricsService { + config: MetricsConfig, + inner: Arc>, + snapshots_path: PathBuf, +} + +impl MetricsService { + pub fn new(storage_root: &Path, config: MetricsConfig) -> Self { + let snapshots_path = storage_root + .join(".myfsio.sys") + .join("config") + .join("operation_metrics.json"); + + let mut snapshots: Vec = if snapshots_path.exists() { + std::fs::read_to_string(&snapshots_path) + .ok() + .and_then(|s| serde_json::from_str::(&s).ok()) + .and_then(|v| { + v.get("snapshots").and_then(|s| { + serde_json::from_value::>(s.clone()).ok() + }) + }) + .unwrap_or_default() + } else { + Vec::new() + }; + let cutoff = now_secs() - (config.retention_hours * 3600) as f64; + snapshots.retain(|s| s.timestamp.timestamp() as f64 > cutoff); + + Self { + config, + inner: Arc::new(Mutex::new(Inner { + by_method: HashMap::new(), + by_endpoint: HashMap::new(), + by_status_class: HashMap::new(), + error_codes: HashMap::new(), + totals: OperationStats::default(), + window_start: now_secs(), + snapshots, + })), + snapshots_path, + } + } + + pub fn record_request( + &self, + method: &str, + endpoint_type: &str, + status_code: u16, + latency_ms: f64, + bytes_in: u64, + bytes_out: u64, + error_code: Option<&str>, + ) { + let success = (200..400).contains(&status_code); + let status_class = format!("{}xx", status_code / 100); + + let mut inner = self.inner.lock(); + inner + .by_method + .entry(method.to_string()) + .or_default() + .record(latency_ms, success, bytes_in, bytes_out); + inner + .by_endpoint + .entry(endpoint_type.to_string()) + .or_default() + .record(latency_ms, success, bytes_in, bytes_out); + *inner.by_status_class.entry(status_class).or_insert(0) += 1; + if let Some(code) = error_code { + *inner.error_codes.entry(code.to_string()).or_insert(0) += 1; + } + inner + .totals + .record(latency_ms, success, bytes_in, bytes_out); + } + + pub fn get_current_stats(&self) -> Value { + let inner = self.inner.lock(); + let window_seconds = (now_secs() - inner.window_start).max(0.0) as u64; + let by_method: HashMap = inner + .by_method + .iter() + .map(|(k, v)| (k.clone(), v.to_json())) + .collect(); + let by_endpoint: HashMap = inner + .by_endpoint + .iter() + .map(|(k, v)| (k.clone(), v.to_json())) + .collect(); + json!({ + "timestamp": Utc::now().to_rfc3339(), + "window_seconds": window_seconds, + "by_method": by_method, + "by_endpoint": by_endpoint, + "by_status_class": inner.by_status_class, + "error_codes": inner.error_codes, + "totals": inner.totals.to_json(), + }) + } + + pub fn get_history(&self, hours: Option) -> Vec { + let inner = self.inner.lock(); + let mut snapshots = inner.snapshots.clone(); + if let Some(h) = hours { + let cutoff = now_secs() - (h * 3600) as f64; + snapshots.retain(|s| s.timestamp.timestamp() as f64 > cutoff); + } + snapshots + } + + pub fn snapshot(&self) -> Value { + let current = self.get_current_stats(); + let history = self.get_history(None); + json!({ + "enabled": true, + "current": current, + "snapshots": history, + }) + } + + fn take_snapshot(&self) { + let snapshot = { + let mut inner = self.inner.lock(); + let window_seconds = (now_secs() - inner.window_start).max(0.0) as u64; + + let by_method: HashMap = inner + .by_method + .iter() + .map(|(k, v)| (k.clone(), v.to_json())) + .collect(); + let by_endpoint: HashMap = inner + .by_endpoint + 
.iter() + .map(|(k, v)| (k.clone(), v.to_json())) + .collect(); + + let snap = MetricsSnapshot { + timestamp: Utc::now(), + window_seconds, + by_method, + by_endpoint, + by_status_class: inner.by_status_class.clone(), + error_codes: inner.error_codes.clone(), + totals: inner.totals.to_json(), + }; + + inner.snapshots.push(snap.clone()); + let cutoff = now_secs() - (self.config.retention_hours * 3600) as f64; + inner + .snapshots + .retain(|s| s.timestamp.timestamp() as f64 > cutoff); + + inner.by_method.clear(); + inner.by_endpoint.clear(); + inner.by_status_class.clear(); + inner.error_codes.clear(); + inner.totals = OperationStats::default(); + inner.window_start = now_secs(); + + snap + }; + let _ = snapshot; + self.save_snapshots(); + } + + fn save_snapshots(&self) { + let snapshots = { self.inner.lock().snapshots.clone() }; + if let Some(parent) = self.snapshots_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let data = json!({ "snapshots": snapshots }); + let _ = std::fs::write( + &self.snapshots_path, + serde_json::to_string_pretty(&data).unwrap_or_default(), + ); + } + + pub fn start_background(self: Arc) -> tokio::task::JoinHandle<()> { + let interval = std::time::Duration::from_secs(self.config.interval_minutes * 60); + tokio::spawn(async move { + let mut timer = tokio::time::interval(interval); + timer.tick().await; + loop { + timer.tick().await; + self.take_snapshot(); + } + }) + } +} + +pub fn classify_endpoint(path: &str) -> &'static str { + if path.is_empty() || path == "/" { + return "service"; + } + let trimmed = path.trim_end_matches('/'); + if trimmed.starts_with("/ui") { + return "ui"; + } + if trimmed.starts_with("/kms") { + return "kms"; + } + if trimmed.starts_with("/myfsio") { + return "service"; + } + let parts: Vec<&str> = trimmed.trim_start_matches('/').split('/').collect(); + match parts.len() { + 0 => "service", + 1 => "bucket", + _ => "object", + } +} + +fn now_secs() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs_f64()) + .unwrap_or(0.0) +} diff --git a/crates/myfsio-server/src/services/mod.rs b/crates/myfsio-server/src/services/mod.rs new file mode 100644 index 0000000..f21b6fc --- /dev/null +++ b/crates/myfsio-server/src/services/mod.rs @@ -0,0 +1,15 @@ +pub mod access_logging; +pub mod acl; +pub mod gc; +pub mod integrity; +pub mod lifecycle; +pub mod metrics; +pub mod notifications; +pub mod object_lock; +pub mod peer_fetch; +pub mod replication; +pub mod s3_client; +pub mod site_registry; +pub mod site_sync; +pub mod system_metrics; +pub mod website_domains; diff --git a/crates/myfsio-server/src/services/notifications.rs b/crates/myfsio-server/src/services/notifications.rs new file mode 100644 index 0000000..9d06cc7 --- /dev/null +++ b/crates/myfsio-server/src/services/notifications.rs @@ -0,0 +1,296 @@ +use crate::state::AppState; +use chrono::{DateTime, Utc}; +use myfsio_storage::traits::StorageEngine; +use serde::Serialize; +use serde_json::json; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WebhookDestination { + pub url: String, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct NotificationConfiguration { + pub id: String, + pub events: Vec, + pub destination: WebhookDestination, + pub prefix_filter: String, + pub suffix_filter: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct NotificationEvent { + #[serde(rename = "eventVersion")] + event_version: &'static str, + #[serde(rename = "eventSource")] + event_source: &'static str, + #[serde(rename = "awsRegion")] + aws_region: 
&'static str, + #[serde(rename = "eventTime")] + event_time: String, + #[serde(rename = "eventName")] + event_name: String, + #[serde(rename = "userIdentity")] + user_identity: serde_json::Value, + #[serde(rename = "requestParameters")] + request_parameters: serde_json::Value, + #[serde(rename = "responseElements")] + response_elements: serde_json::Value, + s3: serde_json::Value, +} + +impl NotificationConfiguration { + pub fn matches_event(&self, event_name: &str, object_key: &str) -> bool { + let event_match = self.events.iter().any(|pattern| { + if let Some(prefix) = pattern.strip_suffix('*') { + event_name.starts_with(prefix) + } else { + pattern == event_name + } + }); + if !event_match { + return false; + } + if !self.prefix_filter.is_empty() && !object_key.starts_with(&self.prefix_filter) { + return false; + } + if !self.suffix_filter.is_empty() && !object_key.ends_with(&self.suffix_filter) { + return false; + } + true + } +} + +pub fn parse_notification_configurations( + xml: &str, +) -> Result, String> { + let doc = roxmltree::Document::parse(xml).map_err(|err| err.to_string())?; + let mut configs = Vec::new(); + + for webhook in doc + .descendants() + .filter(|node| node.is_element() && node.tag_name().name() == "WebhookConfiguration") + { + let id = child_text(&webhook, "Id").unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + let events = webhook + .children() + .filter(|node| node.is_element() && node.tag_name().name() == "Event") + .filter_map(|node| node.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) + .collect::>(); + + let destination = webhook + .children() + .find(|node| node.is_element() && node.tag_name().name() == "Destination"); + let url = destination + .as_ref() + .and_then(|node| child_text(node, "Url")) + .unwrap_or_default(); + if url.trim().is_empty() { + return Err("Destination URL is required".to_string()); + } + + let mut prefix_filter = String::new(); + let mut suffix_filter = String::new(); + if let Some(filter) = webhook + .children() + .find(|node| node.is_element() && node.tag_name().name() == "Filter") + { + if let Some(key) = filter + .children() + .find(|node| node.is_element() && node.tag_name().name() == "S3Key") + { + for rule in key + .children() + .filter(|node| node.is_element() && node.tag_name().name() == "FilterRule") + { + let name = child_text(&rule, "Name").unwrap_or_default(); + let value = child_text(&rule, "Value").unwrap_or_default(); + if name == "prefix" { + prefix_filter = value; + } else if name == "suffix" { + suffix_filter = value; + } + } + } + } + + configs.push(NotificationConfiguration { + id, + events, + destination: WebhookDestination { url }, + prefix_filter, + suffix_filter, + }); + } + + Ok(configs) +} + +pub fn emit_object_created( + state: &AppState, + bucket: &str, + key: &str, + size: u64, + etag: Option<&str>, + request_id: &str, + source_ip: &str, + user_identity: &str, + operation: &str, +) { + emit_notifications( + state.clone(), + bucket.to_string(), + key.to_string(), + format!("s3:ObjectCreated:{}", operation), + size, + etag.unwrap_or_default().to_string(), + request_id.to_string(), + source_ip.to_string(), + user_identity.to_string(), + ); +} + +pub fn emit_object_removed( + state: &AppState, + bucket: &str, + key: &str, + request_id: &str, + source_ip: &str, + user_identity: &str, + operation: &str, +) { + emit_notifications( + state.clone(), + bucket.to_string(), + key.to_string(), + format!("s3:ObjectRemoved:{}", operation), + 0, + String::new(), + 
request_id.to_string(), + source_ip.to_string(), + user_identity.to_string(), + ); +} + +fn emit_notifications( + state: AppState, + bucket: String, + key: String, + event_name: String, + size: u64, + etag: String, + request_id: String, + source_ip: String, + user_identity: String, +) { + tokio::spawn(async move { + let config = match state.storage.get_bucket_config(&bucket).await { + Ok(config) => config, + Err(_) => return, + }; + let raw = match config.notification { + Some(serde_json::Value::String(raw)) => raw, + _ => return, + }; + let configs = match parse_notification_configurations(&raw) { + Ok(configs) => configs, + Err(err) => { + tracing::warn!("Invalid notification config for bucket {}: {}", bucket, err); + return; + } + }; + + let record = NotificationEvent { + event_version: "2.1", + event_source: "myfsio:s3", + aws_region: "local", + event_time: format_event_time(Utc::now()), + event_name: event_name.clone(), + user_identity: json!({ "principalId": if user_identity.is_empty() { "ANONYMOUS" } else { &user_identity } }), + request_parameters: json!({ "sourceIPAddress": if source_ip.is_empty() { "127.0.0.1" } else { &source_ip } }), + response_elements: json!({ + "x-amz-request-id": request_id, + "x-amz-id-2": request_id, + }), + s3: json!({ + "s3SchemaVersion": "1.0", + "configurationId": "notification", + "bucket": { + "name": bucket, + "ownerIdentity": { "principalId": "local" }, + "arn": format!("arn:aws:s3:::{}", bucket), + }, + "object": { + "key": key, + "size": size, + "eTag": etag, + "versionId": "null", + "sequencer": format!("{:016X}", Utc::now().timestamp_millis()), + } + }), + }; + let payload = json!({ "Records": [record] }); + let client = reqwest::Client::new(); + + for config in configs { + if !config.matches_event(&event_name, &key) { + continue; + } + let result = client + .post(&config.destination.url) + .header("content-type", "application/json") + .json(&payload) + .send() + .await; + if let Err(err) = result { + tracing::warn!( + "Failed to deliver notification for {} to {}: {}", + event_name, + config.destination.url, + err + ); + } + } + }); +} + +fn format_event_time(value: DateTime) -> String { + value.format("%Y-%m-%dT%H:%M:%S.000Z").to_string() +} + +fn child_text(node: &roxmltree::Node<'_, '_>, name: &str) -> Option { + node.children() + .find(|child| child.is_element() && child.tag_name().name() == name) + .and_then(|child| child.text()) + .map(|text| text.trim().to_string()) + .filter(|text| !text.is_empty()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_webhook_configuration() { + let xml = r#" + + + upload + s3:ObjectCreated:* + https://example.com/hook + + + prefixlogs/ + suffix.txt + + + + "#; + let configs = parse_notification_configurations(xml).unwrap(); + assert_eq!(configs.len(), 1); + assert!(configs[0].matches_event("s3:ObjectCreated:Put", "logs/test.txt")); + assert!(!configs[0].matches_event("s3:ObjectRemoved:Delete", "logs/test.txt")); + } +} diff --git a/crates/myfsio-server/src/services/object_lock.rs b/crates/myfsio-server/src/services/object_lock.rs new file mode 100644 index 0000000..7e93883 --- /dev/null +++ b/crates/myfsio-server/src/services/object_lock.rs @@ -0,0 +1,128 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +pub const LEGAL_HOLD_METADATA_KEY: &str = "__legal_hold__"; +pub const RETENTION_METADATA_KEY: &str = "__object_retention__"; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum RetentionMode { + 
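+ /// Retention that callers with the bypass-governance permission may override before it expires.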
GOVERNANCE, + COMPLIANCE, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct ObjectLockRetention { + pub mode: RetentionMode, + pub retain_until_date: DateTime, +} + +impl ObjectLockRetention { + pub fn is_expired(&self) -> bool { + Utc::now() > self.retain_until_date + } +} + +pub fn get_object_retention(metadata: &HashMap) -> Option { + metadata + .get(RETENTION_METADATA_KEY) + .and_then(|raw| serde_json::from_str::(raw).ok()) +} + +pub fn set_object_retention( + metadata: &mut HashMap, + retention: &ObjectLockRetention, +) -> Result<(), String> { + let encoded = serde_json::to_string(retention).map_err(|err| err.to_string())?; + metadata.insert(RETENTION_METADATA_KEY.to_string(), encoded); + Ok(()) +} + +pub fn get_legal_hold(metadata: &HashMap) -> bool { + metadata + .get(LEGAL_HOLD_METADATA_KEY) + .map(|value| value.eq_ignore_ascii_case("ON") || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) +} + +pub fn set_legal_hold(metadata: &mut HashMap, enabled: bool) { + metadata.insert( + LEGAL_HOLD_METADATA_KEY.to_string(), + if enabled { "ON" } else { "OFF" }.to_string(), + ); +} + +pub fn ensure_retention_mutable( + metadata: &HashMap, + bypass_governance: bool, +) -> Result<(), String> { + let Some(existing) = get_object_retention(metadata) else { + return Ok(()); + }; + if existing.is_expired() { + return Ok(()); + } + match existing.mode { + RetentionMode::COMPLIANCE => Err(format!( + "Cannot modify retention on object with COMPLIANCE mode until retention expires" + )), + RetentionMode::GOVERNANCE if !bypass_governance => Err( + "Cannot modify GOVERNANCE retention without bypass-governance permission".to_string(), + ), + RetentionMode::GOVERNANCE => Ok(()), + } +} + +pub fn can_delete_object( + metadata: &HashMap, + bypass_governance: bool, +) -> Result<(), String> { + if get_legal_hold(metadata) { + return Err("Object is under legal hold".to_string()); + } + if let Some(retention) = get_object_retention(metadata) { + if !retention.is_expired() { + return match retention.mode { + RetentionMode::COMPLIANCE => Err(format!( + "Object is locked in COMPLIANCE mode until {}", + retention.retain_until_date.to_rfc3339() + )), + RetentionMode::GOVERNANCE if !bypass_governance => Err(format!( + "Object is locked in GOVERNANCE mode until {}", + retention.retain_until_date.to_rfc3339() + )), + RetentionMode::GOVERNANCE => Ok(()), + }; + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Duration; + + #[test] + fn legal_hold_blocks_delete() { + let mut metadata = HashMap::new(); + set_legal_hold(&mut metadata, true); + let err = can_delete_object(&metadata, false).unwrap_err(); + assert!(err.contains("legal hold")); + } + + #[test] + fn governance_requires_bypass() { + let mut metadata = HashMap::new(); + set_object_retention( + &mut metadata, + &ObjectLockRetention { + mode: RetentionMode::GOVERNANCE, + retain_until_date: Utc::now() + Duration::hours(1), + }, + ) + .unwrap(); + assert!(can_delete_object(&metadata, false).is_err()); + assert!(can_delete_object(&metadata, true).is_ok()); + } +} diff --git a/crates/myfsio-server/src/services/peer_fetch.rs b/crates/myfsio-server/src/services/peer_fetch.rs new file mode 100644 index 0000000..687360e --- /dev/null +++ b/crates/myfsio-server/src/services/peer_fetch.rs @@ -0,0 +1,256 @@ +use std::collections::HashMap; +use std::path::Path; +use std::pin::Pin; +use std::sync::Arc; + +use aws_sdk_s3::Client; +use md5::{Digest, Md5}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt}; + 
+use myfsio_storage::fs_backend::{is_multipart_etag, FsStorageBackend}; +use myfsio_storage::traits::StorageEngine; + +use crate::services::replication::ReplicationManager; +use crate::services::s3_client::{build_client, ClientOptions}; +use crate::stores::connections::ConnectionStore; + +pub struct PeerFetcher { + storage: Arc, + connections: Arc, + replication: Arc, + client_options: ClientOptions, +} + +#[derive(Debug)] +pub enum HealOutcome { + Healed { peer_etag: String, bytes: u64 }, + PeerMismatch { stored: String, peer: String }, + PeerUnavailable { error: String }, + NotConfigured, + VerifyFailed { expected: String, actual: String }, +} + +impl PeerFetcher { + pub fn new( + storage: Arc, + connections: Arc, + replication: Arc, + client_options: ClientOptions, + ) -> Self { + Self { + storage, + connections, + replication, + client_options, + } + } + + fn build_client_for_bucket(&self, bucket: &str) -> Option<(Client, String)> { + let rule = self.replication.get_rule(bucket)?; + if !rule.enabled { + return None; + } + let conn = self.connections.get(&rule.target_connection_id)?; + let client = build_client(&conn, &self.client_options); + Some((client, rule.target_bucket)) + } + + pub async fn fetch_into_storage( + &self, + client: &Client, + remote_bucket: &str, + local_bucket: &str, + key: &str, + ) -> bool { + let resp = match client + .get_object() + .bucket(remote_bucket) + .key(key) + .send() + .await + { + Ok(r) => r, + Err(err) => { + tracing::error!("Pull GetObject failed {}/{}: {:?}", local_bucket, key, err); + return false; + } + }; + + let head = match client + .head_object() + .bucket(remote_bucket) + .key(key) + .send() + .await + { + Ok(r) => r, + Err(err) => { + tracing::error!("Pull HeadObject failed {}/{}: {:?}", local_bucket, key, err); + return false; + } + }; + + let metadata: Option> = head + .metadata() + .map(|m| m.iter().map(|(k, v)| (k.clone(), v.clone())).collect()); + + let stream = resp.body.into_async_read(); + let boxed: Pin> = Box::pin(stream); + + match self + .storage + .put_object(local_bucket, key, boxed, metadata) + .await + { + Ok(_) => { + tracing::debug!("Pulled object {}/{} from remote", local_bucket, key); + true + } + Err(err) => { + tracing::error!( + "Store pulled object failed {}/{}: {}", + local_bucket, + key, + err + ); + false + } + } + } + + pub async fn fetch_for_heal( + &self, + local_bucket: &str, + key: &str, + expected_etag: &str, + dest_path: &Path, + ) -> HealOutcome { + let (client, target_bucket) = match self.build_client_for_bucket(local_bucket) { + Some(v) => v, + None => return HealOutcome::NotConfigured, + }; + + let head = match client + .head_object() + .bucket(&target_bucket) + .key(key) + .send() + .await + { + Ok(r) => r, + Err(err) => { + return HealOutcome::PeerUnavailable { + error: format!("HeadObject: {:?}", err), + }; + } + }; + + let peer_etag = head.e_tag().unwrap_or("").trim_matches('"').to_string(); + if peer_etag.is_empty() { + return HealOutcome::PeerUnavailable { + error: "remote returned empty ETag".into(), + }; + } + if peer_etag != expected_etag { + return HealOutcome::PeerMismatch { + stored: expected_etag.to_string(), + peer: peer_etag, + }; + } + + let resp = match client + .get_object() + .bucket(&target_bucket) + .key(key) + .send() + .await + { + Ok(r) => r, + Err(err) => { + return HealOutcome::PeerUnavailable { + error: format!("GetObject: {:?}", err), + }; + } + }; + + if let Some(parent) = dest_path.parent() { + if let Err(e) = tokio::fs::create_dir_all(parent).await { + return 
HealOutcome::PeerUnavailable { + error: format!("mkdir parent: {}", e), + }; + } + } + + let mut file = match tokio::fs::File::create(dest_path).await { + Ok(f) => f, + Err(e) => { + return HealOutcome::PeerUnavailable { + error: format!("create temp: {}", e), + }; + } + }; + let mut reader = resp.body.into_async_read(); + let mut hasher = Md5::new(); + let mut buf = vec![0u8; 64 * 1024]; + let mut total: u64 = 0; + loop { + let n = match reader.read(&mut buf).await { + Ok(n) => n, + Err(e) => { + drop(file); + let _ = tokio::fs::remove_file(dest_path).await; + return HealOutcome::PeerUnavailable { + error: format!("read body: {}", e), + }; + } + }; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + if let Err(e) = file.write_all(&buf[..n]).await { + drop(file); + let _ = tokio::fs::remove_file(dest_path).await; + return HealOutcome::PeerUnavailable { + error: format!("write temp: {}", e), + }; + } + total += n as u64; + } + if let Err(e) = file.flush().await { + return HealOutcome::PeerUnavailable { + error: format!("flush temp: {}", e), + }; + } + drop(file); + + let actual = format!("{:x}", hasher.finalize()); + if !is_multipart_etag(expected_etag) && actual != expected_etag { + let _ = tokio::fs::remove_file(dest_path).await; + return HealOutcome::VerifyFailed { + expected: expected_etag.to_string(), + actual, + }; + } + + HealOutcome::Healed { + peer_etag, + bytes: total, + } + } +} + +#[cfg(test)] +mod tests { + use myfsio_storage::fs_backend::is_multipart_etag; + + #[test] + fn detects_multipart_etags() { + assert!(is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e-3")); + assert!(is_multipart_etag("00000000000000000000000000000000-1")); + assert!(!is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e")); + assert!(!is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e-")); + assert!(!is_multipart_etag("not-hex-at-all-1")); + assert!(!is_multipart_etag("d41d8cd98f00b204e9800998ecf8427e-abc")); + } +} diff --git a/crates/myfsio-server/src/services/replication.rs b/crates/myfsio-server/src/services/replication.rs new file mode 100644 index 0000000..7efa047 --- /dev/null +++ b/crates/myfsio-server/src/services/replication.rs @@ -0,0 +1,724 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use aws_sdk_s3::primitives::ByteStream; +use parking_lot::Mutex; +use serde::{Deserialize, Serialize}; +use tokio::sync::Semaphore; + +use myfsio_common::types::ListParams; +use myfsio_storage::fs_backend::{metadata_is_corrupted, FsStorageBackend}; +use myfsio_storage::traits::StorageEngine; + +use crate::services::s3_client::{build_client, check_endpoint_health, ClientOptions}; +use crate::stores::connections::{ConnectionStore, RemoteConnection}; + +pub const MODE_NEW_ONLY: &str = "new_only"; +pub const MODE_ALL: &str = "all"; +pub const MODE_BIDIRECTIONAL: &str = "bidirectional"; + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ReplicationStats { + #[serde(default)] + pub objects_synced: u64, + #[serde(default)] + pub objects_pending: u64, + #[serde(default)] + pub objects_orphaned: u64, + #[serde(default)] + pub bytes_synced: u64, + #[serde(default)] + pub last_sync_at: Option, + #[serde(default)] + pub last_sync_key: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReplicationRule { + pub bucket_name: String, + pub target_connection_id: String, + pub target_bucket: String, + #[serde(default = "default_true")] + pub enabled: bool, + #[serde(default = 
"default_mode")] + pub mode: String, + #[serde(default)] + pub created_at: Option, + #[serde(default)] + pub stats: ReplicationStats, + #[serde(default = "default_true")] + pub sync_deletions: bool, + #[serde(default)] + pub last_pull_at: Option, + #[serde(default)] + pub filter_prefix: Option, +} + +fn default_true() -> bool { + true +} +fn default_mode() -> String { + MODE_NEW_ONLY.to_string() +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReplicationFailure { + pub object_key: String, + pub error_message: String, + pub timestamp: f64, + pub failure_count: u32, + pub bucket_name: String, + pub action: String, + #[serde(default)] + pub last_error_code: Option, +} + +pub struct ReplicationFailureStore { + storage_root: PathBuf, + max_failures_per_bucket: usize, + cache: Mutex>>, +} + +impl ReplicationFailureStore { + pub fn new(storage_root: PathBuf, max_failures_per_bucket: usize) -> Self { + Self { + storage_root, + max_failures_per_bucket, + cache: Mutex::new(HashMap::new()), + } + } + + fn path(&self, bucket: &str) -> PathBuf { + self.storage_root + .join(".myfsio.sys") + .join("buckets") + .join(bucket) + .join("replication_failures.json") + } + + fn load_from_disk(&self, bucket: &str) -> Vec { + let path = self.path(bucket); + if !path.exists() { + return Vec::new(); + } + match std::fs::read_to_string(&path) { + Ok(text) => { + let parsed: serde_json::Value = match serde_json::from_str(&text) { + Ok(v) => v, + Err(_) => return Vec::new(), + }; + parsed + .get("failures") + .and_then(|v| serde_json::from_value(v.clone()).ok()) + .unwrap_or_default() + } + Err(_) => Vec::new(), + } + } + + fn save_to_disk(&self, bucket: &str, failures: &[ReplicationFailure]) { + let path = self.path(bucket); + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let trimmed = &failures[..failures.len().min(self.max_failures_per_bucket)]; + let data = serde_json::json!({ "failures": trimmed }); + let _ = std::fs::write( + &path, + serde_json::to_string_pretty(&data).unwrap_or_default(), + ); + } + + pub fn load(&self, bucket: &str) -> Vec { + let mut cache = self.cache.lock(); + if let Some(existing) = cache.get(bucket) { + return existing.clone(); + } + let loaded = self.load_from_disk(bucket); + cache.insert(bucket.to_string(), loaded.clone()); + loaded + } + + pub fn save(&self, bucket: &str, failures: Vec) { + let trimmed: Vec = failures + .into_iter() + .take(self.max_failures_per_bucket) + .collect(); + self.save_to_disk(bucket, &trimmed); + self.cache.lock().insert(bucket.to_string(), trimmed); + } + + pub fn add(&self, bucket: &str, failure: ReplicationFailure) { + let mut failures = self.load(bucket); + if let Some(existing) = failures + .iter_mut() + .find(|f| f.object_key == failure.object_key) + { + existing.failure_count += 1; + existing.timestamp = failure.timestamp; + existing.error_message = failure.error_message.clone(); + existing.last_error_code = failure.last_error_code.clone(); + } else { + failures.insert(0, failure); + } + self.save(bucket, failures); + } + + pub fn remove(&self, bucket: &str, object_key: &str) -> bool { + let failures = self.load(bucket); + let before = failures.len(); + let after: Vec<_> = failures + .into_iter() + .filter(|f| f.object_key != object_key) + .collect(); + if after.len() != before { + self.save(bucket, after); + true + } else { + false + } + } + + pub fn clear(&self, bucket: &str) { + self.cache.lock().remove(bucket); + let path = self.path(bucket); + let _ = std::fs::remove_file(path); + } + + 
pub fn get(&self, bucket: &str, object_key: &str) -> Option { + self.load(bucket) + .into_iter() + .find(|f| f.object_key == object_key) + } + + pub fn count(&self, bucket: &str) -> usize { + self.load(bucket).len() + } +} + +pub struct ReplicationManager { + storage: Arc, + connections: Arc, + rules_path: PathBuf, + rules: Mutex>, + client_options: ClientOptions, + streaming_threshold_bytes: u64, + pub failures: Arc, + semaphore: Arc, +} + +impl ReplicationManager { + pub fn new( + storage: Arc, + connections: Arc, + storage_root: &Path, + connect_timeout: Duration, + read_timeout: Duration, + max_retries: u32, + streaming_threshold_bytes: u64, + max_failures_per_bucket: usize, + ) -> Self { + let rules_path = storage_root + .join(".myfsio.sys") + .join("config") + .join("replication_rules.json"); + let rules = load_rules(&rules_path); + let failures = Arc::new(ReplicationFailureStore::new( + storage_root.to_path_buf(), + max_failures_per_bucket, + )); + let client_options = ClientOptions { + connect_timeout, + read_timeout, + max_attempts: max_retries, + }; + Self { + storage, + connections, + rules_path, + rules: Mutex::new(rules), + client_options, + streaming_threshold_bytes, + failures, + semaphore: Arc::new(Semaphore::new(4)), + } + } + + pub fn reload_rules(&self) { + *self.rules.lock() = load_rules(&self.rules_path); + } + + pub fn list_rules(&self) -> Vec { + self.rules.lock().values().cloned().collect() + } + + pub fn get_rule(&self, bucket: &str) -> Option { + self.rules.lock().get(bucket).cloned() + } + + pub fn set_rule(&self, rule: ReplicationRule) { + { + let mut guard = self.rules.lock(); + guard.insert(rule.bucket_name.clone(), rule); + } + self.save_rules(); + } + + pub fn delete_rule(&self, bucket: &str) { + { + let mut guard = self.rules.lock(); + guard.remove(bucket); + } + self.save_rules(); + } + + pub fn save_rules(&self) { + let snapshot: HashMap = self.rules.lock().clone(); + if let Some(parent) = self.rules_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(text) = serde_json::to_string_pretty(&snapshot) { + let _ = std::fs::write(&self.rules_path, text); + } + } + + fn update_last_sync(&self, bucket: &str, key: &str) { + { + let mut guard = self.rules.lock(); + if let Some(rule) = guard.get_mut(bucket) { + rule.stats.last_sync_at = Some(now_secs()); + rule.stats.last_sync_key = Some(key.to_string()); + } + } + self.save_rules(); + } + + pub async fn trigger(self: Arc, bucket: String, key: String, action: String) { + let rule = match self.get_rule(&bucket) { + Some(r) if r.enabled => r, + _ => return, + }; + let connection = match self.connections.get(&rule.target_connection_id) { + Some(c) => c, + None => { + tracing::warn!( + "Replication skipped for {}/{}: connection {} not found", + bucket, + key, + rule.target_connection_id + ); + return; + } + }; + let permit = match self.semaphore.clone().try_acquire_owned() { + Ok(p) => p, + Err(_) => { + let sem = self.semaphore.clone(); + match sem.acquire_owned().await { + Ok(p) => p, + Err(_) => return, + } + } + }; + let manager = self.clone(); + tokio::spawn(async move { + let _permit = permit; + manager + .replicate_task(&bucket, &key, &rule, &connection, &action) + .await; + }); + } + + pub async fn replicate_existing_objects(self: Arc, bucket: String) -> usize { + let rule = match self.get_rule(&bucket) { + Some(r) if r.enabled => r, + _ => return 0, + }; + let connection = match self.connections.get(&rule.target_connection_id) { + Some(c) => c, + None => { + tracing::warn!( + "Cannot 
replicate existing objects for {}: connection {} not found", + bucket, + rule.target_connection_id + ); + return 0; + } + }; + if !self.check_endpoint(&connection).await { + tracing::warn!( + "Cannot replicate existing objects for {}: endpoint {} is unreachable", + bucket, + connection.endpoint_url + ); + return 0; + } + + let mut continuation_token: Option = None; + let mut submitted = 0usize; + + loop { + let page = match self + .storage + .list_objects( + &bucket, + &ListParams { + max_keys: 1000, + continuation_token: continuation_token.clone(), + prefix: rule.filter_prefix.clone(), + start_after: None, + }, + ) + .await + { + Ok(page) => page, + Err(err) => { + tracing::error!( + "Failed to list existing objects for replication in {}: {}", + bucket, + err + ); + break; + } + }; + + let next_token = page.next_continuation_token.clone(); + let is_truncated = page.is_truncated; + + for object in page.objects { + submitted += 1; + self.clone() + .trigger(bucket.clone(), object.key, "write".to_string()) + .await; + } + + if !is_truncated { + break; + } + + continuation_token = next_token; + if continuation_token.is_none() { + break; + } + } + + submitted + } + + pub fn schedule_existing_objects_sync(self: Arc, bucket: String) { + tokio::spawn(async move { + let submitted = self + .clone() + .replicate_existing_objects(bucket.clone()) + .await; + if submitted > 0 { + tracing::info!( + "Scheduled {} existing object(s) for replication in {}", + submitted, + bucket + ); + } + }); + } + + async fn replicate_task( + &self, + bucket: &str, + object_key: &str, + rule: &ReplicationRule, + conn: &RemoteConnection, + action: &str, + ) { + if object_key.contains("..") || object_key.starts_with('/') || object_key.starts_with('\\') + { + tracing::error!("Invalid object key (path traversal): {}", object_key); + return; + } + + let client = build_client(conn, &self.client_options); + + if action == "delete" { + match client + .delete_object() + .bucket(&rule.target_bucket) + .key(object_key) + .send() + .await + { + Ok(_) => { + tracing::info!( + "Replicated DELETE {}/{} to {} ({})", + bucket, + object_key, + conn.name, + rule.target_bucket + ); + self.update_last_sync(bucket, object_key); + self.failures.remove(bucket, object_key); + } + Err(err) => { + let msg = format!("{:?}", err); + tracing::error!( + "Replication DELETE failed {}/{}: {}", + bucket, + object_key, + msg + ); + self.failures.add( + bucket, + ReplicationFailure { + object_key: object_key.to_string(), + error_message: msg, + timestamp: now_secs(), + failure_count: 1, + bucket_name: bucket.to_string(), + action: "delete".to_string(), + last_error_code: None, + }, + ); + } + } + return; + } + + if let Ok(src_meta) = self.storage.get_object_metadata(bucket, object_key).await { + if metadata_is_corrupted(&src_meta) { + tracing::warn!( + "Replication skipped for {}/{}: source object is poisoned (corrupted)", + bucket, + object_key + ); + return; + } + } + + let src_path = match self.storage.get_object_path(bucket, object_key).await { + Ok(p) => p, + Err(_) => { + tracing::error!("Source object not found: {}/{}", bucket, object_key); + return; + } + }; + let file_size = match tokio::fs::metadata(&src_path).await { + Ok(m) => m.len(), + Err(_) => 0, + }; + let content_type = mime_guess::from_path(&src_path) + .first_raw() + .map(|s| s.to_string()); + + let upload_result = upload_object( + &client, + &rule.target_bucket, + object_key, + &src_path, + file_size, + self.streaming_threshold_bytes, + content_type.as_deref(), + ) + .await; + + let 
final_result = match upload_result { + Err(err) if is_no_such_bucket(&err) => { + tracing::info!( + "Target bucket {} not found, creating it", + rule.target_bucket + ); + match client + .create_bucket() + .bucket(&rule.target_bucket) + .send() + .await + { + Ok(_) | Err(_) => { + upload_object( + &client, + &rule.target_bucket, + object_key, + &src_path, + file_size, + self.streaming_threshold_bytes, + content_type.as_deref(), + ) + .await + } + } + } + other => other, + }; + + match final_result { + Ok(()) => { + tracing::info!( + "Replicated {}/{} to {} ({})", + bucket, + object_key, + conn.name, + rule.target_bucket + ); + self.update_last_sync(bucket, object_key); + self.failures.remove(bucket, object_key); + } + Err(err) => { + let msg = err.to_string(); + tracing::error!("Replication failed {}/{}: {}", bucket, object_key, msg); + self.failures.add( + bucket, + ReplicationFailure { + object_key: object_key.to_string(), + error_message: msg, + timestamp: now_secs(), + failure_count: 1, + bucket_name: bucket.to_string(), + action: action.to_string(), + last_error_code: None, + }, + ); + } + } + } + + pub async fn check_endpoint(&self, conn: &RemoteConnection) -> bool { + let client = build_client(conn, &self.client_options); + check_endpoint_health(&client).await + } + + pub async fn retry_failed(&self, bucket: &str, object_key: &str) -> bool { + let failure = match self.failures.get(bucket, object_key) { + Some(f) => f, + None => return false, + }; + let rule = match self.get_rule(bucket) { + Some(r) if r.enabled => r, + _ => return false, + }; + let conn = match self.connections.get(&rule.target_connection_id) { + Some(c) => c, + None => return false, + }; + self.replicate_task(bucket, object_key, &rule, &conn, &failure.action) + .await; + true + } + + pub async fn retry_all(&self, bucket: &str) -> (usize, usize) { + let failures = self.failures.load(bucket); + if failures.is_empty() { + return (0, 0); + } + let rule = match self.get_rule(bucket) { + Some(r) if r.enabled => r, + _ => return (0, failures.len()), + }; + let conn = match self.connections.get(&rule.target_connection_id) { + Some(c) => c, + None => return (0, failures.len()), + }; + let mut submitted = 0; + for failure in failures { + self.replicate_task(bucket, &failure.object_key, &rule, &conn, &failure.action) + .await; + submitted += 1; + } + (submitted, 0) + } + + pub fn get_failure_count(&self, bucket: &str) -> usize { + self.failures.count(bucket) + } + + pub fn get_failed_items( + &self, + bucket: &str, + limit: usize, + offset: usize, + ) -> Vec { + self.failures + .load(bucket) + .into_iter() + .skip(offset) + .take(limit) + .collect() + } + + pub fn dismiss_failure(&self, bucket: &str, key: &str) -> bool { + self.failures.remove(bucket, key) + } + + pub fn clear_failures(&self, bucket: &str) { + self.failures.clear(bucket); + } + + pub fn rules_snapshot(&self) -> HashMap { + self.rules.lock().clone() + } + + pub fn update_last_pull(&self, bucket: &str, at: f64) { + { + let mut guard = self.rules.lock(); + if let Some(rule) = guard.get_mut(bucket) { + rule.last_pull_at = Some(at); + } + } + self.save_rules(); + } + + pub fn client_options(&self) -> &ClientOptions { + &self.client_options + } +} + +fn is_no_such_bucket(err: &E) -> bool { + let text = format!("{:?}", err); + text.contains("NoSuchBucket") +} + +async fn upload_object( + client: &aws_sdk_s3::Client, + bucket: &str, + key: &str, + path: &Path, + file_size: u64, + streaming_threshold: u64, + content_type: Option<&str>, +) -> Result<(), 
aws_sdk_s3::error::SdkError> { + let mut req = client.put_object().bucket(bucket).key(key); + if let Some(ct) = content_type { + req = req.content_type(ct); + } + + let body = if file_size >= streaming_threshold { + ByteStream::from_path(path).await.map_err(|e| { + aws_sdk_s3::error::SdkError::construction_failure(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + e, + ))) + })? + } else { + let bytes = tokio::fs::read(path) + .await + .map_err(|e| aws_sdk_s3::error::SdkError::construction_failure(Box::new(e)))?; + ByteStream::from(bytes) + }; + + req.body(body).send().await.map(|_| ()) +} + +fn load_rules(path: &Path) -> HashMap { + if !path.exists() { + return HashMap::new(); + } + match std::fs::read_to_string(path) { + Ok(text) => serde_json::from_str(&text).unwrap_or_default(), + Err(_) => HashMap::new(), + } +} + +fn now_secs() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs_f64()) + .unwrap_or(0.0) +} diff --git a/crates/myfsio-server/src/services/s3_client.rs b/crates/myfsio-server/src/services/s3_client.rs new file mode 100644 index 0000000..7f085eb --- /dev/null +++ b/crates/myfsio-server/src/services/s3_client.rs @@ -0,0 +1,64 @@ +use std::time::Duration; + +use aws_config::BehaviorVersion; +use aws_credential_types::Credentials; +use aws_sdk_s3::config::{Region, SharedCredentialsProvider}; +use aws_sdk_s3::Client; + +use crate::stores::connections::RemoteConnection; + +pub struct ClientOptions { + pub connect_timeout: Duration, + pub read_timeout: Duration, + pub max_attempts: u32, +} + +impl Default for ClientOptions { + fn default() -> Self { + Self { + connect_timeout: Duration::from_secs(5), + read_timeout: Duration::from_secs(30), + max_attempts: 2, + } + } +} + +pub fn build_client(connection: &RemoteConnection, options: &ClientOptions) -> Client { + let credentials = Credentials::new( + connection.access_key.clone(), + connection.secret_key.clone(), + None, + None, + "myfsio-replication", + ); + + let timeout_config = aws_smithy_types::timeout::TimeoutConfig::builder() + .connect_timeout(options.connect_timeout) + .read_timeout(options.read_timeout) + .build(); + + let retry_config = + aws_smithy_types::retry::RetryConfig::standard().with_max_attempts(options.max_attempts); + + let config = aws_sdk_s3::config::Builder::new() + .behavior_version(BehaviorVersion::latest()) + .credentials_provider(SharedCredentialsProvider::new(credentials)) + .region(Region::new(connection.region.clone())) + .endpoint_url(connection.endpoint_url.clone()) + .force_path_style(true) + .timeout_config(timeout_config) + .retry_config(retry_config) + .build(); + + Client::from_conf(config) +} + +pub async fn check_endpoint_health(client: &Client) -> bool { + match client.list_buckets().send().await { + Ok(_) => true, + Err(err) => { + tracing::warn!("Endpoint health check failed: {:?}", err); + false + } + } +} diff --git a/crates/myfsio-server/src/services/site_registry.rs b/crates/myfsio-server/src/services/site_registry.rs new file mode 100644 index 0000000..00ab5c5 --- /dev/null +++ b/crates/myfsio-server/src/services/site_registry.rs @@ -0,0 +1,148 @@ +use chrono::Utc; +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::sync::Arc; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SiteInfo { + pub site_id: String, + pub endpoint: String, + #[serde(default = "default_region")] + pub region: String, + #[serde(default = "default_priority")] + pub priority: i32, + #[serde(default)] + pub 
display_name: String, + #[serde(default)] + pub created_at: Option, +} + +fn default_region() -> String { + "us-east-1".to_string() +} +fn default_priority() -> i32 { + 100 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PeerSite { + pub site_id: String, + pub endpoint: String, + #[serde(default = "default_region")] + pub region: String, + #[serde(default = "default_priority")] + pub priority: i32, + #[serde(default)] + pub display_name: String, + #[serde(default)] + pub connection_id: Option, + #[serde(default)] + pub created_at: Option, + #[serde(default)] + pub is_healthy: bool, + #[serde(default)] + pub last_health_check: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +struct RegistryData { + #[serde(default)] + local: Option, + #[serde(default)] + peers: Vec, +} + +pub struct SiteRegistry { + path: PathBuf, + data: Arc>, +} + +impl SiteRegistry { + pub fn new(storage_root: &std::path::Path) -> Self { + let path = storage_root + .join(".myfsio.sys") + .join("config") + .join("site_registry.json"); + let data = if path.exists() { + std::fs::read_to_string(&path) + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default() + } else { + RegistryData::default() + }; + Self { + path, + data: Arc::new(RwLock::new(data)), + } + } + + fn save(&self) { + let data = self.data.read(); + if let Some(parent) = self.path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(json) = serde_json::to_string_pretty(&*data) { + let _ = std::fs::write(&self.path, json); + } + } + + pub fn get_local_site(&self) -> Option { + self.data.read().local.clone() + } + + pub fn set_local_site(&self, site: SiteInfo) { + self.data.write().local = Some(site); + self.save(); + } + + pub fn list_peers(&self) -> Vec { + self.data.read().peers.clone() + } + + pub fn get_peer(&self, site_id: &str) -> Option { + self.data + .read() + .peers + .iter() + .find(|p| p.site_id == site_id) + .cloned() + } + + pub fn add_peer(&self, peer: PeerSite) { + self.data.write().peers.push(peer); + self.save(); + } + + pub fn update_peer(&self, peer: PeerSite) { + let mut data = self.data.write(); + if let Some(existing) = data.peers.iter_mut().find(|p| p.site_id == peer.site_id) { + *existing = peer; + } + drop(data); + self.save(); + } + + pub fn delete_peer(&self, site_id: &str) -> bool { + let mut data = self.data.write(); + let len_before = data.peers.len(); + data.peers.retain(|p| p.site_id != site_id); + let removed = data.peers.len() < len_before; + drop(data); + if removed { + self.save(); + } + removed + } + + pub fn update_health(&self, site_id: &str, is_healthy: bool) { + let mut data = self.data.write(); + if let Some(peer) = data.peers.iter_mut().find(|p| p.site_id == site_id) { + peer.is_healthy = is_healthy; + peer.last_health_check = Some(Utc::now().to_rfc3339()); + } + drop(data); + self.save(); + } +} diff --git a/crates/myfsio-server/src/services/site_sync.rs b/crates/myfsio-server/src/services/site_sync.rs new file mode 100644 index 0000000..c70595a --- /dev/null +++ b/crates/myfsio-server/src/services/site_sync.rs @@ -0,0 +1,463 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use aws_sdk_s3::Client; +use parking_lot::Mutex; +use serde::{Deserialize, Serialize}; +use tokio::sync::Notify; + +use myfsio_common::types::{ListParams, ObjectMeta}; +use myfsio_storage::fs_backend::FsStorageBackend; +use myfsio_storage::traits::StorageEngine; + +use 
crate::services::peer_fetch::PeerFetcher; +use crate::services::replication::{ReplicationManager, ReplicationRule, MODE_BIDIRECTIONAL}; +use crate::services::s3_client::{build_client, ClientOptions}; +use crate::stores::connections::ConnectionStore; + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct SyncedObjectInfo { + pub last_synced_at: f64, + pub remote_etag: String, + pub source: String, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct SyncState { + #[serde(default)] + pub synced_objects: HashMap, + #[serde(default)] + pub last_full_sync: Option, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct SiteSyncStats { + pub last_sync_at: Option, + pub objects_pulled: u64, + pub objects_skipped: u64, + pub conflicts_resolved: u64, + pub deletions_applied: u64, + pub errors: u64, +} + +#[derive(Debug, Clone)] +struct RemoteObjectMeta { + last_modified: f64, + etag: String, +} + +pub struct SiteSyncWorker { + storage: Arc, + connections: Arc, + replication: Arc, + peer_fetcher: Arc, + storage_root: PathBuf, + interval: Duration, + batch_size: usize, + clock_skew_tolerance: f64, + client_options: ClientOptions, + bucket_stats: Mutex>, + shutdown: Arc, +} + +impl SiteSyncWorker { + pub fn new( + storage: Arc, + connections: Arc, + replication: Arc, + storage_root: PathBuf, + interval_seconds: u64, + batch_size: usize, + connect_timeout: Duration, + read_timeout: Duration, + max_retries: u32, + clock_skew_tolerance: f64, + ) -> Self { + let client_options = ClientOptions { + connect_timeout, + read_timeout, + max_attempts: max_retries, + }; + let peer_fetcher = Arc::new(PeerFetcher::new( + storage.clone(), + connections.clone(), + replication.clone(), + ClientOptions { + connect_timeout, + read_timeout, + max_attempts: max_retries, + }, + )); + Self { + storage, + connections, + replication, + peer_fetcher, + storage_root, + interval: Duration::from_secs(interval_seconds), + batch_size, + clock_skew_tolerance, + client_options, + bucket_stats: Mutex::new(HashMap::new()), + shutdown: Arc::new(Notify::new()), + } + } + + pub fn peer_fetcher(&self) -> Arc { + self.peer_fetcher.clone() + } + + pub fn shutdown(&self) { + self.shutdown.notify_waiters(); + } + + pub fn get_stats(&self, bucket: &str) -> Option { + self.bucket_stats.lock().get(bucket).cloned() + } + + pub async fn run(self: Arc) { + tracing::info!( + "Site sync worker started (interval={}s)", + self.interval.as_secs() + ); + loop { + tokio::select! 
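+ // Wait one interval between cycles, but wake immediately (and return from
+ // the loop) as soon as shutdown() notifies the worker.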
{ + _ = tokio::time::sleep(self.interval) => {} + _ = self.shutdown.notified() => { + tracing::info!("Site sync worker shutting down"); + return; + } + } + self.run_cycle().await; + } + } + + async fn run_cycle(&self) { + let rules = self.replication.rules_snapshot(); + for (bucket, rule) in rules { + if rule.mode != MODE_BIDIRECTIONAL || !rule.enabled { + continue; + } + match self.sync_bucket(&rule).await { + Ok(stats) => { + self.bucket_stats.lock().insert(bucket, stats); + } + Err(e) => { + tracing::error!("Site sync failed for bucket {}: {}", bucket, e); + } + } + } + } + + pub async fn trigger_sync(&self, bucket: &str) -> Option { + let rule = self.replication.get_rule(bucket)?; + if rule.mode != MODE_BIDIRECTIONAL || !rule.enabled { + return None; + } + match self.sync_bucket(&rule).await { + Ok(stats) => { + self.bucket_stats + .lock() + .insert(bucket.to_string(), stats.clone()); + Some(stats) + } + Err(e) => { + tracing::error!("Site sync trigger failed for {}: {}", bucket, e); + None + } + } + } + + async fn sync_bucket(&self, rule: &ReplicationRule) -> Result { + let mut stats = SiteSyncStats::default(); + let connection = self + .connections + .get(&rule.target_connection_id) + .ok_or_else(|| format!("connection {} not found", rule.target_connection_id))?; + + let local_objects = self + .list_local_objects(&rule.bucket_name) + .await + .map_err(|e| format!("list local failed: {}", e))?; + + let client = build_client(&connection, &self.client_options); + let remote_objects = self + .list_remote_objects(&client, &rule.target_bucket) + .await + .map_err(|e| format!("list remote failed: {}", e))?; + + let mut sync_state = self.load_sync_state(&rule.bucket_name); + + let mut to_pull: Vec = Vec::new(); + for (key, remote_meta) in &remote_objects { + if let Some(local_meta) = local_objects.get(key) { + match self.resolve_conflict(local_meta, remote_meta) { + "pull" => { + to_pull.push(key.clone()); + stats.conflicts_resolved += 1; + } + _ => { + stats.objects_skipped += 1; + } + } + } else { + to_pull.push(key.clone()); + } + } + + let mut pulled = 0usize; + for key in &to_pull { + if pulled >= self.batch_size { + break; + } + let remote_meta = match remote_objects.get(key) { + Some(m) => m, + None => continue, + }; + if self + .pull_object(&client, &rule.target_bucket, &rule.bucket_name, key) + .await + { + stats.objects_pulled += 1; + pulled += 1; + sync_state.synced_objects.insert( + key.clone(), + SyncedObjectInfo { + last_synced_at: now_secs(), + remote_etag: remote_meta.etag.clone(), + source: "remote".to_string(), + }, + ); + } else { + stats.errors += 1; + } + } + + if rule.sync_deletions { + let tracked_keys: Vec = sync_state.synced_objects.keys().cloned().collect(); + for key in tracked_keys { + if remote_objects.contains_key(&key) { + continue; + } + let local_meta = match local_objects.get(&key) { + Some(m) => m, + None => continue, + }; + let tracked = match sync_state.synced_objects.get(&key) { + Some(t) => t.clone(), + None => continue, + }; + if tracked.source != "remote" { + continue; + } + let local_ts = local_meta.last_modified.timestamp() as f64; + if local_ts <= tracked.last_synced_at + && self.apply_remote_deletion(&rule.bucket_name, &key).await + { + stats.deletions_applied += 1; + sync_state.synced_objects.remove(&key); + } + } + } + + sync_state.last_full_sync = Some(now_secs()); + self.save_sync_state(&rule.bucket_name, &sync_state); + + self.replication + .update_last_pull(&rule.bucket_name, now_secs()); + + stats.last_sync_at = Some(now_secs()); + 
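+ // The state saved above is a small per-bucket JSON document; illustratively
+ // (keys and values here are made-up examples, not real data):
+ //   {
+ //     "synced_objects": {
+ //       "reports/q1.csv": {
+ //         "last_synced_at": 1718000000.0,
+ //         "remote_etag": "5d41402abc4b2a76b9719d911017c592",
+ //         "source": "remote"
+ //       }
+ //     },
+ //     "last_full_sync": 1718000123.5
+ //   }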
tracing::info!( + "Site sync completed for {}: pulled={}, skipped={}, conflicts={}, deletions={}, errors={}", + rule.bucket_name, + stats.objects_pulled, + stats.objects_skipped, + stats.conflicts_resolved, + stats.deletions_applied, + stats.errors, + ); + Ok(stats) + } + + async fn list_local_objects( + &self, + bucket: &str, + ) -> Result, String> { + let mut result = HashMap::new(); + let mut token: Option = None; + loop { + let params = ListParams { + max_keys: 1000, + continuation_token: token.clone(), + prefix: None, + start_after: None, + }; + let page = self + .storage + .list_objects(bucket, ¶ms) + .await + .map_err(|e| e.to_string())?; + for obj in page.objects { + result.insert(obj.key.clone(), obj); + } + if !page.is_truncated { + break; + } + token = page.next_continuation_token; + if token.is_none() { + break; + } + } + Ok(result) + } + + async fn list_remote_objects( + &self, + client: &Client, + bucket: &str, + ) -> Result, String> { + let mut result = HashMap::new(); + let mut continuation: Option = None; + loop { + let mut req = client.list_objects_v2().bucket(bucket); + if let Some(ref t) = continuation { + req = req.continuation_token(t); + } + let resp = match req.send().await { + Ok(r) => r, + Err(err) => { + if is_not_found_error(&err) { + return Ok(result); + } + return Err(format!("{:?}", err)); + } + }; + for obj in resp.contents() { + let key = match obj.key() { + Some(k) => k.to_string(), + None => continue, + }; + let last_modified = obj + .last_modified() + .and_then(|t| { + let secs = t.secs(); + let nanos = t.subsec_nanos(); + Some(secs as f64 + nanos as f64 / 1_000_000_000.0) + }) + .unwrap_or(0.0); + let etag = obj.e_tag().unwrap_or("").trim_matches('"').to_string(); + result.insert( + key, + RemoteObjectMeta { + last_modified, + etag, + }, + ); + } + if resp.is_truncated().unwrap_or(false) { + continuation = resp.next_continuation_token().map(|s| s.to_string()); + if continuation.is_none() { + break; + } + } else { + break; + } + } + Ok(result) + } + + fn resolve_conflict(&self, local: &ObjectMeta, remote: &RemoteObjectMeta) -> &'static str { + let local_ts = local.last_modified.timestamp() as f64 + + local.last_modified.timestamp_subsec_nanos() as f64 / 1_000_000_000.0; + let remote_ts = remote.last_modified; + + if (remote_ts - local_ts).abs() < self.clock_skew_tolerance { + let local_etag = local.etag.clone().unwrap_or_default(); + let local_etag_trim = local_etag.trim_matches('"'); + if remote.etag == local_etag_trim { + return "skip"; + } + if remote.etag.as_str() > local_etag_trim { + return "pull"; + } + return "keep"; + } + + if remote_ts > local_ts { + "pull" + } else { + "keep" + } + } + + async fn pull_object( + &self, + client: &Client, + remote_bucket: &str, + local_bucket: &str, + key: &str, + ) -> bool { + self.peer_fetcher + .fetch_into_storage(client, remote_bucket, local_bucket, key) + .await + } + + async fn apply_remote_deletion(&self, bucket: &str, key: &str) -> bool { + match self.storage.delete_object(bucket, key).await { + Ok(_) => { + tracing::debug!("Applied remote deletion for {}/{}", bucket, key); + true + } + Err(err) => { + tracing::error!("Remote deletion failed {}/{}: {}", bucket, key, err); + false + } + } + } + + fn sync_state_path(&self, bucket: &str) -> PathBuf { + self.storage_root + .join(".myfsio.sys") + .join("buckets") + .join(bucket) + .join("site_sync_state.json") + } + + fn load_sync_state(&self, bucket: &str) -> SyncState { + let path = self.sync_state_path(bucket); + if !path.exists() { + return 
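+ // No sync-state file recorded for this bucket yet, so start from an empty
+ // tracking map.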
SyncState::default(); + } + match std::fs::read_to_string(&path) { + Ok(text) => serde_json::from_str(&text).unwrap_or_default(), + Err(_) => SyncState::default(), + } + } + + fn save_sync_state(&self, bucket: &str, state: &SyncState) { + let path = self.sync_state_path(bucket); + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(text) = serde_json::to_string_pretty(state) { + let _ = std::fs::write(&path, text); + } + } +} + +fn now_secs() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs_f64()) + .unwrap_or(0.0) +} + +fn is_not_found_error(err: &aws_sdk_s3::error::SdkError) -> bool { + let msg = format!("{:?}", err); + msg.contains("NoSuchBucket") + || msg.contains("code: Some(\"NotFound\")") + || msg.contains("code: Some(\"NoSuchBucket\")") + || msg.contains("status: 404") +} diff --git a/crates/myfsio-server/src/services/system_metrics.rs b/crates/myfsio-server/src/services/system_metrics.rs new file mode 100644 index 0000000..6c0849d --- /dev/null +++ b/crates/myfsio-server/src/services/system_metrics.rs @@ -0,0 +1,203 @@ +use chrono::{DateTime, Utc}; +use myfsio_storage::fs_backend::FsStorageBackend; +use myfsio_storage::traits::StorageEngine; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use sysinfo::{Disks, System}; +use tokio::sync::RwLock; + +#[derive(Debug, Clone)] +pub struct SystemMetricsConfig { + pub interval_minutes: u64, + pub retention_hours: u64, +} + +impl Default for SystemMetricsConfig { + fn default() -> Self { + Self { + interval_minutes: 5, + retention_hours: 24, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SystemMetricsSnapshot { + pub timestamp: DateTime, + pub cpu_percent: f64, + pub memory_percent: f64, + pub disk_percent: f64, + pub storage_bytes: u64, +} + +pub struct SystemMetricsService { + storage_root: PathBuf, + storage: Arc, + config: SystemMetricsConfig, + history: Arc>>, + history_path: PathBuf, +} + +impl SystemMetricsService { + pub fn new( + storage_root: &Path, + storage: Arc, + config: SystemMetricsConfig, + ) -> Self { + let history_path = storage_root + .join(".myfsio.sys") + .join("config") + .join("metrics_history.json"); + + let mut history = if history_path.exists() { + std::fs::read_to_string(&history_path) + .ok() + .and_then(|s| serde_json::from_str::(&s).ok()) + .and_then(|v| { + v.get("history").and_then(|h| { + serde_json::from_value::>(h.clone()).ok() + }) + }) + .unwrap_or_default() + } else { + Vec::new() + }; + prune_history(&mut history, config.retention_hours); + + Self { + storage_root: storage_root.to_path_buf(), + storage, + config, + history: Arc::new(RwLock::new(history)), + history_path, + } + } + + pub async fn get_history(&self, hours: Option) -> Vec { + let mut history = self.history.read().await.clone(); + prune_history(&mut history, hours.unwrap_or(self.config.retention_hours)); + history + } + + async fn take_snapshot(&self) { + let snapshot = collect_snapshot(&self.storage_root, &self.storage).await; + let mut history = self.history.write().await; + history.push(snapshot); + prune_history(&mut history, self.config.retention_hours); + drop(history); + self.save_history().await; + } + + async fn save_history(&self) { + let history = self.history.read().await; + let data = json!({ "history": *history }); + if let Some(parent) = self.history_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write( + 
&self.history_path, + serde_json::to_string_pretty(&data).unwrap_or_default(), + ); + } + + pub fn start_background(self: Arc) -> tokio::task::JoinHandle<()> { + let interval = + std::time::Duration::from_secs(self.config.interval_minutes.saturating_mul(60)); + tokio::spawn(async move { + self.take_snapshot().await; + let mut timer = tokio::time::interval(interval); + loop { + timer.tick().await; + self.take_snapshot().await; + } + }) + } +} + +fn prune_history(history: &mut Vec, retention_hours: u64) { + let cutoff = Utc::now() - chrono::Duration::hours(retention_hours as i64); + history.retain(|item| item.timestamp > cutoff); +} + +fn sample_system_now() -> (f64, f64) { + let mut system = System::new(); + system.refresh_cpu_usage(); + std::thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL); + system.refresh_cpu_usage(); + system.refresh_memory(); + + let cpu_percent = system.global_cpu_usage() as f64; + let memory_percent = if system.total_memory() > 0 { + (system.used_memory() as f64 / system.total_memory() as f64) * 100.0 + } else { + 0.0 + }; + (cpu_percent, memory_percent) +} + +fn normalize_path_for_mount(path: &Path) -> String { + let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf()); + let raw = canonical.to_string_lossy().to_string(); + let stripped = raw.strip_prefix(r"\\?\").unwrap_or(&raw); + stripped.to_lowercase() +} + +fn sample_disk(path: &Path) -> (u64, u64) { + let disks = Disks::new_with_refreshed_list(); + let path_str = normalize_path_for_mount(path); + let mut best: Option<(usize, u64, u64)> = None; + + for disk in disks.list() { + let mount_raw = disk.mount_point().to_string_lossy().to_string(); + let mount = mount_raw + .strip_prefix(r"\\?\") + .unwrap_or(&mount_raw) + .to_lowercase(); + let total = disk.total_space(); + let free = disk.available_space(); + if path_str.starts_with(&mount) { + let len = mount.len(); + match best { + Some((best_len, _, _)) if len <= best_len => {} + _ => best = Some((len, total, free)), + } + } + } + + best.map(|(_, total, free)| (total, free)).unwrap_or((0, 0)) +} + +async fn collect_snapshot( + storage_root: &Path, + storage: &Arc, +) -> SystemMetricsSnapshot { + let (cpu_percent, memory_percent) = sample_system_now(); + let (disk_total, disk_free) = sample_disk(storage_root); + let disk_percent = if disk_total > 0 { + ((disk_total - disk_free) as f64 / disk_total as f64) * 100.0 + } else { + 0.0 + }; + + let mut storage_bytes = 0u64; + let buckets = storage.list_buckets().await.unwrap_or_default(); + for bucket in buckets { + if let Ok(stats) = storage.bucket_stats(&bucket.name).await { + storage_bytes += stats.total_bytes(); + } + } + + SystemMetricsSnapshot { + timestamp: Utc::now(), + cpu_percent: round2(cpu_percent), + memory_percent: round2(memory_percent), + disk_percent: round2(disk_percent), + storage_bytes, + } +} + +fn round2(value: f64) -> f64 { + (value * 100.0).round() / 100.0 +} diff --git a/crates/myfsio-server/src/services/website_domains.rs b/crates/myfsio-server/src/services/website_domains.rs new file mode 100644 index 0000000..c36277d --- /dev/null +++ b/crates/myfsio-server/src/services/website_domains.rs @@ -0,0 +1,197 @@ +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +struct DomainData { + #[serde(default)] + mappings: HashMap, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum DomainDataFile 
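+ // Accepts either on-disk layout (see the tests below); illustratively:
+ //   legacy flat file:  {"Example.COM": "site-bucket"}
+ //   wrapped file:      {"mappings": {"example.com": "site-bucket"}}
+ // Keys from the flat form are normalized to lowercase on load.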
{ + Wrapped(DomainData), + Flat(HashMap), +} + +impl DomainDataFile { + fn into_domain_data(self) -> DomainData { + match self { + Self::Wrapped(data) => data, + Self::Flat(mappings) => DomainData { + mappings: mappings + .into_iter() + .map(|(domain, bucket)| (normalize_domain(&domain), bucket)) + .collect(), + }, + } + } +} + +pub struct WebsiteDomainStore { + path: PathBuf, + data: Arc>, +} + +impl WebsiteDomainStore { + pub fn new(storage_root: &std::path::Path) -> Self { + let path = storage_root + .join(".myfsio.sys") + .join("config") + .join("website_domains.json"); + let data = if path.exists() { + std::fs::read_to_string(&path) + .ok() + .and_then(|s| serde_json::from_str::(&s).ok()) + .map(DomainDataFile::into_domain_data) + .unwrap_or_default() + } else { + DomainData::default() + }; + Self { + path, + data: Arc::new(RwLock::new(data)), + } + } + + fn save(&self) { + let data = self.data.read(); + if let Some(parent) = self.path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(json) = serde_json::to_string_pretty(&data.mappings) { + let _ = std::fs::write(&self.path, json); + } + } + + pub fn list_all(&self) -> Vec { + self.data + .read() + .mappings + .iter() + .map(|(domain, bucket)| { + serde_json::json!({ + "domain": domain, + "bucket": bucket, + }) + }) + .collect() + } + + pub fn get_bucket(&self, domain: &str) -> Option { + let domain = normalize_domain(domain); + self.data.read().mappings.get(&domain).cloned() + } + + pub fn set_mapping(&self, domain: &str, bucket: &str) { + let domain = normalize_domain(domain); + self.data + .write() + .mappings + .insert(domain, bucket.to_string()); + self.save(); + } + + pub fn delete_mapping(&self, domain: &str) -> bool { + let domain = normalize_domain(domain); + let removed = self.data.write().mappings.remove(&domain).is_some(); + if removed { + self.save(); + } + removed + } +} + +pub fn normalize_domain(domain: &str) -> String { + domain.trim().to_ascii_lowercase() +} + +pub fn is_valid_domain(domain: &str) -> bool { + if domain.is_empty() || domain.len() > 253 { + return false; + } + let labels: Vec<&str> = domain.split('.').collect(); + if labels.len() < 2 { + return false; + } + for label in &labels { + if label.is_empty() || label.len() > 63 { + return false; + } + if !label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { + return false; + } + if label.starts_with('-') || label.ends_with('-') { + return false; + } + } + true +} + +#[cfg(test)] +mod tests { + use super::WebsiteDomainStore; + use serde_json::json; + use tempfile::tempdir; + + #[test] + fn loads_legacy_flat_mapping_file() { + let tmp = tempdir().expect("tempdir"); + let config_dir = tmp.path().join(".myfsio.sys").join("config"); + std::fs::create_dir_all(&config_dir).expect("create config dir"); + std::fs::write( + config_dir.join("website_domains.json"), + r#"{"Example.COM":"site-bucket"}"#, + ) + .expect("write config"); + + let store = WebsiteDomainStore::new(tmp.path()); + + assert_eq!( + store.get_bucket("example.com"), + Some("site-bucket".to_string()) + ); + } + + #[test] + fn loads_wrapped_mapping_file() { + let tmp = tempdir().expect("tempdir"); + let config_dir = tmp.path().join(".myfsio.sys").join("config"); + std::fs::create_dir_all(&config_dir).expect("create config dir"); + std::fs::write( + config_dir.join("website_domains.json"), + r#"{"mappings":{"example.com":"site-bucket"}}"#, + ) + .expect("write config"); + + let store = WebsiteDomainStore::new(tmp.path()); + + assert_eq!( + store.get_bucket("example.com"), + 
Some("site-bucket".to_string()) + ); + } + + #[test] + fn saves_in_shared_plain_mapping_format() { + let tmp = tempdir().expect("tempdir"); + let store = WebsiteDomainStore::new(tmp.path()); + + store.set_mapping("Example.COM", "site-bucket"); + + let saved = std::fs::read_to_string( + tmp.path() + .join(".myfsio.sys") + .join("config") + .join("website_domains.json"), + ) + .expect("read config"); + let json: serde_json::Value = serde_json::from_str(&saved).expect("parse config"); + + assert_eq!(json, json!({"example.com": "site-bucket"})); + } +} diff --git a/crates/myfsio-server/src/session.rs b/crates/myfsio-server/src/session.rs new file mode 100644 index 0000000..dbed866 --- /dev/null +++ b/crates/myfsio-server/src/session.rs @@ -0,0 +1,133 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use parking_lot::RwLock; +use rand::RngCore; +use serde::{Deserialize, Serialize}; + +pub const SESSION_COOKIE_NAME: &str = "myfsio_session"; +pub const CSRF_FIELD_NAME: &str = "csrf_token"; +pub const CSRF_HEADER_NAME: &str = "x-csrf-token"; + +const SESSION_ID_BYTES: usize = 32; +const CSRF_TOKEN_BYTES: usize = 32; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FlashMessage { + pub category: String, + pub message: String, +} + +#[derive(Clone, Debug)] +pub struct SessionData { + pub user_id: Option, + pub display_name: Option, + pub csrf_token: String, + pub flash: Vec, + pub extra: HashMap, + last_accessed: Instant, +} + +impl SessionData { + pub fn new() -> Self { + Self { + user_id: None, + display_name: None, + csrf_token: generate_token(CSRF_TOKEN_BYTES), + flash: Vec::new(), + extra: HashMap::new(), + last_accessed: Instant::now(), + } + } + + pub fn is_authenticated(&self) -> bool { + self.user_id.is_some() + } + + pub fn push_flash(&mut self, category: impl Into, message: impl Into) { + self.flash.push(FlashMessage { + category: category.into(), + message: message.into(), + }); + } + + pub fn take_flash(&mut self) -> Vec { + std::mem::take(&mut self.flash) + } + + pub fn rotate_csrf(&mut self) { + self.csrf_token = generate_token(CSRF_TOKEN_BYTES); + } +} + +impl Default for SessionData { + fn default() -> Self { + Self::new() + } +} + +pub struct SessionStore { + sessions: RwLock>, + ttl: Duration, +} + +impl SessionStore { + pub fn new(ttl: Duration) -> Self { + Self { + sessions: RwLock::new(HashMap::new()), + ttl, + } + } + + pub fn create(&self) -> (String, SessionData) { + let id = generate_token(SESSION_ID_BYTES); + let data = SessionData::new(); + self.sessions.write().insert(id.clone(), data.clone()); + (id, data) + } + + pub fn get(&self, id: &str) -> Option { + let mut guard = self.sessions.write(); + let entry = guard.get_mut(id)?; + if entry.last_accessed.elapsed() > self.ttl { + guard.remove(id); + return None; + } + entry.last_accessed = Instant::now(); + Some(entry.clone()) + } + + pub fn save(&self, id: &str, data: SessionData) { + let mut guard = self.sessions.write(); + let mut updated = data; + updated.last_accessed = Instant::now(); + guard.insert(id.to_string(), updated); + } + + pub fn destroy(&self, id: &str) { + self.sessions.write().remove(id); + } + + pub fn sweep(&self) { + let ttl = self.ttl; + let mut guard = self.sessions.write(); + guard.retain(|_, data| data.last_accessed.elapsed() <= ttl); + } +} + +pub type SharedSessionStore = Arc; + +pub fn generate_token(bytes: usize) -> String { + let mut buf = vec![0u8; bytes]; + 
rand::thread_rng().fill_bytes(&mut buf); + URL_SAFE_NO_PAD.encode(&buf) +} + +pub fn csrf_tokens_match(a: &str, b: &str) -> bool { + if a.len() != b.len() { + return false; + } + subtle::ConstantTimeEq::ct_eq(a.as_bytes(), b.as_bytes()).into() +} diff --git a/crates/myfsio-server/src/state.rs b/crates/myfsio-server/src/state.rs new file mode 100644 index 0000000..37aa618 --- /dev/null +++ b/crates/myfsio-server/src/state.rs @@ -0,0 +1,267 @@ +use std::sync::Arc; +use std::time::Duration; + +use crate::config::ServerConfig; +use crate::services::access_logging::AccessLoggingService; +use crate::services::gc::GcService; +use crate::services::integrity::IntegrityService; +use crate::services::metrics::MetricsService; +use crate::services::peer_fetch::PeerFetcher; +use crate::services::replication::ReplicationManager; +use crate::services::s3_client::ClientOptions; +use crate::services::site_registry::SiteRegistry; +use crate::services::site_sync::SiteSyncWorker; +use crate::services::system_metrics::SystemMetricsService; +use crate::services::website_domains::WebsiteDomainStore; +use crate::session::SessionStore; +use crate::stores::connections::ConnectionStore; +use crate::templates::TemplateEngine; +use myfsio_auth::iam::IamService; +use myfsio_crypto::encryption::{EncryptionConfig, EncryptionService}; +use myfsio_crypto::kms::KmsService; +use myfsio_storage::fs_backend::{FsStorageBackend, FsStorageBackendConfig}; + +#[derive(Clone)] +pub struct AppState { + pub config: ServerConfig, + pub storage: Arc, + pub iam: Arc, + pub encryption: Option>, + pub kms: Option>, + pub gc: Option>, + pub integrity: Option>, + pub metrics: Option>, + pub system_metrics: Option>, + pub site_registry: Option>, + pub website_domains: Option>, + pub connections: Arc, + pub replication: Arc, + pub site_sync: Option>, + pub templates: Option>, + pub sessions: Arc, + pub access_logging: Arc, +} + +impl AppState { + pub fn new(config: ServerConfig) -> Self { + let storage = Arc::new(FsStorageBackend::new_with_config( + config.storage_root.clone(), + FsStorageBackendConfig { + object_key_max_length_bytes: config.object_key_max_length_bytes, + object_cache_max_size: config.object_cache_max_size, + bucket_config_cache_ttl: Duration::from_secs_f64( + config.bucket_config_cache_ttl_seconds, + ), + stream_chunk_size: config.stream_chunk_size, + }, + )); + let iam = Arc::new(IamService::new_with_secret( + config.iam_config_path.clone(), + config.secret_key.clone(), + )); + + let gc = if config.gc_enabled { + Some(Arc::new(GcService::new( + config.storage_root.clone(), + crate::services::gc::GcConfig { + interval_hours: config.gc_interval_hours, + temp_file_max_age_hours: config.gc_temp_file_max_age_hours, + multipart_max_age_days: config.gc_multipart_max_age_days, + lock_file_max_age_hours: config.gc_lock_file_max_age_hours, + quarantine_max_age_days: config.integrity_quarantine_retention_days, + dry_run: config.gc_dry_run, + }, + ))) + } else { + None + }; + + let metrics = if config.metrics_enabled { + Some(Arc::new(MetricsService::new( + &config.storage_root, + crate::services::metrics::MetricsConfig { + interval_minutes: config.metrics_interval_minutes, + retention_hours: config.metrics_retention_hours, + }, + ))) + } else { + None + }; + + let system_metrics = if config.metrics_history_enabled { + Some(Arc::new(SystemMetricsService::new( + &config.storage_root, + storage.clone(), + crate::services::system_metrics::SystemMetricsConfig { + interval_minutes: config.metrics_history_interval_minutes, + retention_hours: 
config.metrics_history_retention_hours, + }, + ))) + } else { + None + }; + + let site_registry = { + let registry = SiteRegistry::new(&config.storage_root); + if let (Some(site_id), Some(endpoint)) = + (config.site_id.as_deref(), config.site_endpoint.as_deref()) + { + registry.set_local_site(crate::services::site_registry::SiteInfo { + site_id: site_id.to_string(), + endpoint: endpoint.to_string(), + region: config.site_region.clone(), + priority: config.site_priority, + display_name: site_id.to_string(), + created_at: Some(chrono::Utc::now().to_rfc3339()), + }); + } + Some(Arc::new(registry)) + }; + + let website_domains = if config.website_hosting_enabled { + Some(Arc::new(WebsiteDomainStore::new(&config.storage_root))) + } else { + None + }; + + let connections = Arc::new(ConnectionStore::new(&config.storage_root)); + + let replication = Arc::new(ReplicationManager::new( + storage.clone(), + connections.clone(), + &config.storage_root, + Duration::from_secs(config.replication_connect_timeout_secs), + Duration::from_secs(config.replication_read_timeout_secs), + config.replication_max_retries, + config.replication_streaming_threshold_bytes, + config.replication_max_failures_per_bucket, + )); + + let site_sync = if config.site_sync_enabled { + Some(Arc::new(SiteSyncWorker::new( + storage.clone(), + connections.clone(), + replication.clone(), + config.storage_root.clone(), + config.site_sync_interval_secs, + config.site_sync_batch_size, + Duration::from_secs(config.site_sync_connect_timeout_secs), + Duration::from_secs(config.site_sync_read_timeout_secs), + config.site_sync_max_retries, + config.site_sync_clock_skew_tolerance, + ))) + } else { + None + }; + + let integrity_peer_fetcher: Option> = if let Some(ref ss) = site_sync { + Some(ss.peer_fetcher()) + } else { + Some(Arc::new(PeerFetcher::new( + storage.clone(), + connections.clone(), + replication.clone(), + ClientOptions { + connect_timeout: Duration::from_secs(config.site_sync_connect_timeout_secs), + read_timeout: Duration::from_secs(config.site_sync_read_timeout_secs), + max_attempts: config.site_sync_max_retries, + }, + ))) + }; + + let integrity = if config.integrity_enabled { + Some(Arc::new(IntegrityService::new( + storage.clone(), + &config.storage_root, + crate::services::integrity::IntegrityConfig { + interval_hours: config.integrity_interval_hours, + batch_size: config.integrity_batch_size, + auto_heal: config.integrity_auto_heal, + dry_run: config.integrity_dry_run, + heal_concurrency: config.integrity_heal_concurrency, + quarantine_retention_days: config.integrity_quarantine_retention_days, + }, + integrity_peer_fetcher, + ))) + } else { + None + }; + + let templates = init_templates(&config.templates_dir); + let access_logging = Arc::new(AccessLoggingService::new(&config.storage_root)); + let session_ttl = Duration::from_secs(config.session_lifetime_days.saturating_mul(86_400)); + Self { + config, + storage, + iam, + encryption: None, + kms: None, + gc, + integrity, + metrics, + system_metrics, + site_registry, + website_domains, + connections, + replication, + site_sync, + templates, + sessions: Arc::new(SessionStore::new(session_ttl)), + access_logging, + } + } + + pub async fn new_with_encryption(config: ServerConfig) -> Self { + let mut state = Self::new(config.clone()); + + let keys_dir = config.storage_root.join(".myfsio.sys").join("keys"); + + let kms = if config.kms_enabled { + match KmsService::new(&keys_dir).await { + Ok(k) => Some(Arc::new(k)), + Err(e) => { + tracing::error!("Failed to initialize KMS: {}", 
e); + None + } + } + } else { + None + }; + + let encryption = if config.encryption_enabled { + match myfsio_crypto::kms::load_or_create_master_key(&keys_dir).await { + Ok(master_key) => Some(Arc::new(EncryptionService::with_config( + master_key, + kms.clone(), + EncryptionConfig { + chunk_size: config.encryption_chunk_size_bytes, + }, + ))), + Err(e) => { + tracing::error!("Failed to initialize encryption: {}", e); + None + } + } + } else { + None + }; + + state.encryption = encryption; + state.kms = kms; + state + } +} + +fn init_templates(templates_dir: &std::path::Path) -> Option> { + let glob = format!("{}/*.html", templates_dir.display()).replace('\\', "/"); + match TemplateEngine::new(&glob) { + Ok(engine) => { + crate::handlers::ui_pages::register_ui_endpoints(&engine); + Some(Arc::new(engine)) + } + Err(e) => { + tracing::error!("Template engine init failed: {}", e); + None + } + } +} diff --git a/crates/myfsio-server/src/stores/connections.rs b/crates/myfsio-server/src/stores/connections.rs new file mode 100644 index 0000000..7b2b3e5 --- /dev/null +++ b/crates/myfsio-server/src/stores/connections.rs @@ -0,0 +1,94 @@ +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteConnection { + pub id: String, + pub name: String, + pub endpoint_url: String, + pub access_key: String, + pub secret_key: String, + #[serde(default = "default_region")] + pub region: String, +} + +fn default_region() -> String { + "us-east-1".to_string() +} + +pub struct ConnectionStore { + path: PathBuf, + inner: Arc>>, +} + +impl ConnectionStore { + pub fn new(storage_root: &Path) -> Self { + let path = storage_root + .join(".myfsio.sys") + .join("config") + .join("connections.json"); + let inner = Arc::new(RwLock::new(load_from_disk(&path))); + Self { path, inner } + } + + pub fn reload(&self) { + let loaded = load_from_disk(&self.path); + *self.inner.write() = loaded; + } + + pub fn list(&self) -> Vec { + self.inner.read().clone() + } + + pub fn get(&self, id: &str) -> Option { + self.inner.read().iter().find(|c| c.id == id).cloned() + } + + pub fn add(&self, connection: RemoteConnection) -> std::io::Result<()> { + { + let mut guard = self.inner.write(); + if let Some(existing) = guard.iter_mut().find(|c| c.id == connection.id) { + *existing = connection; + } else { + guard.push(connection); + } + } + self.save() + } + + pub fn delete(&self, id: &str) -> std::io::Result { + let removed = { + let mut guard = self.inner.write(); + let before = guard.len(); + guard.retain(|c| c.id != id); + guard.len() != before + }; + if removed { + self.save()?; + } + Ok(removed) + } + + fn save(&self) -> std::io::Result<()> { + if let Some(parent) = self.path.parent() { + std::fs::create_dir_all(parent)?; + } + let snapshot = self.inner.read().clone(); + let bytes = serde_json::to_vec_pretty(&snapshot) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + std::fs::write(&self.path, bytes) + } +} + +fn load_from_disk(path: &Path) -> Vec { + if !path.exists() { + return Vec::new(); + } + match std::fs::read_to_string(path) { + Ok(text) => serde_json::from_str(&text).unwrap_or_default(), + Err(_) => Vec::new(), + } +} diff --git a/crates/myfsio-server/src/stores/mod.rs b/crates/myfsio-server/src/stores/mod.rs new file mode 100644 index 0000000..a67f22c --- /dev/null +++ b/crates/myfsio-server/src/stores/mod.rs @@ -0,0 +1 @@ +pub mod connections; diff --git 
a/crates/myfsio-server/src/templates.rs b/crates/myfsio-server/src/templates.rs new file mode 100644 index 0000000..153fce3 --- /dev/null +++ b/crates/myfsio-server/src/templates.rs @@ -0,0 +1,355 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use parking_lot::RwLock; +use serde_json::Value; +use tera::{Context, Error as TeraError, Tera}; + +pub type EndpointResolver = + Arc) -> Option + Send + Sync>; + +#[derive(Clone)] +pub struct TemplateEngine { + tera: Arc>, + endpoints: Arc>>, +} + +impl TemplateEngine { + pub fn new(template_glob: &str) -> Result { + let mut tera = Tera::new(template_glob)?; + tera.set_escape_fn(html_escape); + register_filters(&mut tera); + + let endpoints: Arc>> = Arc::new(RwLock::new(HashMap::new())); + + register_functions(&mut tera, endpoints.clone()); + + Ok(Self { + tera: Arc::new(RwLock::new(tera)), + endpoints, + }) + } + + pub fn register_endpoint(&self, name: &str, path_template: &str) { + self.endpoints + .write() + .insert(name.to_string(), path_template.to_string()); + } + + pub fn register_endpoints(&self, pairs: &[(&str, &str)]) { + let mut guard = self.endpoints.write(); + for (n, p) in pairs { + guard.insert((*n).to_string(), (*p).to_string()); + } + } + + pub fn render(&self, name: &str, context: &Context) -> Result { + self.tera.read().render(name, context) + } + + pub fn reload(&self) -> Result<(), TeraError> { + self.tera.write().full_reload() + } +} + +fn html_escape(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + for c in input.chars() { + match c { + '&' => out.push_str("&"), + '<' => out.push_str("<"), + '>' => out.push_str(">"), + '"' => out.push_str("""), + '\'' => out.push_str("'"), + _ => out.push(c), + } + } + out +} + +fn register_filters(tera: &mut Tera) { + tera.register_filter("format_datetime", format_datetime_filter); + tera.register_filter("filesizeformat", filesizeformat_filter); + tera.register_filter("slice", slice_filter); +} + +fn register_functions(tera: &mut Tera, endpoints: Arc>>) { + let endpoints_for_url = endpoints.clone(); + tera.register_function( + "url_for", + move |args: &HashMap| -> tera::Result { + let endpoint = args + .get("endpoint") + .and_then(|v| v.as_str()) + .ok_or_else(|| tera::Error::msg("url_for requires endpoint"))?; + if endpoint == "static" { + let filename = args.get("filename").and_then(|v| v.as_str()).unwrap_or(""); + return Ok(Value::String(format!("/static/{}", filename))); + } + let path = match endpoints_for_url.read().get(endpoint) { + Some(p) => p.clone(), + None => { + return Ok(Value::String(format!("/__missing__/{}", endpoint))); + } + }; + Ok(Value::String(substitute_path_params(&path, args))) + }, + ); + + tera.register_function( + "csrf_token", + |args: &HashMap| -> tera::Result { + if let Some(token) = args.get("token").and_then(|v| v.as_str()) { + return Ok(Value::String(token.to_string())); + } + Ok(Value::String(String::new())) + }, + ); +} + +fn substitute_path_params(template: &str, args: &HashMap) -> String { + let mut path = template.to_string(); + let mut query: Vec<(String, String)> = Vec::new(); + for (k, v) in args { + if k == "endpoint" || k == "filename" { + continue; + } + let value_str = value_to_string(v); + let placeholder = format!("{{{}}}", k); + if path.contains(&placeholder) { + let encoded = urlencode_path(&value_str); + path = path.replace(&placeholder, &encoded); + } else { + query.push((k.clone(), value_str)); + } + } + if !query.is_empty() { + let qs: Vec = query + .into_iter() + 
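+ // Args that matched no {placeholder} end up here and are appended as a
+ // percent-encoded query string, e.g. tab='replication' becomes "?tab=replication".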
.map(|(k, v)| format!("{}={}", urlencode_query(&k), urlencode_query(&v))) + .collect(); + path.push('?'); + path.push_str(&qs.join("&")); + } + path +} + +fn value_to_string(v: &Value) -> String { + match v { + Value::String(s) => s.clone(), + Value::Number(n) => n.to_string(), + Value::Bool(b) => b.to_string(), + Value::Null => String::new(), + other => other.to_string(), + } +} + +const UNRESERVED: &percent_encoding::AsciiSet = &percent_encoding::NON_ALPHANUMERIC + .remove(b'-') + .remove(b'_') + .remove(b'.') + .remove(b'~'); + +fn urlencode_path(s: &str) -> String { + percent_encoding::utf8_percent_encode(s, UNRESERVED).to_string() +} + +fn urlencode_query(s: &str) -> String { + percent_encoding::utf8_percent_encode(s, UNRESERVED).to_string() +} + +fn format_datetime_filter(value: &Value, args: &HashMap) -> tera::Result { + let format = args + .get("format") + .and_then(|v| v.as_str()) + .unwrap_or("%Y-%m-%d %H:%M:%S UTC"); + + let dt: Option> = match value { + Value::String(s) => DateTime::parse_from_rfc3339(s) + .ok() + .map(|d| d.with_timezone(&Utc)) + .or_else(|| { + DateTime::parse_from_rfc2822(s) + .ok() + .map(|d| d.with_timezone(&Utc)) + }), + Value::Number(n) => n.as_f64().and_then(|f| { + let secs = f as i64; + let nanos = ((f - secs as f64) * 1_000_000_000.0) as u32; + DateTime::::from_timestamp(secs, nanos) + }), + _ => None, + }; + + match dt { + Some(d) => Ok(Value::String(d.format(format).to_string())), + None => Ok(value.clone()), + } +} + +fn slice_filter(value: &Value, args: &HashMap) -> tera::Result { + let start = args.get("start").and_then(|v| v.as_i64()).unwrap_or(0); + let end = args.get("end").and_then(|v| v.as_i64()); + + match value { + Value::String(s) => { + let chars: Vec = s.chars().collect(); + let len = chars.len() as i64; + let norm = |i: i64| -> usize { + if i < 0 { + (len + i).max(0) as usize + } else { + i.min(len) as usize + } + }; + let s_idx = norm(start); + let e_idx = match end { + Some(e) => norm(e), + None => len as usize, + }; + let e_idx = e_idx.max(s_idx); + Ok(Value::String(chars[s_idx..e_idx].iter().collect())) + } + Value::Array(arr) => { + let len = arr.len() as i64; + let norm = |i: i64| -> usize { + if i < 0 { + (len + i).max(0) as usize + } else { + i.min(len) as usize + } + }; + let s_idx = norm(start); + let e_idx = match end { + Some(e) => norm(e), + None => len as usize, + }; + let e_idx = e_idx.max(s_idx); + Ok(Value::Array(arr[s_idx..e_idx].to_vec())) + } + Value::Null => Ok(Value::String(String::new())), + _ => Err(tera::Error::msg("slice: unsupported value type")), + } +} + +fn filesizeformat_filter(value: &Value, _args: &HashMap) -> tera::Result { + let bytes = match value { + Value::Number(n) => n.as_f64().unwrap_or(0.0), + Value::String(s) => s.parse::().unwrap_or(0.0), + _ => 0.0, + }; + + const UNITS: [&str; 6] = ["B", "KB", "MB", "GB", "TB", "PB"]; + let mut size = bytes; + let mut unit = 0; + while size >= 1024.0 && unit < UNITS.len() - 1 { + size /= 1024.0; + unit += 1; + } + let formatted = if unit == 0 { + format!("{} {}", size as u64, UNITS[unit]) + } else { + format!("{:.1} {}", size, UNITS[unit]) + }; + Ok(Value::String(formatted)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_engine() -> TemplateEngine { + let tmp = tempfile::TempDir::new().unwrap(); + let tpl = tmp.path().join("t.html"); + std::fs::write(&tpl, "").unwrap(); + let glob = format!("{}/*.html", tmp.path().display()); + let engine = TemplateEngine::new(&glob).unwrap(); + engine.register_endpoints(&[ + ("ui.buckets_overview", 
"/ui/buckets"), + ("ui.bucket_detail", "/ui/buckets/{bucket_name}"), + ( + "ui.abort_multipart_upload", + "/ui/buckets/{bucket_name}/multipart/{upload_id}/abort", + ), + ]); + engine + } + + fn render_inline(engine: &TemplateEngine, tpl: &str) -> String { + let mut tera = engine.tera.write(); + tera.add_raw_template("__inline__", tpl).unwrap(); + drop(tera); + engine.render("__inline__", &Context::new()).unwrap() + } + + #[test] + fn static_url() { + let e = test_engine(); + let out = render_inline( + &e, + "{{ url_for(endpoint='static', filename='css/main.css') }}", + ); + assert_eq!(out, "/static/css/main.css"); + } + + #[test] + fn path_param_substitution() { + let e = test_engine(); + let out = render_inline( + &e, + "{{ url_for(endpoint='ui.bucket_detail', bucket_name='my-bucket') }}", + ); + assert_eq!(out, "/ui/buckets/my-bucket"); + } + + #[test] + fn extra_args_become_query() { + let e = test_engine(); + let out = render_inline( + &e, + "{{ url_for(endpoint='ui.bucket_detail', bucket_name='b', tab='replication') }}", + ); + assert_eq!(out, "/ui/buckets/b?tab=replication"); + } + + #[test] + fn filesizeformat_basic() { + let v = filesizeformat_filter(&Value::Number(1024.into()), &HashMap::new()).unwrap(); + assert_eq!(v, Value::String("1.0 KB".into())); + let v = filesizeformat_filter(&Value::Number(1_048_576.into()), &HashMap::new()).unwrap(); + assert_eq!(v, Value::String("1.0 MB".into())); + let v = filesizeformat_filter(&Value::Number(500.into()), &HashMap::new()).unwrap(); + assert_eq!(v, Value::String("500 B".into())); + } + + #[test] + fn project_templates_parse() { + let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("templates"); + path.push("*.html"); + let glob = path.to_string_lossy().replace('\\', "/"); + let engine = TemplateEngine::new(&glob).expect("Tera parse failed"); + let names: Vec = engine + .tera + .read() + .get_template_names() + .map(|s| s.to_string()) + .collect(); + assert!( + names.len() >= 10, + "expected 10+ templates, got {}", + names.len() + ); + } + + #[test] + fn format_datetime_rfc3339() { + let v = format_datetime_filter( + &Value::String("2024-06-15T12:34:56Z".into()), + &HashMap::new(), + ) + .unwrap(); + assert_eq!(v, Value::String("2024-06-15 12:34:56 UTC".into())); + } +} diff --git a/static/css/main.css b/crates/myfsio-server/static/css/main.css similarity index 98% rename from static/css/main.css rename to crates/myfsio-server/static/css/main.css index 2a54ee0..e48174d 100644 --- a/static/css/main.css +++ b/crates/myfsio-server/static/css/main.css @@ -1093,6 +1093,26 @@ html.sidebar-will-collapse .sidebar-user { letter-spacing: 0.08em; } +[data-theme='dark'] .docs-table .table-secondary, +[data-theme='dark'] .docs-section .table-secondary { + --bs-table-bg: rgba(148, 163, 184, 0.14); + --bs-table-striped-bg: rgba(148, 163, 184, 0.16); + --bs-table-hover-bg: rgba(148, 163, 184, 0.2); + --bs-table-color: var(--myfsio-text); + color: var(--myfsio-text); +} + +[data-theme='dark'] .docs-table .table-secondary th, +[data-theme='dark'] .docs-table .table-secondary td, +[data-theme='dark'] .docs-table .table-secondary strong, +[data-theme='dark'] .docs-table .table-secondary code, +[data-theme='dark'] .docs-section .table-secondary th, +[data-theme='dark'] .docs-section .table-secondary td, +[data-theme='dark'] .docs-section .table-secondary strong, +[data-theme='dark'] .docs-section .table-secondary code { + color: var(--myfsio-text); +} + .main-content:has(.docs-sidebar) { overflow-x: visible; } @@ -1576,6 +1596,11 
@@ html.sidebar-will-collapse .sidebar-user { border: 1px solid var(--myfsio-card-border); } +.policy-editor-disabled { + opacity: 0.72; + cursor: not-allowed; +} + .objects-table-container { max-height: 600px; overflow-y: auto; @@ -1988,7 +2013,9 @@ pre { } [data-theme='dark'] pre { - background-color: rgba(248, 250, 252, 0.05); + background-color: #111827; + border-color: rgba(148, 163, 184, 0.24); + color: #e5eefb; } pre code { @@ -1997,6 +2024,16 @@ pre code { color: inherit; } +[data-theme='dark'] .docs-section .bg-light { + background-color: #182235 !important; + border: 1px solid rgba(148, 163, 184, 0.18); + color: #e5eefb; +} + +[data-theme='dark'] .docs-section .bg-light .text-muted { + color: #a9b6c8 !important; +} + .docs-section + .docs-section { margin-top: 1.25rem; } diff --git a/static/images/MyFSIO.ico b/crates/myfsio-server/static/images/MyFSIO.ico similarity index 100% rename from static/images/MyFSIO.ico rename to crates/myfsio-server/static/images/MyFSIO.ico diff --git a/static/images/MyFSIO.png b/crates/myfsio-server/static/images/MyFSIO.png similarity index 100% rename from static/images/MyFSIO.png rename to crates/myfsio-server/static/images/MyFSIO.png diff --git a/static/js/bucket-detail-main.js b/crates/myfsio-server/static/js/bucket-detail-main.js similarity index 95% rename from static/js/bucket-detail-main.js rename to crates/myfsio-server/static/js/bucket-detail-main.js index 3f4edb2..7a7ff79 100644 --- a/static/js/bucket-detail-main.js +++ b/crates/myfsio-server/static/js/bucket-detail-main.js @@ -336,6 +336,72 @@ } }; + const renderObjectsLimit = (totalObjects, maxObjects) => { + if (maxObjects && maxObjects > 0) { + const pct = Math.min(100, Math.floor(totalObjects / maxObjects * 100)); + const cls = pct >= 90 ? 'bg-danger' : pct >= 75 ? 'bg-warning' : 'bg-success'; + return '
' + + '
' + + '
' + + '
' + pct + '% of ' + maxObjects.toLocaleString() + ' limit
'; + } + return '
No limit
'; + }; + + const renderBytesLimit = (totalBytes, maxBytes) => { + if (maxBytes && maxBytes > 0) { + const pct = Math.min(100, Math.floor(totalBytes / maxBytes * 100)); + const cls = pct >= 90 ? 'bg-danger' : pct >= 75 ? 'bg-warning' : 'bg-success'; + return '
' + + '
' + + '
' + + '
' + pct + '% of ' + formatBytes(maxBytes) + ' limit
'; + } + return '
No limit
'; + }; + + const redrawUsageLimits = () => { + const objectsCard = document.querySelector('[data-usage-objects]'); + const objectsLimit = document.querySelector('[data-usage-objects-limit]'); + if (objectsCard && objectsLimit) { + const totalObjects = parseInt(objectsCard.dataset.totalObjects || '0', 10); + const maxObjectsRaw = objectsCard.dataset.maxObjects; + const maxObjects = maxObjectsRaw ? parseInt(maxObjectsRaw, 10) : 0; + objectsLimit.innerHTML = renderObjectsLimit(totalObjects, maxObjects); + } + const bytesCard = document.querySelector('[data-usage-bytes]'); + const bytesLimit = document.querySelector('[data-usage-bytes-limit]'); + if (bytesCard && bytesLimit) { + const totalBytes = parseInt(bytesCard.dataset.totalBytes || '0', 10); + const maxBytesRaw = bytesCard.dataset.maxBytes; + const maxBytes = maxBytesRaw ? parseInt(maxBytesRaw, 10) : 0; + bytesLimit.innerHTML = renderBytesLimit(totalBytes, maxBytes); + } + }; + + const refreshBucketUsage = async () => { + try { + const bucketName = objectsContainer?.dataset.bucket; + if (!bucketName) return; + const url = `/ui/buckets/${encodeURIComponent(bucketName)}/stats`; + const response = await fetch(url, { headers: { 'Accept': 'application/json' } }); + if (!response.ok) return; + const data = await response.json(); + + const objectsCard = document.querySelector('[data-usage-objects]'); + const objectsValue = document.querySelector('[data-usage-objects-value]'); + if (objectsCard) objectsCard.dataset.totalObjects = String(data.total_objects); + if (objectsValue) objectsValue.textContent = data.total_objects.toLocaleString(); + + const bytesCard = document.querySelector('[data-usage-bytes]'); + const bytesValue = document.querySelector('[data-usage-bytes-value]'); + if (bytesCard) bytesCard.dataset.totalBytes = String(data.total_bytes); + if (bytesValue) bytesValue.textContent = formatBytes(data.total_bytes); + + redrawUsageLimits(); + } catch (e) { } + }; + let topSpacer = null; let bottomSpacer = null; @@ -486,7 +552,13 @@ let scrollTimeout = null; const handleVirtualScroll = () => { if (scrollTimeout) cancelAnimationFrame(scrollTimeout); - scrollTimeout = requestAnimationFrame(renderVirtualRows); + scrollTimeout = requestAnimationFrame(() => { + renderVirtualRows(); + const c = document.querySelector('.objects-table-container'); + if (c && c.scrollHeight - c.scrollTop - c.clientHeight < 500) { + if (typeof loadMoreOnSentinel === 'function') loadMoreOnSentinel(); + } + }); }; const refreshVirtualList = () => { @@ -497,6 +569,11 @@ if (allObjects.length === 0 && streamFolders.length === 0 && !hasMoreObjects) { showEmptyState(); } else { + const isFiltering = currentFilterTerm && currentFilterTerm.length > 0; + const title = isFiltering ? 'No matches' : 'Empty folder'; + const body = isFiltering + ? `No objects match "${escapeHtml(currentFilterTerm)}".` + : `This folder contains no objects${hasMoreObjects ? ' yet. Loading more...' : '.'}`; objectsTableBody.innerHTML = ` @@ -506,8 +583,8 @@ -
Empty folder
-

This folder contains no objects${hasMoreObjects ? ' yet. Loading more...' : '.'}

+
${title}
+

${body}

@@ -660,6 +737,10 @@ break; case 'count': totalObjectCount = msg.total_count || 0; + if (!currentPrefix) { + bucketTotalObjects = totalObjectCount; + updateObjectCountBadge(); + } if (objectsLoadingRow) { const loadingText = objectsLoadingRow.querySelector('p'); if (loadingText) loadingText.textContent = `Loading 0 of ${totalObjectCount.toLocaleString()} objects...`; @@ -770,7 +851,7 @@ } totalObjectCount = data.total_count || 0; - if (!append && !currentPrefix && !useDelimiterMode) bucketTotalObjects = totalObjectCount; + if (!append && !currentPrefix) bucketTotalObjects = totalObjectCount; nextContinuationToken = data.next_continuation_token; if (!append && objectsLoadingRow) { @@ -907,12 +988,32 @@ scrollContainer.addEventListener('scroll', handleVirtualScroll, { passive: true }); } + const isSentinelVisible = () => { + if (!scrollSentinel) return false; + const rect = scrollSentinel.getBoundingClientRect(); + if (scrollContainer) { + const cr = scrollContainer.getBoundingClientRect(); + return rect.top <= cr.bottom + 500 && rect.bottom >= cr.top - 500; + } + return rect.top <= window.innerHeight + 500 && rect.bottom >= -500; + }; + + const loadMoreOnSentinel = () => { + if (searchResults !== null) { + if (searchNextToken && !searchLoading) { + performServerSearch(currentFilterTerm, true); + } + return; + } + if (hasMoreObjects && !isLoadingObjects) { + loadObjects(true); + } + }; + if (scrollSentinel && scrollContainer) { const containerObserver = new IntersectionObserver((entries) => { entries.forEach(entry => { - if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) { - loadObjects(true); - } + if (entry.isIntersecting) loadMoreOnSentinel(); }); }, { root: scrollContainer, @@ -923,9 +1024,7 @@ const viewportObserver = new IntersectionObserver((entries) => { entries.forEach(entry => { - if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) { - loadObjects(true); - } + if (entry.isIntersecting) loadMoreOnSentinel(); }); }, { root: null, @@ -1161,6 +1260,11 @@ }); if (folders.length === 0 && files.length === 0) { + const isFiltering = currentFilterTerm && currentFilterTerm.length > 0; + const title = isFiltering ? 'No matches' : 'Empty folder'; + const body = isFiltering + ? `No objects match "${escapeHtml(currentFilterTerm)}".` + : 'This folder contains no objects.'; const emptyRow = document.createElement('tr'); emptyRow.innerHTML = ` @@ -1170,8 +1274,8 @@ -
Empty folder
-

This folder contains no objects.

+
${title}
+

${body}

`; @@ -1265,20 +1369,33 @@ let publicPolicyTemplate = normalizePolicyTemplate(policyTextarea?.dataset.publicTemplate || ''); let customPolicyDraft = policyTextarea?.value || ''; + const policyReadonlyHint = document.getElementById('policyReadonlyHint'); + const presetButtons = Array.from(document.querySelectorAll('.preset-btn[data-preset]')); + + const setActivePolicyPreset = (preset) => { + if (policyPreset) { + policyPreset.value = preset; + } + presetButtons.forEach(button => { + button.classList.toggle('active', button.dataset.preset === preset); + }); + }; const setPolicyTextareaState = (readonly) => { if (!policyTextarea) return; if (readonly) { policyTextarea.setAttribute('readonly', 'readonly'); policyTextarea.classList.add('bg-body-secondary'); + policyTextarea.classList.add('policy-editor-disabled'); + policyTextarea.setAttribute('aria-disabled', 'true'); } else { policyTextarea.removeAttribute('readonly'); policyTextarea.classList.remove('bg-body-secondary'); + policyTextarea.classList.remove('policy-editor-disabled'); + policyTextarea.removeAttribute('aria-disabled'); } }; - const policyReadonlyHint = document.getElementById('policyReadonlyHint'); - const applyPolicyPreset = (preset) => { if (!policyTextarea || !policyMode) return; const isPresetMode = preset === 'private' || preset === 'public'; @@ -1310,18 +1427,15 @@ } }); - const presetButtons = document.querySelectorAll('.preset-btn[data-preset]'); presetButtons.forEach(btn => { btn.addEventListener('click', () => { - const preset = btn.dataset.preset; - if (policyPreset) policyPreset.value = preset; - presetButtons.forEach(b => b.classList.remove('active')); - btn.classList.add('active'); - applyPolicyPreset(preset); + setActivePolicyPreset(btn.dataset.preset); + applyPolicyPreset(btn.dataset.preset); }); }); if (policyPreset) { + setActivePolicyPreset(policyPreset.value || policyPreset.dataset.default || 'custom'); applyPolicyPreset(policyPreset.value || policyPreset.dataset.default || 'custom'); } @@ -1341,6 +1455,11 @@ }); const bulkActionsWrapper = document.getElementById('bulk-actions-wrapper'); + const bulkDownloadButton = document.querySelector('[data-bulk-download-trigger]'); + const updateBulkDownloadState = () => { + if (!bulkDownloadButton) return; + bulkDownloadButton.disabled = selectedRows.size === 0; + }; const updateBulkDeleteState = () => { const selectedCount = selectedRows.size; if (bulkDeleteButton) { @@ -1367,6 +1486,7 @@ selectAllCheckbox.checked = visibleSelectedCount > 0 && visibleSelectedCount === total && total > 0; selectAllCheckbox.indeterminate = visibleSelectedCount > 0 && visibleSelectedCount < total; } + updateBulkDownloadState(); }; function toggleRowSelection(row, shouldSelect) { @@ -1481,6 +1601,7 @@ previewPanel.classList.add('d-none'); activeRow = null; loadObjects(false); + refreshBucketUsage(); } catch (error) { bulkDeleteModal?.hide(); showMessage({ title: 'Delete failed', body: (error && error.message) || 'Unable to delete selected objects', variant: 'danger' }); @@ -1956,6 +2077,7 @@ previewPanel.classList.add('d-none'); activeRow = null; loadObjects(false); + refreshBucketUsage(); } catch (err) { if (deleteModal) deleteModal.hide(); showMessage({ title: 'Delete failed', body: err.message || 'Unable to delete object', variant: 'danger' }); @@ -2192,47 +2314,69 @@ const filterWarningText = document.getElementById('filter-warning-text'); const folderViewStatus = document.getElementById('folder-view-status'); - const updateFilterWarning = () => { - if (!filterWarning) return; - const 
isFiltering = currentFilterTerm.length > 0; - if (isFiltering && hasMoreObjects) { - filterWarning.classList.remove('d-none'); - } else { - filterWarning.classList.add('d-none'); - } - }; - let searchDebounceTimer = null; let searchAbortController = null; let searchResults = null; + let searchNextToken = null; + let searchLoading = false; + const SEARCH_PAGE_SIZE = 500; - const performServerSearch = async (term) => { - if (searchAbortController) searchAbortController.abort(); - searchAbortController = new AbortController(); + const updateFilterWarning = () => { + if (!filterWarning) return; + filterWarning.classList.add('d-none'); + }; + const performServerSearch = async (term, append = false) => { + if (!append && searchAbortController) searchAbortController.abort(); + if (append && (searchLoading || !searchNextToken)) return; + if (!append) { + searchAbortController = new AbortController(); + } + searchLoading = true; + if (append && loadMoreSpinner) loadMoreSpinner.classList.remove('d-none'); + + let succeeded = false; try { - const params = new URLSearchParams({ q: term, limit: '500' }); + const params = new URLSearchParams({ q: term, limit: String(SEARCH_PAGE_SIZE) }); if (currentPrefix) params.set('prefix', currentPrefix); + if (append && searchNextToken) params.set('start_after', searchNextToken); const searchUrl = objectsStreamUrl.replace('/stream', '/search'); const response = await fetch(`${searchUrl}?${params}`, { - signal: searchAbortController.signal + signal: searchAbortController?.signal }); if (!response.ok) throw new Error(`HTTP ${response.status}`); const data = await response.json(); - searchResults = (data.results || []).map(obj => processStreamObject(obj)); + const newResults = (data.results || []).map(obj => processStreamObject(obj)); + if (append && Array.isArray(searchResults)) { + searchResults = searchResults.concat(newResults); + } else { + searchResults = newResults; + } + searchNextToken = data.truncated ? (data.next_token || null) : null; memoizedVisibleItems = null; memoizedInputs = { objectCount: -1, folderCount: -1, prefix: null, filterTerm: null }; refreshVirtualList(); if (loadMoreStatus) { const countText = searchResults.length.toLocaleString(); - const truncated = data.truncated ? '+' : ''; - loadMoreStatus.textContent = `${countText}${truncated} result${searchResults.length !== 1 ? 's' : ''}`; + const more = searchNextToken ? '+' : ''; + const noun = searchResults.length === 1 ? 'result' : 'results'; + loadMoreStatus.textContent = searchNextToken + ? 
`${countText}${more} ${noun} (scroll to load more)` + : `${countText} ${noun}`; } + succeeded = true; } catch (e) { if (e.name === 'AbortError') return; if (loadMoreStatus) { - loadMoreStatus.textContent = 'Search failed'; + loadMoreStatus.textContent = 'Search failed (scroll to retry)'; } + } finally { + searchLoading = false; + if (loadMoreSpinner) loadMoreSpinner.classList.add('d-none'); + } + + if (succeeded && searchNextToken && !searchLoading && isSentinelVisible()) { + performServerSearch(currentFilterTerm, true); } }; @@ -2252,6 +2396,7 @@ if (!isFiltering && wasFiltering) { if (searchAbortController) searchAbortController.abort(); searchResults = null; + searchNextToken = null; memoizedVisibleItems = null; memoizedInputs = { objectCount: -1, folderCount: -1, prefix: null, filterTerm: null }; if (loadMoreStatus) { @@ -2827,6 +2972,7 @@ activeXHRs.push(xhr); xhr.open('POST', uploadForm.action, true); xhr.setRequestHeader('X-Requested-With', 'XMLHttpRequest'); + xhr.setRequestHeader('X-CSRFToken', csrfToken || ''); const removeXHR = () => { const idx = activeXHRs.indexOf(xhr); @@ -3060,6 +3206,7 @@ } else if (errorCount > 0) { showMessage({ title: 'Upload failed', body: `${errorCount} file(s) failed to upload.`, variant: 'danger' }); } + if (successCount > 0) refreshBucketUsage(); }; const performBulkUpload = async (files) => { @@ -3127,25 +3274,12 @@ }); uploadForm.addEventListener('submit', async (event) => { + event.preventDefault(); const files = uploadFileInput.files; if (!files || files.length === 0) return; const keyPrefix = (uploadKeyPrefix?.value || '').trim(); - if (files.length === 1 && !keyPrefix) { - const customKey = uploadForm.querySelector('input[name="object_key"]')?.value?.trim(); - if (customKey) { - - if (uploadSubmitBtn) { - uploadSubmitBtn.disabled = true; - if (uploadBtnText) uploadBtnText.textContent = 'Uploading...'; - } - return; - } - } - - event.preventDefault(); - if (uploadSubmitBtn) { uploadSubmitBtn.disabled = true; if (uploadBtnText) uploadBtnText.textContent = 'Uploading...'; @@ -3240,15 +3374,8 @@ } } - const bulkDownloadButton = document.querySelector('[data-bulk-download-trigger]'); const bulkDownloadEndpoint = document.getElementById('objects-drop-zone')?.dataset.bulkDownloadEndpoint; - const updateBulkDownloadState = () => { - if (!bulkDownloadButton) return; - const selectedCount = document.querySelectorAll('[data-object-select]:checked').length; - bulkDownloadButton.disabled = selectedCount === 0; - }; - selectAllCheckbox?.addEventListener('change', (event) => { const shouldSelect = Boolean(event.target?.checked); @@ -3283,7 +3410,6 @@ }); updateBulkDeleteState(); - setTimeout(updateBulkDownloadState, 0); }); bulkDownloadButton?.addEventListener('click', async () => { @@ -3613,15 +3739,6 @@ algoAes256Radio?.addEventListener('change', updateKmsKeyVisibility); algoKmsRadio?.addEventListener('change', updateKmsKeyVisibility); - disableEncryptionBtn?.addEventListener('click', () => { - if (encryptionAction && encryptionForm) { - if (confirm('Are you sure you want to disable default encryption? 
New objects will not be encrypted automatically.')) { - encryptionAction.value = 'disable'; - encryptionForm.submit(); - } - } - }); - const targetBucketInput = document.getElementById('target_bucket'); const targetBucketFeedback = document.getElementById('target_bucket_feedback'); @@ -4340,10 +4457,25 @@ }); if (lifecycleHistoryCard) { - loadLifecycleHistory(); - if (window.pollingManager) { - window.pollingManager.start('lifecycle', loadLifecycleHistory); + const lifecycleTab = document.getElementById('lifecycle-tab'); + const lifecyclePane = document.getElementById('lifecycle-pane'); + const startLifecyclePolling = () => { + if (window.pollingManager) { + window.pollingManager.start('lifecycle', loadLifecycleHistory); + } else { + loadLifecycleHistory(); + } + }; + const stopLifecyclePolling = () => { + if (window.pollingManager) { + window.pollingManager.stop('lifecycle'); + } + }; + if (lifecyclePane && lifecyclePane.classList.contains('show') && lifecyclePane.classList.contains('active')) { + startLifecyclePolling(); } + lifecycleTab?.addEventListener('shown.bs.tab', startLifecyclePolling); + lifecycleTab?.addEventListener('hidden.bs.tab', stopLifecyclePolling); } if (corsCard) loadCorsRules(); @@ -4553,6 +4685,16 @@ var maxObjInput = document.getElementById('max_objects'); if (maxMbInput) maxMbInput.value = maxBytes ? Math.floor(maxBytes / 1048576) : ''; if (maxObjInput) maxObjInput.value = maxObjects || ''; + + var objectsCard = document.querySelector('[data-usage-objects]'); + if (objectsCard) { + objectsCard.dataset.maxObjects = maxObjects && maxObjects > 0 ? String(maxObjects) : ''; + } + var bytesCard = document.querySelector('[data-usage-bytes]'); + if (bytesCard) { + bytesCard.dataset.maxBytes = maxBytes && maxBytes > 0 ? String(maxBytes) : ''; + } + redrawUsageLimits(); } function updatePolicyCard(hasPolicy, preset) { @@ -4653,17 +4795,24 @@ }); } - var disableEncBtn = document.getElementById('disableEncryptionBtn'); - if (disableEncBtn) { - disableEncBtn.addEventListener('click', function () { + var confirmDisableEncBtn = document.getElementById('confirmDisableEncryptionBtn'); + if (confirmDisableEncBtn) { + confirmDisableEncBtn.addEventListener('click', function () { var form = document.getElementById('encryptionForm'); if (!form) return; document.getElementById('encryptionAction').value = 'disable'; + var modalEl = document.getElementById('disableEncryptionModal'); + var modal = modalEl ? 
bootstrap.Modal.getInstance(modalEl) : null; window.UICore.submitFormAjax(form, { successMessage: 'Encryption disabled', onSuccess: function (data) { document.getElementById('encryptionAction').value = 'enable'; + if (modal) modal.hide(); updateEncryptionCard(false, null); + }, + onError: function () { + document.getElementById('encryptionAction').value = 'enable'; + if (modal) modal.hide(); } }); }); @@ -4819,7 +4968,7 @@ e.preventDefault(); window.UICore.submitFormAjax(deleteBucketForm, { onSuccess: function () { - sessionStorage.setItem('flashMessage', JSON.stringify({ title: 'Bucket deleted', variant: 'success' })); + sessionStorage.setItem('flashMessage', JSON.stringify({ title: 'Success', body: 'Bucket deleted', variant: 'success' })); window.location.href = window.BucketDetailConfig?.endpoints?.bucketsOverview || '/ui/buckets'; } }); diff --git a/static/js/bucket-detail-operations.js b/crates/myfsio-server/static/js/bucket-detail-operations.js similarity index 100% rename from static/js/bucket-detail-operations.js rename to crates/myfsio-server/static/js/bucket-detail-operations.js diff --git a/static/js/bucket-detail-upload.js b/crates/myfsio-server/static/js/bucket-detail-upload.js similarity index 99% rename from static/js/bucket-detail-upload.js rename to crates/myfsio-server/static/js/bucket-detail-upload.js index 9daa6d1..6e0c6a7 100644 --- a/static/js/bucket-detail-upload.js +++ b/crates/myfsio-server/static/js/bucket-detail-upload.js @@ -344,6 +344,7 @@ window.BucketDetailUpload = (function() { const xhr = new XMLHttpRequest(); xhr.open('POST', formAction, true); xhr.setRequestHeader('X-Requested-With', 'XMLHttpRequest'); + xhr.setRequestHeader('X-CSRFToken', csrfToken || ''); xhr.upload.addEventListener('progress', (e) => { if (e.lengthComputable) { diff --git a/static/js/bucket-detail-utils.js b/crates/myfsio-server/static/js/bucket-detail-utils.js similarity index 100% rename from static/js/bucket-detail-utils.js rename to crates/myfsio-server/static/js/bucket-detail-utils.js diff --git a/static/js/connections-management.js b/crates/myfsio-server/static/js/connections-management.js similarity index 100% rename from static/js/connections-management.js rename to crates/myfsio-server/static/js/connections-management.js diff --git a/static/js/iam-management.js b/crates/myfsio-server/static/js/iam-management.js similarity index 87% rename from static/js/iam-management.js rename to crates/myfsio-server/static/js/iam-management.js index ef5cde7..ce9b8f3 100644 --- a/static/js/iam-management.js +++ b/crates/myfsio-server/static/js/iam-management.js @@ -15,7 +15,10 @@ window.IAMManagement = (function() { var currentRotateKey = null; var currentEditKey = null; var currentDeleteKey = null; + var currentEditAccessKey = null; + var currentDeleteAccessKey = null; var currentExpiryKey = null; + var currentExpiryAccessKey = null; var ALL_S3_ACTIONS = [ 'list', 'read', 'write', 'delete', 'share', 'policy', @@ -58,6 +61,20 @@ window.IAMManagement = (function() { return bucket === '*' ? 
'All Buckets' : bucket; } + function buildUserUrl(template, userId) { + return template.replace('USER_ID', encodeURIComponent(userId)); + } + + function getUserByIdentifier(identifier) { + return users.find(function(u) { + return u.user_id === identifier || u.access_key === identifier; + }) || null; + } + + function getUserById(userId) { + return users.find(function(u) { return u.user_id === userId; }) || null; + } + function init(config) { users = config.users || []; currentUserKey = config.currentUserKey || null; @@ -129,8 +146,8 @@ window.IAMManagement = (function() { } } - function getUserPolicies(accessKey) { - var user = users.find(function(u) { return u.access_key === accessKey; }); + function getUserPolicies(identifier) { + var user = getUserByIdentifier(identifier); return user ? JSON.stringify(user.policies, null, 2) : ''; } @@ -142,7 +159,7 @@ window.IAMManagement = (function() { function setupPolicyEditor() { var userLabelEl = document.getElementById('policyEditorUserLabel'); - var userInputEl = document.getElementById('policyEditorUser'); + var userInputEl = document.getElementById('policyEditorUserId'); var textareaEl = document.getElementById('policyEditorDocument'); document.querySelectorAll('[data-policy-template]').forEach(function(button) { @@ -153,12 +170,13 @@ window.IAMManagement = (function() { document.querySelectorAll('[data-policy-editor]').forEach(function(button) { button.addEventListener('click', function() { - var key = button.getAttribute('data-access-key'); - if (!key) return; + var userId = button.dataset.userId; + var accessKey = button.dataset.accessKey || userId; + if (!userId) return; - userLabelEl.textContent = key; - userInputEl.value = key; - textareaEl.value = getUserPolicies(key); + userLabelEl.textContent = accessKey; + userInputEl.value = userId; + textareaEl.value = getUserPolicies(userId); policyModal.show(); }); @@ -213,11 +231,13 @@ window.IAMManagement = (function() { document.querySelectorAll('[data-edit-user]').forEach(function(btn) { btn.addEventListener('click', function() { - var key = btn.dataset.editUser; + var key = btn.dataset.userId; + var accessKey = btn.dataset.accessKey || key; var name = btn.dataset.displayName; currentEditKey = key; + currentEditAccessKey = accessKey; editUserDisplayName.value = name; - editUserForm.action = endpoints.updateUser.replace('ACCESS_KEY', key); + editUserForm.action = buildUserUrl(endpoints.updateUser, key); editUserModal.show(); }); }); @@ -230,12 +250,14 @@ window.IAMManagement = (function() { document.querySelectorAll('[data-delete-user]').forEach(function(btn) { btn.addEventListener('click', function() { - var key = btn.dataset.deleteUser; + var key = btn.dataset.userId; + var accessKey = btn.dataset.accessKey || key; currentDeleteKey = key; - deleteUserLabel.textContent = key; - deleteUserForm.action = endpoints.deleteUser.replace('ACCESS_KEY', key); + currentDeleteAccessKey = accessKey; + deleteUserLabel.textContent = accessKey; + deleteUserForm.action = buildUserUrl(endpoints.deleteUser, key); - if (key === currentUserKey) { + if (accessKey === currentUserKey) { deleteSelfWarning.classList.remove('d-none'); } else { deleteSelfWarning.classList.add('d-none'); @@ -258,8 +280,8 @@ window.IAMManagement = (function() { document.querySelectorAll('[data-rotate-user]').forEach(function(btn) { btn.addEventListener('click', function() { - currentRotateKey = btn.dataset.rotateUser; - rotateUserLabel.textContent = currentRotateKey; + currentRotateKey = btn.dataset.userId; + rotateUserLabel.textContent = 
btn.dataset.accessKey || currentRotateKey; rotateSecretConfirm.classList.remove('d-none'); rotateSecretResult.classList.add('d-none'); @@ -278,7 +300,7 @@ window.IAMManagement = (function() { window.UICore.setButtonLoading(confirmRotateBtn, true, 'Rotating...'); try { - var url = endpoints.rotateSecret.replace('ACCESS_KEY', currentRotateKey); + var url = buildUserUrl(endpoints.rotateSecret, currentRotateKey); var response = await fetch(url, { method: 'POST', headers: { @@ -327,10 +349,11 @@ window.IAMManagement = (function() { function openExpiryModal(key, expiresAt) { currentExpiryKey = key; + var user = getUserByIdentifier(key); var label = document.getElementById('expiryUserLabel'); var input = document.getElementById('expiryDateInput'); var form = document.getElementById('expiryForm'); - if (label) label.textContent = key; + if (label) label.textContent = currentExpiryAccessKey || (user ? user.access_key : key); if (expiresAt) { try { var dt = new Date(expiresAt); @@ -342,7 +365,7 @@ window.IAMManagement = (function() { } else { if (input) input.value = ''; } - if (form) form.action = endpoints.updateExpiry.replace('ACCESS_KEY', key); + if (form) form.action = buildUserUrl(endpoints.updateExpiry, key); var modalEl = document.getElementById('expiryModal'); if (modalEl) { var modal = bootstrap.Modal.getOrCreateInstance(modalEl); @@ -354,7 +377,8 @@ window.IAMManagement = (function() { document.querySelectorAll('[data-expiry-user]').forEach(function(btn) { btn.addEventListener('click', function(e) { e.preventDefault(); - openExpiryModal(btn.dataset.expiryUser, btn.dataset.expiresAt || ''); + currentExpiryAccessKey = btn.dataset.accessKey || btn.dataset.userId; + openExpiryModal(btn.dataset.userId, btn.dataset.expiresAt || ''); }); }); @@ -396,7 +420,12 @@ window.IAMManagement = (function() { } } - function createUserCardHtml(accessKey, displayName, policies) { + function createUserCardHtml(user) { + var userId = user.user_id || ''; + var accessKey = user.access_key || userId; + var displayName = user.display_name || accessKey; + var policies = user.policies || []; + var expiresAt = user.expires_at || ''; var admin = isAdminUser(policies); var cardClass = 'card h-100 iam-user-card' + (admin ? ' iam-admin-card' : ''); var roleBadge = admin @@ -418,7 +447,7 @@ window.IAMManagement = (function() { } var esc = window.UICore.escapeHtml; - return '
' + + return '
' + '
' + '
' + '
' + @@ -447,31 +476,36 @@ window.IAMManagement = (function() { '' + '' + '
' + '
' + '
Bucket Permissions
' + '
' + policyBadges + '
' + - '' + '
'; } - function attachUserCardHandlers(cardElement, accessKey, displayName) { + function attachUserCardHandlers(cardElement, user) { + var userId = user.user_id; + var accessKey = user.access_key; + var displayName = user.display_name; + var expiresAt = user.expires_at || ''; var editBtn = cardElement.querySelector('[data-edit-user]'); if (editBtn) { editBtn.addEventListener('click', function() { - currentEditKey = accessKey; + currentEditKey = userId; + currentEditAccessKey = accessKey; document.getElementById('editUserDisplayName').value = displayName; - document.getElementById('editUserForm').action = endpoints.updateUser.replace('ACCESS_KEY', accessKey); + document.getElementById('editUserForm').action = buildUserUrl(endpoints.updateUser, userId); editUserModal.show(); }); } @@ -479,9 +513,10 @@ window.IAMManagement = (function() { var deleteBtn = cardElement.querySelector('[data-delete-user]'); if (deleteBtn) { deleteBtn.addEventListener('click', function() { - currentDeleteKey = accessKey; + currentDeleteKey = userId; + currentDeleteAccessKey = accessKey; document.getElementById('deleteUserLabel').textContent = accessKey; - document.getElementById('deleteUserForm').action = endpoints.deleteUser.replace('ACCESS_KEY', accessKey); + document.getElementById('deleteUserForm').action = buildUserUrl(endpoints.deleteUser, userId); var deleteSelfWarning = document.getElementById('deleteSelfWarning'); if (accessKey === currentUserKey) { deleteSelfWarning.classList.remove('d-none'); @@ -495,7 +530,7 @@ window.IAMManagement = (function() { var rotateBtn = cardElement.querySelector('[data-rotate-user]'); if (rotateBtn) { rotateBtn.addEventListener('click', function() { - currentRotateKey = accessKey; + currentRotateKey = userId; document.getElementById('rotateUserLabel').textContent = accessKey; document.getElementById('rotateSecretConfirm').classList.remove('d-none'); document.getElementById('rotateSecretResult').classList.add('d-none'); @@ -510,7 +545,8 @@ window.IAMManagement = (function() { if (expiryBtn) { expiryBtn.addEventListener('click', function(e) { e.preventDefault(); - openExpiryModal(accessKey, ''); + currentExpiryAccessKey = accessKey; + openExpiryModal(userId, expiresAt); }); } @@ -518,8 +554,8 @@ window.IAMManagement = (function() { if (policyBtn) { policyBtn.addEventListener('click', function() { document.getElementById('policyEditorUserLabel').textContent = accessKey; - document.getElementById('policyEditorUser').value = accessKey; - document.getElementById('policyEditorDocument').value = getUserPolicies(accessKey); + document.getElementById('policyEditorUserId').value = userId; + document.getElementById('policyEditorDocument').value = getUserPolicies(userId); policyModal.show(); }); } @@ -604,15 +640,18 @@ window.IAMManagement = (function() { } if (usersGrid) { - var cardHtml = createUserCardHtml(data.access_key, data.display_name, data.policies); - usersGrid.insertAdjacentHTML('beforeend', cardHtml); - var newCard = usersGrid.lastElementChild; - attachUserCardHandlers(newCard, data.access_key, data.display_name); - users.push({ + var newUser = { + user_id: data.user_id, access_key: data.access_key, display_name: data.display_name, + expires_at: data.expires_at || '', policies: data.policies || [] - }); + }; + var cardHtml = createUserCardHtml(newUser); + usersGrid.insertAdjacentHTML('beforeend', cardHtml); + var newCard = usersGrid.lastElementChild; + attachUserCardHandlers(newCard, newUser); + users.push(newUser); updateUserCount(); } } @@ -624,21 +663,21 @@ 
window.IAMManagement = (function() { if (policyEditorForm) { policyEditorForm.addEventListener('submit', function(e) { e.preventDefault(); - var userInputEl = document.getElementById('policyEditorUser'); - var key = userInputEl.value; - if (!key) return; + var userInputEl = document.getElementById('policyEditorUserId'); + var userId = userInputEl.value; + if (!userId) return; var template = policyEditorForm.dataset.actionTemplate; - policyEditorForm.action = template.replace('ACCESS_KEY_PLACEHOLDER', key); + policyEditorForm.action = template.replace('USER_ID_PLACEHOLDER', encodeURIComponent(userId)); window.UICore.submitFormAjax(policyEditorForm, { successMessage: 'Policies updated', onSuccess: function(data) { policyModal.hide(); - var userCard = document.querySelector('[data-access-key="' + key + '"]'); + var userCard = document.querySelector('.iam-user-item[data-user-id="' + userId + '"]'); if (userCard) { - var cardEl = userCard.closest('.iam-user-card'); + var cardEl = userCard.querySelector('.iam-user-card'); var badgeContainer = cardEl ? cardEl.querySelector('[data-policy-badges]') : null; if (badgeContainer && data.policies) { var badges = data.policies.map(function(p) { @@ -667,7 +706,7 @@ window.IAMManagement = (function() { } } - var userIndex = users.findIndex(function(u) { return u.access_key === key; }); + var userIndex = users.findIndex(function(u) { return u.user_id === userId; }); if (userIndex >= 0 && data.policies) { users[userIndex].policies = data.policies; } @@ -687,7 +726,7 @@ window.IAMManagement = (function() { editUserModal.hide(); var newName = data.display_name || document.getElementById('editUserDisplayName').value; - var editBtn = document.querySelector('[data-edit-user="' + key + '"]'); + var editBtn = document.querySelector('[data-edit-user][data-user-id="' + key + '"]'); if (editBtn) { editBtn.setAttribute('data-display-name', newName); var card = editBtn.closest('.iam-user-card'); @@ -704,12 +743,12 @@ window.IAMManagement = (function() { } } - var userIndex = users.findIndex(function(u) { return u.access_key === key; }); + var userIndex = users.findIndex(function(u) { return u.user_id === key; }); if (userIndex >= 0) { users[userIndex].display_name = newName; } - if (key === currentUserKey) { + if (currentEditAccessKey === currentUserKey) { document.querySelectorAll('.sidebar-user .user-name').forEach(function(el) { var truncated = newName.length > 16 ? newName.substring(0, 16) + '...' 
: newName; el.textContent = truncated; @@ -734,12 +773,12 @@ window.IAMManagement = (function() { onSuccess: function(data) { deleteUserModal.hide(); - if (key === currentUserKey) { + if (currentDeleteAccessKey === currentUserKey) { window.location.href = '/ui/'; return; } - var deleteBtn = document.querySelector('[data-delete-user="' + key + '"]'); + var deleteBtn = document.querySelector('[data-delete-user][data-user-id="' + key + '"]'); if (deleteBtn) { var cardCol = deleteBtn.closest('[class*="col-"]'); if (cardCol) { @@ -747,7 +786,7 @@ window.IAMManagement = (function() { } } - users = users.filter(function(u) { return u.access_key !== key; }); + users = users.filter(function(u) { return u.user_id !== key; }); updateUserCount(); } }); diff --git a/static/js/ui-core.js b/crates/myfsio-server/static/js/ui-core.js similarity index 95% rename from static/js/ui-core.js rename to crates/myfsio-server/static/js/ui-core.js index 4c20162..dde8f10 100644 --- a/static/js/ui-core.js +++ b/crates/myfsio-server/static/js/ui-core.js @@ -35,6 +35,8 @@ window.UICore = (function() { var successMessage = options.successMessage || 'Operation completed'; var formData = new FormData(form); + var hasFileInput = !!form.querySelector('input[type="file"]'); + var requestBody = hasFileInput ? formData : new URLSearchParams(formData); var csrfToken = getCsrfToken(); var submitBtn = form.querySelector('[type="submit"]'); var originalHtml = submitBtn ? submitBtn.innerHTML : ''; @@ -46,14 +48,18 @@ window.UICore = (function() { } var formAction = form.getAttribute('action') || form.action; + var headers = { + 'X-CSRF-Token': csrfToken, + 'Accept': 'application/json', + 'X-Requested-With': 'XMLHttpRequest' + }; + if (!hasFileInput) { + headers['Content-Type'] = 'application/x-www-form-urlencoded;charset=UTF-8'; + } var response = await fetch(formAction, { method: form.getAttribute('method') || 'POST', - headers: { - 'X-CSRFToken': csrfToken, - 'Accept': 'application/json', - 'X-Requested-With': 'XMLHttpRequest' - }, - body: formData, + headers: headers, + body: requestBody, redirect: 'follow' }); diff --git a/templates/404.html b/crates/myfsio-server/templates/404.html similarity index 80% rename from templates/404.html rename to crates/myfsio-server/templates/404.html index edee52f..077dd9d 100644 --- a/templates/404.html +++ b/crates/myfsio-server/templates/404.html @@ -5,7 +5,7 @@

We can't find that page

The requested console route isn't available in MyFSIO. Double-check the URL or head back to your buckets.

{% endblock %} diff --git a/templates/500.html b/crates/myfsio-server/templates/500.html similarity index 78% rename from templates/500.html rename to crates/myfsio-server/templates/500.html index b8de624..51540ea 100644 --- a/templates/500.html +++ b/crates/myfsio-server/templates/500.html @@ -5,7 +5,7 @@

Internal Server Error

Something went wrong on our end. Please try again later or contact support.

{% endblock %} diff --git a/crates/myfsio-server/templates/_convert.py b/crates/myfsio-server/templates/_convert.py new file mode 100644 index 0000000..87e5508 --- /dev/null +++ b/crates/myfsio-server/templates/_convert.py @@ -0,0 +1,292 @@ +import os +import re +import sys + +TEMPLATE_DIR = os.path.dirname(os.path.abspath(__file__)) + +TERNARY_RE = re.compile( + r"""(\{\{\s*) + (?:"([^"]*)"|'([^']*)') # literal A + \s+if\s+ + ([^{}]+?) # condition + \s+else\s+ + (?:"([^"]*)"|'([^']*)') # literal B + (\s*\}\})""", + re.VERBOSE, +) + +TERNARY_SET_RE = re.compile( + r"""(\{%\s*set\s+([A-Za-z_][A-Za-z_0-9]*)\s*=\s*) + (?:"([^"]*)"|'([^']*)') + \s+if\s+ + ([^{}]+?) + \s+else\s+ + (?:"([^"]*)"|'([^']*)') + (\s*%\})""", + re.VERBOSE, +) + + +def convert_single_quoted_strings_in_expressions(text: str) -> str: + """Inside {{...}} or {%...%}, swap ' for " around tokens that look like strings.""" + def fix(m): + body = m.group(2) + body_fixed = re.sub(r"'([^'\\\n]*)'", r'"\1"', body) + return m.group(1) + body_fixed + m.group(3) + + return re.sub( + r"(\{[{%])([^{}]*?)([}%]\})", + fix, + text, + flags=re.DOTALL, + ) + + +def convert_inline_ternary(text: str) -> str: + def repl_expr(m): + a = m.group(2) if m.group(2) is not None else m.group(3) + cond = m.group(4) + b = m.group(5) if m.group(5) is not None else m.group(6) + return ( + '{% if ' + cond + ' %}' + a + '{% else %}' + b + '{% endif %}' + ) + + def repl_set(m): + varname = m.group(2) + a = m.group(3) if m.group(3) is not None else m.group(4) + cond = m.group(5) + b = m.group(6) if m.group(6) is not None else m.group(7) + return ( + '{% if ' + cond + ' %}{% set ' + varname + ' = "' + a + '" %}' + '{% else %}{% set ' + varname + ' = "' + b + '" %}{% endif %}' + ) + + prev = None + while prev != text: + prev = text + text = TERNARY_SET_RE.sub(repl_set, text) + text = TERNARY_RE.sub(repl_expr, text) + return text + + +def convert_request_args(text: str) -> str: + text = re.sub( + r'request\.args\.get\(\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\)', + r'request_args.\1 | default(value="\2")', + text, + ) + text = re.sub( + r'request\.args\.get\(\s*"([^"]+)"\s*\)', + r'request_args.\1', + text, + ) + text = text.replace('request.endpoint', 'current_endpoint') + return text + + +def convert_items_keys(text: str) -> str: + text = re.sub(r'\.items\(\)', '', text) + text = re.sub(r'\.keys\(\)', '', text) + text = re.sub(r'\.values\(\)', '', text) + return text + + +def convert_tojson(text: str) -> str: + text = re.sub(r'\|\s*tojson\b', '| json_encode | safe', text) + return text + + +def convert_is_none(text: str) -> str: + text = re.sub(r'\bis\s+not\s+none\b', '!= null', text) + text = re.sub(r'\bis\s+none\b', '== null', text) + return text + + +def convert_namespace(text: str) -> str: + def repl(m): + body = m.group(1) + assigns = [a.strip() for a in body.split(',')] + return '{# namespace shim #}' + + text = re.sub( + r'\{%\s*set\s+ns\s*=\s*namespace\(([^)]*)\)\s*%\}', + repl, + text, + ) + text = re.sub(r'\bns\.([A-Za-z_][A-Za-z_0-9]*)\s*=\s*', r'{% set_global \1 = ', text) + text = re.sub(r'\bns\.([A-Za-z_][A-Za-z_0-9]*)', r'\1', text) + return text + + +def convert_url_for_positional(text: str) -> str: + """url_for("x", ...) 
-> url_for(endpoint="x", ...)""" + def repl(m): + prefix = m.group(1) + endpoint = m.group(2) + rest = m.group(3) or '' + rest = rest.strip() + if rest.startswith(','): + rest = rest[1:].strip() + if rest: + return f'{prefix}(endpoint="{endpoint}", {rest})' + return f'{prefix}(endpoint="{endpoint}")' + + pattern = re.compile(r'(url_for)\(\s*"([^"]+)"\s*((?:,[^()]*)?)\)') + prev = None + while prev != text: + prev = text + text = pattern.sub(repl, text) + return text + + +def convert_d_filter(text: str) -> str: + text = re.sub(r'\|\s*d\(\s*([^)]*?)\s*\)', lambda m: f'| default(value={m.group(1) or 0})', text) + return text + + +def convert_replace_filter(text: str) -> str: + def repl(m): + a = m.group(1) + b = m.group(2) + return f'| replace(from="{a}", to="{b}")' + text = re.sub(r'\|\s*replace\(\s*"([^"]*)"\s*,\s*"([^"]*)"\s*\)', repl, text) + return text + + +def convert_truncate_filter(text: str) -> str: + def repl(m): + n = m.group(1) + return f'| truncate(length={n})' + text = re.sub(r'\|\s*truncate\(\s*(\d+)\s*(?:,[^)]*)?\)', repl, text) + return text + + +def convert_strip_method(text: str) -> str: + text = re.sub(r'(\b[A-Za-z_][A-Za-z_0-9.\[\]"]*)\s*\.\s*strip\(\s*\)', r'\1 | trim', text) + return text + + +def convert_split_method(text: str) -> str: + def repl(m): + obj = m.group(1) + sep = m.group(2) + return f'{obj} | split(pat="{sep}")' + text = re.sub(r'(\b[A-Za-z_][A-Za-z_0-9.]*)\s*\.\s*split\(\s*"([^"]*)"\s*\)', repl, text) + return text + + +def convert_python_slice(text: str) -> str: + def repl_colon(m): + obj = m.group(1) + start = m.group(2) or '0' + end = m.group(3) + if start.startswith('-') or (end and end.startswith('-')): + return m.group(0) + if end: + return f'{obj} | slice(start={start}, end={end})' + return f'{obj} | slice(start={start})' + + def repl_neg_end(m): + obj = m.group(1) + n = m.group(2) + return f'{obj} | slice(start=-{n})' + + text = re.sub( + r'(\b[A-Za-z_][A-Za-z_0-9.]*)\[\s*(-?\d*)\s*:\s*(-?\d*)\s*\]', + repl_colon, + text, + ) + text = re.sub( + r'(\b[A-Za-z_][A-Za-z_0-9.]*)\|\s*slice\(start=-(\d+)\s*,\s*end=\s*\)', + repl_neg_end, + text, + ) + return text + + +def convert_inline_ternary_expr(text: str) -> str: + """Handle arbitrary ternary inside {{ ... 
}}: A if COND else B -> {% if COND %}A{% else %}B{% endif %}""" + out_lines = [] + for line in text.split('\n'): + out_lines.append(_convert_line_ternary(line)) + return '\n'.join(out_lines) + + +def _convert_line_ternary(line: str) -> str: + if '{{' not in line or ' if ' not in line or ' else ' not in line: + return line + prev = None + while prev != line: + prev = line + m = re.search(r'\{\{\s*([^{}]+?)\s+if\s+([^{}]+?)\s+else\s+([^{}]+?)\s*\}\}', line) + if not m: + break + replacement = '{% if ' + m.group(2) + ' %}{{ ' + m.group(1) + ' }}{% else %}{{ ' + m.group(3) + ' }}{% endif %}' + line = line[:m.start()] + replacement + line[m.end():] + return line + + +def convert_dict_get(text: str) -> str: + """Convert X.get("key", default) -> X.key | default(value=default) when simple.""" + pattern = re.compile( + r'([A-Za-z_][A-Za-z_0-9]*(?:\.[A-Za-z_][A-Za-z_0-9]*)*)' + r'\.get\(\s*"([A-Za-z_][A-Za-z_0-9]*)"\s*(?:,\s*([^(){}]+?))?\s*\)' + ) + + def repl(m): + obj = m.group(1) + key = m.group(2) + default = (m.group(3) or '').strip() + if default: + return f'{obj}.{key} | default(value={default})' + return f'{obj}.{key}' + + prev = None + while prev != text: + prev = text + text = pattern.sub(repl, text) + return text + + +def convert_file(path: str) -> bool: + with open(path, 'r', encoding='utf-8') as f: + original = f.read() + text = original + text = convert_single_quoted_strings_in_expressions(text) + text = convert_inline_ternary(text) + text = convert_request_args(text) + text = convert_items_keys(text) + text = convert_tojson(text) + text = convert_is_none(text) + text = convert_namespace(text) + text = convert_dict_get(text) + text = convert_url_for_positional(text) + text = convert_d_filter(text) + text = convert_replace_filter(text) + text = convert_truncate_filter(text) + text = convert_strip_method(text) + text = convert_split_method(text) + text = convert_python_slice(text) + text = convert_inline_ternary_expr(text) + if text != original: + with open(path, 'w', encoding='utf-8', newline='\n') as f: + f.write(text) + return True + return False + + +def main(): + changed = [] + for name in sorted(os.listdir(TEMPLATE_DIR)): + if not name.endswith('.html'): + continue + p = os.path.join(TEMPLATE_DIR, name) + if convert_file(p): + changed.append(name) + print('Changed:', len(changed)) + for c in changed: + print(' -', c) + + +if __name__ == '__main__': + main() diff --git a/templates/base.html b/crates/myfsio-server/templates/base.html similarity index 87% rename from templates/base.html rename to crates/myfsio-server/templates/base.html index 445b799..e5b9564 100644 --- a/templates/base.html +++ b/crates/myfsio-server/templates/base.html @@ -3,10 +3,10 @@ - {% if principal %}{% endif %} + {% if principal %}{% endif %} MyFSIO Console - - + + - +
@@ -40,8 +40,8 @@ - - MyFSIO logo + + MyFSIO logo MyFSIO @@ -68,33 +68,33 @@ {% if principal %} -
- + + {% if can_manage_replication %} {% endif %} {% if can_manage_lifecycle %} {% endif %} {% if can_manage_cors %} @@ -85,7 +85,7 @@
-
+
@@ -164,14 +164,14 @@ class="table-responsive objects-table-container drop-zone" id="objects-drop-zone" data-bucket="{{ bucket_name }}" - data-versioning="{{ 'true' if versioning_enabled else 'false' }}" + data-versioning="{% if versioning_enabled %}true{% else %}false{% endif %}" data-objects-api="{{ objects_api_url }}" data-objects-stream="{{ objects_stream_url }}" - data-bulk-delete-endpoint="{{ url_for('ui.bulk_delete_objects', bucket_name=bucket_name) }}" - data-bulk-download-endpoint="{{ url_for('ui.bulk_download_objects', bucket_name=bucket_name) }}" + data-bulk-delete-endpoint="{{ url_for(endpoint="ui.bulk_delete_objects", bucket_name=bucket_name) }}" + data-bulk-download-endpoint="{{ url_for(endpoint="ui.bulk_download_objects", bucket_name=bucket_name) }}" data-folders-url="{{ folders_url }}" data-buckets-for-copy-url="{{ buckets_for_copy_url }}" - data-bucket-total-objects="{{ bucket_stats.get('objects', 0) }}" + data-bucket-total-objects="{{ bucket_stats.objects | default(value=0) }}" > @@ -360,7 +360,7 @@ -
+
@@ -395,14 +395,8 @@ {% endif %} {% if can_edit_policy %} - {% set preset_choice = 'custom' %} - {% if not bucket_policy %} - {% set preset_choice = 'private' %} - {% elif bucket_policy_text and bucket_policy_text.strip() == default_policy.strip() %} - {% set preset_choice = 'public' %} - {% endif %} - - + +
@@ -415,19 +409,19 @@ Choose a template
- - -
- +
@@ -610,7 +604,7 @@
-
+
@@ -632,6 +626,16 @@

All previous versions of objects are preserved. You can roll back accidental changes or deletions at any time.

+ {% elif versioning_suspended %} + {% else %} {% endif %} @@ -654,8 +658,8 @@ Suspend Versioning {% else %} - - + + - {% if enc_algorithm %} - - {% endif %}
{% else %} @@ -859,32 +866,33 @@ Storage Quota
- {% set max_bytes = bucket_quota.get('max_bytes') %} - {% set max_objects = bucket_quota.get('max_objects') %} - {% set has_quota = max_bytes is not none or max_objects is not none %} - {% set current_objects = bucket_stats.get('objects', 0) %} - {% set version_count = bucket_stats.get('version_count', 0) %} - {% set total_objects = bucket_stats.get('total_objects', current_objects) %} - {% set current_bytes = bucket_stats.get('bytes', 0) %} - {% set version_bytes = bucket_stats.get('version_bytes', 0) %} - {% set total_bytes = bucket_stats.get('total_bytes', current_bytes) %} + {% set max_bytes = bucket_quota.max_bytes %} + {% set max_objects = bucket_quota.max_objects %} + {% set current_objects = bucket_stats.objects | default(value=0) %} + {% set version_count = bucket_stats.version_count | default(value=0) %} + {% set total_objects = bucket_stats.total_objects | default(value=current_objects) %} + {% set current_bytes = bucket_stats.bytes | default(value=0) %} + {% set version_bytes = bucket_stats.version_bytes | default(value=0) %} + {% set total_bytes = bucket_stats.total_bytes | default(value=current_bytes) %}
Current Usage
-
-
{{ total_objects }}
+
+
{{ total_objects }}
Total Objects
- {% if max_objects is not none %} +
+ {% if has_max_objects %}
- {% set obj_pct = (total_objects / max_objects * 100) | int if max_objects > 0 else 0 %} -
+ {% if max_objects > 0 %}{% set obj_pct = total_objects / max_objects * 100 | int %}{% else %}{% set obj_pct = 0 %}{% endif %} +
{{ obj_pct }}% of {{ max_objects }} limit
{% else %}
No limit
{% endif %} +
{% if version_count > 0 %}
({{ current_objects }} current + {{ version_count }} versions) @@ -893,18 +901,20 @@
-
-
{{ total_bytes | filesizeformat }}
+
+
{{ total_bytes | filesizeformat }}
Total Storage
- {% if max_bytes is not none %} +
+ {% if has_max_bytes %}
- {% set bytes_pct = (total_bytes / max_bytes * 100) | int if max_bytes > 0 else 0 %} -
+ {% if max_bytes > 0 %}{% set bytes_pct = total_bytes / max_bytes * 100 | int %}{% else %}{% set bytes_pct = 0 %}{% endif %} +
{{ bytes_pct }}% of {{ max_bytes | filesizeformat }} limit
{% else %}
No limit
{% endif %} +
{% if version_bytes > 0 %}
({{ current_bytes | filesizeformat }} current + {{ version_bytes | filesizeformat }} versions) @@ -921,14 +931,14 @@
- Storage quota enabled + Storage quota active

- {% if max_bytes is not none and max_objects is not none %} - Limited to {{ max_bytes | filesizeformat }} and {{ max_objects }} objects. - {% elif max_bytes is not none %} - Limited to {{ max_bytes | filesizeformat }} storage. - {% else %} - Limited to {{ max_objects }} objects. + {% if has_max_bytes and has_max_objects %} + This bucket is limited to {{ max_bytes | filesizeformat }} storage and {{ max_objects }} objects. + {% elif has_max_bytes %} + This bucket is limited to {{ max_bytes | filesizeformat }} storage. + {% elif has_max_objects %} + This bucket is limited to {{ max_objects }} objects. {% endif %}

@@ -947,15 +957,15 @@ {% endif %} {% if can_manage_quota %} -
- + +
MB
@@ -965,7 +975,7 @@
Maximum number of objects allowed. Leave empty for unlimited.
@@ -1049,19 +1059,19 @@ {% elif website_config %}
-

No domains mapped to this bucket. Manage domains

+

No domains mapped to this bucket. Manage domains

{% endif %} {% if can_manage_website %} - - + +
The default page served for directory paths (e.g., index.html).
@@ -1069,17 +1079,17 @@
Optional. The page served for 404 errors.
-
{% if can_manage_replication %} -
+
@@ -1201,9 +1211,9 @@
Replication Active — - {% if replication_rule.mode == 'bidirectional' %} + {% if replication_rule.mode == "bidirectional" %} Bi-directional sync enabled with LWW conflict resolution. - {% elif replication_rule.mode == 'all' %} + {% elif replication_rule.mode == "all" %} All objects (existing + new) are being replicated. {% else %} New uploads to this bucket are automatically replicated. @@ -1223,7 +1233,7 @@
-
+
@@ -1296,7 +1306,7 @@
Mode
- {% if replication_rule.mode == 'bidirectional' %}Bidirectional{% elif replication_rule.mode == 'all' %}All Objects{% else %}New Only{% endif %} + {% if replication_rule.mode == "bidirectional" %}Bidirectional{% elif replication_rule.mode == "all" %}All Objects{% else %}New Only{% endif %}
@@ -1314,11 +1324,11 @@
- + @@ -175,7 +175,7 @@ Create Replication Rules - + Skip for Now @@ -189,7 +189,7 @@
No buckets yet

Create some buckets first, then come back to set up replication.

- + Go to Buckets diff --git a/templates/sites.html b/crates/myfsio-server/templates/sites.html similarity index 97% rename from templates/sites.html rename to crates/myfsio-server/templates/sites.html index d2b9fc3..41dad8d 100644 --- a/templates/sites.html +++ b/crates/myfsio-server/templates/sites.html @@ -24,7 +24,7 @@ {% endif %} - {{ peers|length }} peer{{ 's' if peers|length != 1 else '' }} + {{ peers|length }} peer{% if peers|length != 1 %}s{% else %}{% endif %} @@ -42,38 +42,38 @@

This site's configuration

- - + +
Unique identifier for this site
Public URL for this site
+ value="{% if local_site %}{{ local_site.region }}{% else %}{{ config_site_region }}{% endif %}">
+ value="{% if local_site %}{{ local_site.priority }}{% else %}{{ 100 }}{% endif %}" min="0">
Lower = preferred
@@ -108,8 +108,8 @@
- - + +
@@ -198,7 +198,7 @@
- -
{{ conn.region }}{{ conn.access_key[:8] }}...{{ conn.access_key[-4:] }}{{ conn.access_key | slice(start=0, end=8) }}...{{ conn.access_key | slice(start=-4) }}
- +
{% if peer.is_healthy == true %} @@ -225,7 +225,7 @@
- {{ peer.display_name or peer.site_id }} + {% if peer.display_name %}{{ peer.display_name }}{% else %}{{ peer.site_id }}{% endif %} {% if peer.display_name and peer.display_name != peer.site_id %}
{{ peer.site_id }} {% endif %} @@ -234,8 +234,8 @@
- {% set parsed = peer.endpoint.split('//') %} - {% if parsed|length > 1 %}{{ parsed[1].split('/')[0] }}{% else %}{{ peer.endpoint }}{% endif %} + {% set parsed = peer.endpoint | split(pat="//") %} + {% if parsed|length > 1 %}{% set host_parts = parsed[1] | split(pat="/") %}{{ host_parts[0] }}{% else %}{{ peer.endpoint }}{% endif %} - +
Platform{{ platform }}
Python{{ python_version }}
Rust Extension - {% if has_rust %} - Loaded - {% else %} - Not loaded - {% endif %} +
Engine + Rust (native)
@@ -148,7 +143,7 @@
Interval: {{ gc_status.interval_hours }}h
-
Dry run: {{ "Yes" if gc_status.dry_run else "No" }}
+
Dry run: {% if gc_status.dry_run %}Yes{% else %}No{% endif %}
Temp max age: {{ gc_status.temp_file_max_age_hours }}h
Lock max age: {{ gc_status.lock_file_max_age_hours }}h
Multipart max age: {{ gc_status.multipart_max_age_days }}d
@@ -181,7 +176,13 @@ {{ exec.timestamp_display }} {% set r = exec.result %} - {{ (r.temp_files_deleted|d(0)) + (r.multipart_uploads_deleted|d(0)) + (r.lock_files_deleted|d(0)) + (r.orphaned_metadata_deleted|d(0)) + (r.orphaned_versions_deleted|d(0)) + (r.empty_dirs_removed|d(0)) }} + {% set t1 = r.temp_files_deleted | default(value=0) %} + {% set t2 = r.multipart_uploads_deleted | default(value=0) %} + {% set t3 = r.lock_files_deleted | default(value=0) %} + {% set t4 = r.orphaned_metadata_deleted | default(value=0) %} + {% set t5 = r.orphaned_versions_deleted | default(value=0) %} + {% set t6 = r.empty_dirs_removed | default(value=0) %} + {{ t1 + t2 + t3 + t4 + t5 + t6 }} {{ exec.bytes_freed_display }} @@ -283,9 +284,9 @@
Interval: {{ integrity_status.interval_hours }}h
-
Dry run: {{ "Yes" if integrity_status.dry_run else "No" }}
+
Dry run: {% if integrity_status.dry_run %}Yes{% else %}No{% endif %}
Batch size: {{ integrity_status.batch_size }}
-
Auto-heal: {{ "Yes" if integrity_status.auto_heal else "No" }}
+
Auto-heal: {% if integrity_status.auto_heal %}Yes{% else %}No{% endif %}
@@ -314,16 +315,22 @@ {% for exec in integrity_history %} {{ exec.timestamp_display }} - {{ exec.result.objects_scanned|d(0) }} + {{ exec.result.objects_scanned| default(value=0) }} - {% set total_issues = (exec.result.corrupted_objects|d(0)) + (exec.result.orphaned_objects|d(0)) + (exec.result.phantom_metadata|d(0)) + (exec.result.stale_versions|d(0)) + (exec.result.etag_cache_inconsistencies|d(0)) + (exec.result.legacy_metadata_drifts|d(0)) %} + {% set i1 = exec.result.corrupted_objects | default(value=0) %} + {% set i2 = exec.result.orphaned_objects | default(value=0) %} + {% set i3 = exec.result.phantom_metadata | default(value=0) %} + {% set i4 = exec.result.stale_versions | default(value=0) %} + {% set i5 = exec.result.etag_cache_inconsistencies | default(value=0) %} + {% set i6 = exec.result.legacy_metadata_drifts | default(value=0) %} + {% set total_issues = i1 + i2 + i3 + i4 + i5 + i6 %} {% if total_issues > 0 %} {{ total_issues }} {% else %} 0 {% endif %} - {{ exec.result.issues_healed|d(0) }} + {{ exec.result.issues_healed| default(value=0) }} {% if exec.dry_run %} Dry @@ -387,7 +394,7 @@ return (i === 0 ? b : b.toFixed(1)) + ' ' + units[i]; } - var _displayTimezone = {{ display_timezone|tojson }}; + var _displayTimezone = {{ display_timezone| json_encode | safe }}; function formatTimestamp(ts) { var d = new Date(ts * 1000); @@ -407,8 +414,9 @@ ''; function _gcRefreshHistory() { - fetch('{{ url_for("ui.system_gc_history") }}?limit=10', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_gc_history") }}?limit=10', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (hist) { @@ -446,8 +454,9 @@ } function _integrityRefreshHistory() { - fetch('{{ url_for("ui.system_integrity_history") }}?limit=10', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_integrity_history") }}?limit=10', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (hist) { @@ -542,8 +551,9 @@ } function _gcPoll() { - fetch('{{ url_for("ui.system_gc_status") }}', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_gc_status") }}', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (status) { @@ -554,8 +564,9 @@ } else { _gcSetScanning(false); _gcRefreshHistory(); - fetch('{{ url_for("ui.system_gc_history") }}?limit=1', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_gc_history") }}?limit=1', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (hist) { @@ -577,7 +588,7 @@ document.getElementById('gcResult').classList.add('d-none'); _gcSetScanning(true); - fetch('{{ url_for("ui.system_gc_run") }}', { + fetch('{{ url_for(endpoint="ui.system_gc_run") }}', { method: 'POST', headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken}, body: JSON.stringify({dry_run: dryRun}) @@ -596,7 +607,13 @@ body.textContent = data.error; return; } - _gcPollTimer = setTimeout(_gcPoll, 2000); + if (data.status === 'started' || data.scanning === true || data.running === true) { + _gcPollTimer = setTimeout(_gcPoll, 2000); + return; + } + _gcSetScanning(false); + _gcShowResult(data, dryRun); + _gcRefreshHistory(); }) .catch(function (err) { _gcSetScanning(false); @@ -672,8 +689,9 @@ } function _integrityPoll() { - fetch('{{ 
url_for("ui.system_integrity_status") }}', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_integrity_status") }}', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (status) { @@ -684,8 +702,9 @@ } else { _integritySetScanning(false); _integrityRefreshHistory(); - fetch('{{ url_for("ui.system_integrity_history") }}?limit=1', { - headers: {'X-CSRFToken': csrfToken} + fetch('{{ url_for(endpoint="ui.system_integrity_history") }}?limit=1', { + headers: {'X-CSRFToken': csrfToken}, + cache: 'no-store' }) .then(function (r) { return r.json(); }) .then(function (hist) { @@ -707,7 +726,7 @@ document.getElementById('integrityResult').classList.add('d-none'); _integritySetScanning(true); - fetch('{{ url_for("ui.system_integrity_run") }}', { + fetch('{{ url_for(endpoint="ui.system_integrity_run") }}', { method: 'POST', headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken}, body: JSON.stringify({dry_run: dryRun, auto_heal: autoHeal}) @@ -726,7 +745,13 @@ body.textContent = data.error; return; } - _integrityPollTimer = setTimeout(_integrityPoll, 2000); + if (data.status === 'started' || data.scanning === true || data.running === true) { + _integrityPollTimer = setTimeout(_integrityPoll, 2000); + return; + } + _integritySetScanning(false); + _integrityShowResult(data, dryRun, autoHeal); + _integrityRefreshHistory(); }) .catch(function (err) { _integritySetScanning(false); diff --git a/templates/website_domains.html b/crates/myfsio-server/templates/website_domains.html similarity index 97% rename from templates/website_domains.html rename to crates/myfsio-server/templates/website_domains.html index c7c0da4..58e2c18 100644 --- a/templates/website_domains.html +++ b/crates/myfsio-server/templates/website_domains.html @@ -16,7 +16,7 @@
- {{ mappings|length }} mapping{{ 's' if mappings|length != 1 else '' }} + {{ mappings|length }} mapping{% if mappings|length != 1 %}s{% endif %}
@@ -34,8 +34,8 @@

Point a custom domain to a bucket

- - + +
- +